openEuler:24.03:SP1:Everything / gcc
Changes of Revision 7
_service:tar_scm:gcc.spec
Changed
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 32
+%global gcc_release 45
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1

@@ -69,12 +69,8 @@
 %global multilib_32_arch i686
 %endif
 %ifarch riscv64
-%global _lib lib
 %global _smp_mflags -j8
 %endif
-%ifarch loongarch64
-%global _lib lib
-%endif

 %global isl_enable 0
 %global check_enable 0
@@ -206,27 +202,218 @@
 Patch94: 0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch
 Patch95: 0095-STABS-remove-gstabs-and-gxcoff-functionality.patch
 Patch96: 0096-Bugfix-Autofdo-use-PMU-sampling-set-num-eauals-den.patch
-Patch97: 0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
-Patch98: 0098-aarch64-Avoid-a-use-of-callee-offset.patch
-Patch99: 0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
-Patch100: 0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
-Patch101: 0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
-Patch102: 0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
-Patch103: 0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
-Patch104: 0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
-Patch105: 0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
-Patch106: 0106-aarch64-Tweak-frame-size-comment.patch
-Patch107: 0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
-Patch108: 0108-aarch64-Simplify-top-of-frame-allocation.patch
-Patch109: 0109-aarch64-Minor-initial-adjustment-tweak.patch
-Patch110: 0110-aarch64-Tweak-stack-clash-boundary-condition.patch
-Patch111: 0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
-Patch112: 0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
-Patch113: 0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
-Patch114: 0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
-Patch115: 0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
-Patch116: 0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
-Patch117: 0117-aarch64-Fix-loose-ldpstp-check.patch
+Patch97: 0097-Improve-non-loop-disambiguation.patch
+Patch98: 0098-CHREC-multiplication-and-undefined-overflow.patch
+Patch99: 0099-Enable-Transposed-SLP.patch
+Patch100: 0100-Add-hip09-machine-discribtion.patch
+Patch101: 0101-Add-hip11-CPU-pipeline-scheduling.patch
+Patch102: 0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch
+Patch103: 0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
+Patch104: 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
+Patch105: 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch
+Patch106: 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
+Patch107: 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
+Patch108: 0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch
+Patch109: 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
+Patch110: 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
+Patch111: 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
+Patch112: 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
+Patch113: 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
+Patch114: 0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
+Patch115: 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch
+Patch116: 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
+Patch117: 0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
+Patch118: 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch
+Patch119: 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
+Patch120: 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch
+Patch121: 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
+Patch122: 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
+Patch123: 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch
+Patch124: 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
+Patch125: 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
+Patch126: 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
+Patch127: 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch
+Patch128: 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
+Patch129: 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch
+Patch130: 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch
+Patch131: 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
+Patch132: 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
+Patch133: 0133-Backport-SME-mode-switching-Add-note-problem.patch
+Patch134: 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch
+Patch135: 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
+Patch136: 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
+Patch137: 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch
+Patch138: 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch
+Patch139: 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
+Patch140: 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
+Patch141: 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch
+Patch142: 0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch
+Patch143: 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch
+Patch144: 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch
+Patch145: 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
+Patch146: 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch
+Patch147: 0147-Backport-SME-function-Change-return-type-of-predicat.patch
+Patch148: 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
+Patch149: 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
+Patch150: 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
+Patch151: 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
+Patch152: 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
+Patch153: 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
+Patch154: 0154-Backport-SME-recog-Support-space-in-cons.patch
+Patch155: 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch
+Patch156: 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
+Patch157: 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
+Patch158: 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch
+Patch159: 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch
+Patch160: 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
+Patch161: 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
+Patch162: 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
+Patch163: 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
+Patch164: 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
+Patch165: 0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
+Patch166: 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
+Patch167: 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch
+Patch168: 0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
+Patch169: 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
+Patch170: 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
+Patch171: 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch
+Patch172: 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
+Patch173: 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch
+Patch174: 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
+Patch175: 0175-Backport-SME-aarch64-Add-sme.patch
+Patch176: 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
+Patch177: 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
+Patch178: 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch
+Patch179: 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
+Patch180: 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
+Patch181: 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
+Patch182: 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
+Patch183: 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
+Patch184: 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch
+Patch185: 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
+Patch186: 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch
+Patch187: 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
+Patch188: 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
+Patch189: 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch
+Patch190: 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch
+Patch191: 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
+Patch192: 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch
+Patch193: 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
+Patch194: 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch
+Patch195: 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch
+Patch196: 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
+Patch197: 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
+Patch198: 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
+Patch199: 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
+Patch200: 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
+Patch201: 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
+Patch202: 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
+Patch203: 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
+Patch204: 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
+Patch205: 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch
+Patch206: 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
+Patch207: 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch
+Patch208: 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch
+Patch209: 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch
+Patch210: 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch
+Patch211: 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
+Patch212: 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch
+Patch213: 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch
+Patch214: 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch
+Patch215: 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch
+Patch216: 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch
+Patch217: 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch
+Patch218: 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch
+Patch219: 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch
+Patch220: 0220-Backport-SME-libgcc-Fix-config.in.patch
+Patch221: 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch
+Patch222: 0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch
+Patch223: 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch
+Patch224: 0224-Backport-SME-aarch64-Add-V1DI-mode.patch
+Patch225: 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch
+Patch226: 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch
+Patch227: 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch
+Patch228: 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch
+Patch229: 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch
+Patch230: 0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch
+Patch231: 0231-SME-Add-missing-header-file-in-aarch64.cc.patch
+Patch232: 0232-Backport-SME-c-Add-support-for-__extension__.patch
+Patch233: 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch
+Patch234: 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch
+Patch235: 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch
+Patch236: 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch
+Patch237: 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch
+Patch238: 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch
+Patch239: 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch
+Patch240: 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch
+Patch241: 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch
+Patch242: 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch
+Patch243: 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch
+Patch244: 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
+Patch245: 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch
+Patch246: 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch
+Patch247: 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch
+Patch248: 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch
+Patch249: 0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch
+Patch250: 0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
+Patch251: 0251-SME-Adapt-some-testsuites.patch
+Patch252: 0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch
+Patch253: 0253-aarch64-Fix-return-register-handling-in-untyped_call.patch
+Patch254: 0254-aarch64-Fix-loose-ldpstp-check.patch
+Patch255: 0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
+Patch256: 0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch
+Patch257: 0257-Make-option-mvzeroupper-independent-of-optimization-.patch
+Patch258: 0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
+Patch259: 0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
+Patch260: 0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
+Patch261: 0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch
+Patch262: 0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
+Patch263: 0263-Software-mitigation-Disable-gather-generation-in-vec.patch
+Patch264: 0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
+Patch265: 0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch
+Patch266: 0266-Disparage-slightly-for-the-alternative-which-move-DF.patch
+Patch267: 0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch
+Patch268: 0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
+Patch269: 0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch
+Patch270: 0270-Initial-Raptorlake-Support.patch
+Patch271: 0271-Initial-Meteorlake-Support.patch
+Patch272: 0272-Support-Intel-AMX-FP16-ISA.patch
+Patch273: 0273-Support-Intel-prefetchit0-t1.patch
+Patch274: 0274-Initial-Granite-Rapids-Support.patch
+Patch275: 0275-Support-Intel-AMX-COMPLEX.patch
+Patch276: 0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
+Patch277: 0277-Initial-Granite-Rapids-D-Support.patch
+Patch278: 0278-Correct-Granite-Rapids-D-documentation.patch
+Patch279: 0279-i386-Remove-Meteorlake-s-family_model.patch
+Patch280: 0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch
+Patch281: 0281-x86-Update-model-values-for-Raptorlake.patch
+Patch282: 0282-Fix-target_clone-arch-graniterapids-d.patch
+Patch283: 0283-i386-Change-prefetchi-output-template.patch
+Patch284: 0284-i386-Add-non-optimize-prefetchi-intrins.patch
+Patch285: 0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch
+Patch286: 0286-Try-to-use-AI-model-to-guide-optimization.patch
+Patch287: 0287-Add-dynamic-memory-access-checks.patch
+Patch288: 0288-Enable-macro-use-commandline.patch
+Patch289: 0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch
+Patch290: 0290-Add-ipa-prefetch-test-for-gcc-s-case.patch
+Patch291: 0291-Fix-settings-for-wide-operations-tests.patch
+Patch292: 0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch
+Patch293: 0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch
+Patch294: 0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch
+Patch295: 0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch
+Patch296: 0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch
+Patch297: 0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch
+Patch298: 0298-Mark-prefetch-builtin-as-willreturn.patch
+Patch299: 0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch
+Patch300: 0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch
+Patch301: 0301-Add-required-check-for-iteration-through-uses.patch
+Patch302: 0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch
+Patch303: 0303-Add-generation-of-stream-in-functions-for-pre-versio.patch
+Patch304: 0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch
+Patch305: 0305-Backport-varasm-Handle-private-COMDAT-function-symbo.patch
+Patch306: 0306-RISC-V-Install-libstdc-libcc1-etc-to-lib64-instead-o.patch
+Patch307: 0307-Set-fallback-value-for-print-multi-os-directory.patch
+Patch308: 0308-Fix-enum-INPUT-MIDDLE-FINAL-aes_stage.patch

 # Part 3000 ~ 4999
 %ifarch loongarch64
@@ -359,6 +546,199 @@
 Patch3128: LoongArch-Add-LA664-support.patch
 Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch
 Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
+# -- Update to master, lastest commit: 60e99901aef8e7efd4d60adf9f82021fcbd1101f
+Patch3131: 0001-LoongArch-Reimplement-multilib-build-option-handling.patch
+Patch3132: 0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
+Patch3133: 0003-Modify-gas-uleb128-support-test.patch
+Patch3134: 0004-LoongArch-Optimizations-of-vector-construction.patch
+Patch3135: 0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch
+Patch3136: 0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch
+Patch3137: 0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
+Patch3138: 0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch
+Patch3139: 0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
+Patch3140: 0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch
+Patch3141: 0011-LoongArch-Implement-avg-and-sad-standard-names.patch
+Patch3142: 0012-LoongArch-Implement-vec_widen-standard-names.patch
+Patch3143: 0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
+Patch3144: 0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
+Patch3145: 0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
+Patch3146: 0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
+Patch3147: 0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
+Patch3148: 0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
+Patch3149: 0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch
+Patch3150: 0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
+Patch3151: 0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
+Patch3152: 0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch
+Patch3153: 0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch
+Patch3154: 0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
+Patch3155: 0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
+Patch3156: 0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
+Patch3157: 0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
+Patch3158: 0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
+Patch3159: 0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch
+Patch3160: 0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
+Patch3161: 0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
+Patch3162: 0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
+Patch3163: 0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
+Patch3164: 0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
+Patch3165: 0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
+Patch3166: 0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
+Patch3167: 0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
+Patch3168: 0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
+Patch3169: 0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
+Patch3170: 0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch
+Patch3171: 0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
+Patch3172: 0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch
+Patch3173: 0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
+Patch3174: 0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
+Patch3175: 0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch
+Patch3176: 0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
+Patch3177: 0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
+Patch3178: 0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
+Patch3179: 0049-LoongArch-Remove-lrint_allow_inexact.patch
+Patch3180: 0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
+Patch3181: 0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch
+Patch3182: 0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
+Patch3183: 0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
+Patch3184: 0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
+Patch3185: 0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
+Patch3186: 0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
+Patch3187: 0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
+Patch3188: 0058-LoongArch-Add-support-for-xorsign.patch
+Patch3189: 0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
+Patch3190: 0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
+Patch3191: 0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
+Patch3192: 0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
+Patch3193: 0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch
+Patch3194: 0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
+Patch3195: 0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
+Patch3196: 0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
+Patch3197: 0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
+Patch3198: 0068-LoongArch-Fix-warnings-building-libgcc.patch
+Patch3199: 0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch
+Patch3200: 0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch
+Patch3201: 0071-LoongArch-Fix-instruction-costs-PR112936.patch
+Patch3202: 0072-LoongArch-Add-alslsi3_extend.patch
+Patch3203: 0073-LoongArch-Add-support-for-D-frontend.patch
+Patch3204: 0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
+Patch3205: 0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
+Patch3206: 0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
+Patch3207: 0077-LoongArch-Clean-up-vec_init-expander.patch
+Patch3208: 0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch
+Patch3209: 0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch
+Patch3210: 0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
+Patch3211: 0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch
+Patch3212: 0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch
+Patch3213: 0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
+Patch3214: 0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch
+Patch3215: 0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch
+Patch3216: 0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
+Patch3217: 0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
+Patch3218: 0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
+Patch3219: 0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
+Patch3220: 0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
+Patch3221: 0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch
+Patch3222: 0092-LoongArch-Added-TLS-Le-Relax-support.patch
+Patch3223: 0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
+Patch3224: 0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
+Patch3225: 0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch
+Patch3226: 0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch
+Patch3227: 0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch
+Patch3228: 0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
+Patch3229: 0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch
+Patch3230: 0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
+Patch3231: 0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch
+Patch3232: 0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch
+Patch3233: 0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch
+Patch3234: 0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch
+Patch3235: 0105-LoongArch-Use-enums-for-constants.patch
+Patch3236: 0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch
+Patch3237: 0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch
+Patch3238: 0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch
+Patch3239: 0109-LoongArch-Implement-option-save-restore.patch
+Patch3240: 0110-LoongArch-Redundant-sign-extension-elimination-optim.patch
+Patch3241: 0111-LoongArch-Redundant-sign-extension-elimination-optim.patch
+Patch3242: 0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch
+Patch3243: 0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch
+Patch3244: 0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch
+Patch3245: 0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch
+Patch3246: 0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch
+Patch3247: 0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch
+Patch3248: 0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch
+Patch3249: 0119-LoongArch-Remove-vec_concatz-mode-pattern.patch
+Patch3250: 0120-LoongArch-Optimize-implementation-of-single-precisio.patch
+Patch3251: 0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch
+Patch3252: 0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch
+Patch3253: 0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch
+Patch3254: 0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch
+Patch3255: 0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch
+Patch3256: 0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch
+Patch3257: 0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch
+Patch3258: 0128-LoongArch-Don-t-split-the-instructions-containing-re.patch
+Patch3259: 0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch
+Patch3260: 0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch
+Patch3261: 0131-LoongArch-Fix-an-ODR-violation.patch
+Patch3262: 0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch
+Patch3263: 0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch
+Patch3264: 0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch
+Patch3265: 0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch
+Patch3266: 0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch
+Patch3267: 0137-LoongArch-When-checking-whether-the-assembler-suppor.patch
+Patch3268: 0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch
+Patch3269: 0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch
+Patch3270: 0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch
+Patch3271: 0141-LoongArch-Allow-s9-as-a-register-alias.patch
+Patch3272: 0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch
+Patch3273: 0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch
+Patch3274: 0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch
+Patch3275: 0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch
+Patch3276: 0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch
+Patch3277: 0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch
+Patch3278: 0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch
+Patch3279: 0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch
+Patch3280: 0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch
+Patch3281: 0151-LoongArch-Remove-unused-useless-definitions.patch
+Patch3282: 0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch
+Patch3283: 0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch
+Patch3284: 0154-LoongArch-Fix-a-typo-PR-114407.patch
+Patch3285: 0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch
+Patch3286: 0156-LoongArch-Add-descriptions-of-the-compilation-option.patch
+Patch3287: 0157-LoongArch-Split-loongarch_option_override_internal-i.patch
+Patch3288: 0158-LoongArch-Regenerate-loongarch.opt.urls.patch
+Patch3289: 0159-LoongArch-Add-support-for-TLS-descriptors.patch
+Patch3290: 0160-LoongArch-Fix-missing-plugin-header.patch
+Patch3291: 0161-LoongArch-Remove-unused-code.patch
+Patch3292: 0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch
+Patch3293: 0163-LoongArch-Enable-switchable-target.patch
+Patch3294: 0164-LoongArch-Define-ISA-versions.patch
+Patch3295: 0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch
+Patch3296: 0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch
+Patch3297: 0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch
+Patch3298: 0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch
+Patch3299: 0169-LoongArch-Use-bstrins-for-value-1u-const.patch
+Patch3300: 0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch
+Patch3301: 0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch
+Patch3302: 0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch
+Patch3303: 0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch
+Patch3304: 0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch
+Patch3305: 0175-LoongArch-Remove-unreachable-codes.patch
+Patch3306: 0176-LoongArch-Organize-the-code-related-to-split-move-an.patch
+Patch3307: 0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch
+Patch3308: 0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch
+Patch3309: 0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch
+Patch3310: 0180-testsuite-fix-dg-do-preprocess-typo.patch
+Patch3311: 0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch
+Patch3312: 0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch
+Patch3313: 0183-LoongArch-Drop-vcond-u-expanders.patch
+Patch3314: 0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch
+Patch3315: 0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch
+Patch3316: 0186-LoongArch-Add-support-to-annotate-tablejump.patch
+Patch3317: 0187-LoongArch-Fix-up-r15-4130.patch
+Patch3318: 0188-libphobos-Update-build-scripts-for-LoongArch64.patch
+Patch3319: 0189-LoongArch-fix-building-errors.patch
+Patch3320: 0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch
+Patch3321: 0191-LoongArch-Change-OSDIR-for-distribution.patch
+Patch3322: Fix-indentation-and-numbering-errors.diff
 %endif

 # On ARM EABI systems, we do want -gnueabi to be part of the
@@ -827,225 +1207,609 @@

 %prep
 %setup -q -n gcc-12.3.0
-%patch1 -p1
-%patch2 -p1
-%patch3 -p1
-%patch4 -p1
-%patch6 -p1
-%patch7 -p1
-%patch8 -p1
-%patch9 -p1
-%patch10 -p1
-%patch11 -p1
-%patch12 -p1
-%patch13 -p1
-%patch14 -p1
-%patch15 -p1
-%patch16 -p1
-%patch17 -p1
-%patch18 -p1
-%patch19 -p1
-%patch20 -p1
-%patch21 -p1
-%patch22 -p1
-%patch23 -p1
-%patch24 -p1
-%patch25 -p1
-%patch26 -p1
-%patch27 -p1
-%patch28 -p1
-%patch29 -p1
-%patch30 -p1
-%patch31 -p1
-%patch32 -p1
-%patch33 -p1
-%patch34 -p1
-%patch35 -p1
-%patch36 -p1
-%patch37 -p1
-%patch38 -p1
-%patch39 -p1
-%patch40 -p1
-%patch41 -p1
-%patch42 -p1
-%patch43 -p1
-%patch44 -p1
-%patch45 -p1
-%patch46 -p1
-%patch47 -p1
-%patch48 -p1
-%patch49 -p1
-%patch50 -p1
-%patch51 -p1
-%patch52 -p1
-%patch53 -p1
-%patch54 -p1
-%patch55 -p1
-%patch56 -p1
-%patch57 -p1
-%patch86 -p1
-%patch87 -p1
-%patch88 -p1
-%patch89 -p1
-%patch90 -p1
-%patch91 -p1
-%patch92 -p1
-%patch93 -p1
-%patch94 -p1
-%patch95 -p1
-%patch96 -p1
-%patch97 -p1
-%patch98 -p1
-%patch99 -p1
-%patch100 -p1
-%patch101 -p1
-%patch102 -p1
-%patch103 -p1
-%patch104 -p1
-%patch105 -p1
-%patch106 -p1
-%patch107 -p1
-%patch108 -p1
-%patch109 -p1
-%patch110 -p1
-%patch111 -p1
-%patch112 -p1
-%patch113 -p1
-%patch114 -p1
-%patch115 -p1
-%patch116 -p1
-%patch117 -p1
+%patch -P1 -p1
+%patch -P2 -p1
+%patch -P3 -p1
+%patch -P4 -p1
+%patch -P6 -p1
+%patch -P7 -p1
+%patch -P8 -p1
+%patch -P9 -p1
+%patch -P10 -p1
+%patch -P11 -p1
+%patch -P12 -p1
+%patch -P13 -p1
+%patch -P14 -p1
+%patch -P15 -p1
+%patch -P16 -p1
+%patch -P17 -p1
+%patch -P18 -p1
+%patch -P19 -p1
+%patch -P20 -p1
+%patch -P21 -p1
+%patch -P22 -p1
+%patch -P23 -p1
+%patch -P24 -p1
+%patch -P25 -p1
+%patch -P26 -p1
+%patch -P27 -p1
+%patch -P28 -p1
+%patch -P29 -p1
+%patch -P30 -p1
+%patch -P31 -p1
+%patch -P32 -p1
+%patch -P33 -p1
+%patch -P34 -p1
+%patch -P35 -p1
+%patch -P36 -p1
+%patch -P37 -p1
+%patch -P38 -p1
+%patch -P39 -p1
+%patch -P40 -p1
+%patch -P41 -p1
+%patch -P42 -p1
+%patch -P43 -p1
+%patch -P44 -p1
+%patch -P45 -p1
+%patch -P46 -p1
+%patch -P47 -p1
+%patch -P48 -p1
+%patch -P49 -p1
+%patch -P50 -p1
+%patch -P51 -p1
+%patch -P52 -p1
+%patch -P53 -p1
+%patch -P54 -p1
+%patch -P55 -p1
+%patch -P56 -p1
+%patch -P57 -p1
+%patch -P86 -p1
+%patch -P87 -p1
+%patch -P88 -p1
+%patch -P89 -p1
+%patch -P90 -p1
+%patch -P91 -p1
+%patch -P92 -p1
+%patch -P93 -p1
+%patch -P94 -p1
+%patch -P95 -p1
+%patch -P96 -p1
+%patch -P97 -p1
+%patch -P98 -p1
+%patch -P99 -p1
+%patch -P100 -p1
+%patch -P101 -p1
+%patch -P102 -p1
+%patch -P103 -p1
+%patch -P104 -p1
+%patch -P105 -p1
+%patch -P106 -p1
+%patch -P107 -p1
+%patch -P108 -p1
+%patch -P109 -p1
+%patch -P110 -p1
+%patch -P111 -p1
+%patch -P112 -p1
+%patch -P113 -p1
+%patch -P114 -p1
+%patch -P115 -p1
+%patch -P116 -p1
+%patch -P117 -p1
+%patch -P118 -p1
+%patch -P119 -p1
+%patch -P120 -p1
+%patch -P121 -p1
+%patch -P122 -p1
+%patch -P123 -p1
+%patch -P124 -p1
+%patch -P125 -p1
+%patch -P126 -p1
+%patch -P127 -p1
+%patch -P128 -p1
+%patch -P129 -p1
+%patch -P130 -p1
+%patch -P131 -p1
+%patch -P132 -p1
+%patch -P133 -p1
+%patch -P134 -p1
+%patch -P135 -p1
+%patch -P136 -p1
+%patch -P137 -p1
+%patch -P138 -p1
+%patch -P139 -p1
+%patch -P140 -p1
+%patch -P141 -p1
+%patch -P142 -p1
+%patch -P143 -p1
+%patch -P144 -p1
+%patch -P145 -p1
+%patch -P146 -p1
+%patch -P147 -p1
+%patch -P148 -p1
+%patch -P149 -p1
+%patch -P150 -p1
+%patch -P151 -p1
+%patch -P152 -p1
+%patch -P153 -p1
+%patch -P154 -p1
+%patch -P155 -p1
+%patch -P156 -p1
+%patch -P157 -p1
+%patch -P158 -p1
+%patch -P159 -p1
+%patch -P160 -p1
+%patch -P161 -p1
+%patch -P162 -p1
+%patch -P163 -p1
+%patch -P164 -p1
+%patch -P165 -p1
+%patch -P166 -p1
+%patch -P167 -p1
+%patch -P168 -p1
+%patch -P169 -p1
+%patch -P170 -p1
+%patch -P171 -p1
+%patch -P172 -p1
+%patch -P173 -p1
+%patch -P174 -p1
+%patch -P175 -p1
+%patch -P176 -p1
+%patch -P177 -p1
+%patch -P178 -p1
+%patch -P179 -p1
+%patch -P180 -p1
+%patch -P181 -p1
+%patch -P182 -p1
+%patch -P183 -p1
+%patch -P184 -p1
+%patch -P185 -p1
+%patch -P186 -p1
+%patch -P187 -p1
+%patch -P188 -p1
+%patch -P189 -p1
+%patch -P190 -p1
+%patch -P191 -p1
+%patch -P192 -p1
+%patch -P193 -p1
+%patch -P194 -p1
+%patch -P195 -p1
+%patch -P196 -p1
+%patch -P197 -p1
+%patch -P198 -p1
+%patch -P199 -p1
+%patch -P200 -p1
+%patch -P201 -p1
+%patch -P202 -p1
+%patch -P203 -p1
+%patch -P204 -p1
+%patch -P205 -p1
+%patch -P206 -p1
+%patch -P207 -p1
+%patch -P208 -p1
+%patch -P209 -p1
+%patch -P210 -p1
+%patch -P211 -p1
+%patch -P212 -p1
+%patch -P213 -p1
+%patch -P214 -p1
+%patch -P215 -p1
+%patch -P216 -p1
+%patch -P217 -p1
+%patch -P218 -p1
+%patch -P219 -p1
+%patch -P220 -p1
+%patch -P221 -p1
+%patch -P222 -p1
+%patch -P223 -p1
+%patch -P224 -p1
+%patch -P225 -p1
+%patch -P226 -p1
+%patch -P227 -p1
+%patch -P228 -p1
+%patch -P229 -p1
+%patch -P230 -p1
+%patch -P231 -p1
+%patch -P232 -p1
+%patch -P233 -p1
+%patch -P234 -p1
+%patch -P235 -p1
+%patch -P236 -p1
+%patch -P237 -p1
+%patch -P238 -p1
+%patch -P239 -p1
+%patch -P240 -p1
+%patch -P241 -p1
+%patch -P242 -p1
+%patch -P243 -p1
+%patch -P244 -p1
+%patch -P245 -p1
+%patch -P246 -p1
+%patch -P247 -p1
+%patch -P248 -p1
+%patch -P249 -p1
+%patch -P250 -p1
+%patch -P251 -p1
+%patch -P252 -p1
+%patch -P253 -p1
+%patch -P254 -p1
+%patch -P255 -p1
+%patch -P256 -p1
+%patch -P257 -p1
+%patch -P258 -p1
+%patch -P259 -p1
+%patch -P260 -p1
+%patch -P261 -p1
+%patch -P262 -p1
+%patch -P263 -p1
+%patch -P264 -p1
+%patch -P265 -p1
+%patch -P266 -p1
+%patch -P267 -p1
+%patch -P268 -p1
+%patch -P269 -p1
+%patch -P270 -p1
+%patch -P271 -p1
+%patch -P272 -p1
+%patch -P273 -p1
+%patch -P274 -p1
+%patch -P275 -p1
+%patch -P276 -p1
+%patch -P277 -p1
+%patch -P278 -p1
+%patch -P279 -p1
+%patch -P280 -p1
+%patch -P281 -p1
+%patch -P282 -p1
+%patch -P283 -p1
+%patch -P284 -p1
+%patch -P285 -p1
+%patch -P286 -p1
+%patch -P287 -p1
+%patch -P288 -p1
+%patch -P289 -p1
+%patch -P290 -p1
+%patch -P291 -p1
+%patch -P292 -p1
+%patch -P293 -p1
+%patch -P294 -p1
+%patch -P295 -p1
+%patch -P296 -p1
+%patch -P297 -p1
+%patch -P298 -p1
+%patch -P299 -p1
+%patch -P300 -p1
+%patch -P301 -p1
+%patch -P302 -p1
+%patch -P303 -p1
+%patch -P304 -p1
+%patch -P305 -p1
+%patch -P306 -p1
+%patch -P307 -p1
+%patch -P308 -p1

 %ifarch loongarch64
-%patch3001 -p1
-%patch3002 -p1
-%patch3003 -p1
-%patch3004 -p1
-%patch3005 -p1
-%patch3006 -p1
-%patch3007 -p1
-%patch3008 -p1
-%patch3009 -p1
-%patch3010 -p1
-%patch3011 -p1
-%patch3012 -p1
-%patch3013 -p1
-%patch3014 -p1
-%patch3015 -p1
-%patch3016 -p1
-%patch3017 -p1
-%patch3018 -p1
-%patch3019 -p1
-%patch3020 -p1
-%patch3021 -p1
-%patch3022 -p1
-%patch3023 -p1
-%patch3024 -p1
-%patch3025 -p1
-%patch3026 -p1
-%patch3027 -p1
-%patch3028 -p1
-%patch3029 -p1
-%patch3030 -p1
-%patch3031 -p1
-%patch3032 -p1
-%patch3033 -p1
-%patch3034 -p1
-%patch3035 -p1
-%patch3036 -p1
-%patch3037 -p1
-%patch3038 -p1
-%patch3039 -p1
-%patch3040 -p1
-%patch3041 -p1
-%patch3042 -p1
-%patch3043 -p1
-%patch3044 -p1
-%patch3045 -p1
-%patch3046 -p1
-%patch3047 -p1
-%patch3048 -p1
-%patch3049 -p1
-%patch3050 -p1
-%patch3051 -p1
-%patch3052 -p1
-%patch3053 -p1
-%patch3054 -p1
-%patch3056 -p1
-%patch3057 -p1
-%patch3058 -p1
-%patch3059 -p1
-%patch3060 -p1
-%patch3061 -p1
-%patch3062 -p1
-%patch3063 -p1
-%patch3064 -p1
-%patch3065 -p1
-%patch3066 -p1
-%patch3067 -p1
-%patch3068 -p1
-%patch3069 -p1
-%patch3070 -p1
-%patch3071 -p1
-%patch3072 -p1
-%patch3073 -p1
-%patch3074 -p1
-%patch3075 -p1
-%patch3076 -p1
-%patch3077 -p1
-%patch3078 -p1
-%patch3079 -p1
-%patch3080 -p1
-%patch3081 -p1
-%patch3082 -p1
-%patch3083 -p1
-%patch3084 -p1
-%patch3085 -p1
-%patch3086 -p1
-%patch3087 -p1
-%patch3088 -p1
-%patch3089 -p1
-%patch3090 -p1
-%patch3091 -p1
-%patch3092 -p1
-%patch3093 -p1
-%patch3094 -p1
-%patch3095 -p1
-%patch3096 -p1
-%patch3097 -p1
-%patch3098 -p1
-%patch3099 -p1
-%patch3100 -p1
-%patch3101 -p1
-%patch3102 -p1
-%patch3103 -p1
-%patch3104 -p1
-%patch3105 -p1
-%patch3106 -p1
-%patch3107 -p1
-%patch3108 -p1
-%patch3109 -p1
-%patch3110 -p1
-%patch3111 -p1
-%patch3112 -p1
-%patch3113 -p1
-%patch3114 -p1
-%patch3115 -p1
-%patch3116 -p1
-%patch3117 -p1
-%patch3118 -p1
-%patch3119 -p1
-%patch3120 -p1
-%patch3121 -p1
-%patch3122 -p1
-%patch3123 -p1
-%patch3124 -p1
-%patch3125 -p1
-%patch3126 -p1
-%patch3127 -p1
-%patch3128 -p1
-%patch3129 -p1
-%patch3130 -p1
+%patch -P3001 -p1
+%patch -P3002 -p1
+%patch -P3003 -p1
+%patch -P3004 -p1
+%patch -P3005 -p1
+%patch -P3006 -p1
+%patch -P3007 -p1
+%patch -P3008 -p1
+%patch -P3009 -p1
+%patch -P3010 -p1
+%patch -P3011 -p1
+%patch -P3012 -p1
+%patch -P3013 -p1
+%patch -P3014 -p1
+%patch -P3015 -p1
+%patch -P3016 -p1
+%patch -P3017 -p1
+%patch -P3018 -p1
+%patch -P3019 -p1
+%patch -P3020 -p1
+%patch -P3021 -p1
+%patch -P3022 -p1
+%patch -P3023 -p1
+%patch -P3024 -p1
+%patch -P3025 -p1
+%patch -P3026 -p1
+%patch -P3027 -p1
+%patch -P3028 -p1
+%patch -P3029 -p1
+%patch -P3030 -p1
+%patch -P3031 -p1
+%patch -P3032 -p1
+%patch -P3033 -p1
+%patch -P3034 -p1
+%patch -P3035 -p1
+%patch -P3036 -p1
+%patch -P3037 -p1
+%patch -P3038 -p1
+%patch -P3039 -p1
+%patch -P3040 -p1
+%patch -P3041 -p1
+%patch -P3042 -p1
+%patch -P3043 -p1
+%patch -P3044 -p1
+%patch -P3045 -p1
+%patch -P3046 -p1
+%patch -P3047 -p1
+%patch -P3048 -p1
+%patch -P3049 -p1
+%patch -P3050 -p1
+%patch -P3051 -p1
+%patch -P3052 -p1
+%patch -P3053 -p1
+%patch -P3054 -p1
+%patch -P3056 -p1
+%patch -P3057 -p1
+%patch -P3058 -p1
+%patch -P3059 -p1
+%patch -P3060 -p1
+%patch -P3061 -p1
+%patch -P3062 -p1
+%patch -P3063 -p1
+%patch -P3064 -p1
+%patch -P3065 -p1
+%patch -P3066 -p1
+%patch -P3067 -p1
+%patch -P3068 -p1
+%patch -P3069 -p1
+%patch -P3070 -p1
+%patch -P3071 -p1
+%patch -P3072 -p1
+%patch -P3073 -p1
+%patch -P3074 -p1
+%patch -P3075 -p1
+%patch -P3076 -p1
+%patch -P3077 -p1
+%patch -P3078 -p1
+%patch -P3079 -p1
+%patch -P3080 -p1
+%patch -P3081 -p1
+%patch -P3082 -p1
+%patch -P3083 -p1
+%patch -P3084 -p1
+%patch -P3085 -p1
+%patch -P3086 -p1
+%patch -P3087 -p1
+%patch -P3088 -p1
+%patch -P3089 -p1
+%patch -P3090 -p1
+%patch -P3091 -p1
+%patch -P3092 -p1
+%patch -P3093 -p1
+%patch -P3094 -p1
+%patch -P3095 -p1
+%patch -P3096 -p1
+%patch -P3097 -p1
+%patch -P3098 -p1
+%patch -P3099 -p1
+%patch -P3100 -p1
+%patch -P3101 -p1
+%patch -P3102 -p1
+%patch -P3103 -p1
+%patch -P3104 -p1
+%patch -P3105 -p1
+%patch -P3106 -p1
+%patch -P3107 -p1
+%patch -P3108 -p1
+%patch -P3109 -p1
+%patch -P3110 -p1
+%patch -P3111 -p1
+%patch -P3112 -p1
+%patch -P3113 -p1
+%patch -P3114 -p1
+%patch -P3115 -p1
+%patch -P3116 -p1
+%patch -P3117 -p1
+%patch -P3118 -p1
+%patch -P3119 -p1
+%patch -P3120 -p1
+%patch -P3121 -p1
+%patch -P3122 -p1
+%patch -P3123 -p1
+%patch -P3124 -p1
+%patch -P3125 -p1
+%patch -P3126 -p1
+%patch -P3127 -p1
+%patch -P3128 -p1
+%patch -P3129 -p1
+%patch -P3130 -p1
+#--
+%patch -P3131 -p1
+%patch -P3132 -p1
+%patch -P3133 -p1
+%patch -P3134 -p1
+%patch -P3135 -p1
+%patch -P3136 -p1
+%patch -P3137 -p1
+%patch -P3138 -p1
+%patch -P3139 -p1
+%patch -P3140 -p1
+%patch -P3141 -p1
+%patch -P3142 -p1
+%patch -P3143 -p1
+%patch -P3144 -p1
+%patch -P3145 -p1
+%patch -P3146 -p1
+%patch -P3147 -p1
+%patch -P3148 -p1
+%patch -P3149 -p1
+%patch -P3150 -p1
+%patch -P3151 -p1
+%patch -P3152 -p1
+%patch -P3153 -p1
+%patch -P3154 -p1
+%patch -P3155 -p1
+%patch -P3156 -p1
+%patch -P3157 -p1
+%patch -P3158 -p1
+%patch -P3159 -p1
+%patch -P3160 -p1
+%patch -P3161 -p1
+%patch -P3162 -p1
+%patch -P3163 -p1
+%patch -P3164 -p1
+%patch -P3165 -p1
+%patch -P3166 -p1
+%patch -P3167 -p1
+%patch -P3168 -p1
+%patch -P3169 -p1
+%patch -P3170 -p1
+%patch -P3171 -p1
+%patch -P3172 -p1
+%patch -P3173 -p1
+%patch -P3174 -p1
+%patch -P3175 -p1
+%patch -P3176 -p1
+%patch -P3177 -p1
+%patch -P3178 -p1
+%patch -P3179 -p1
+%patch -P3180 -p1
+%patch -P3181 -p1
+%patch -P3182 -p1
+%patch -P3183 -p1
+%patch -P3184 -p1
+%patch -P3185 -p1
+%patch -P3186 -p1
+%patch -P3187 -p1
+%patch -P3188 -p1
+%patch -P3189 -p1
+%patch -P3190 -p1
+%patch -P3191 -p1
+%patch -P3192 -p1
+%patch -P3193 -p1
+%patch -P3194 -p1
+%patch -P3195 -p1
+%patch -P3196 -p1
+%patch -P3197 -p1
+%patch -P3198 -p1
+%patch -P3199 -p1
+%patch -P3200 -p1
+%patch -P3201 -p1
+%patch -P3202 -p1
+%patch -P3203 -p1
+%patch -P3204 -p1
+%patch -P3205 -p1
+%patch -P3206 -p1
+%patch -P3207 -p1
+%patch -P3208 -p1
+%patch -P3209 -p1
+%patch -P3210 -p1
+%patch -P3211 -p1
+%patch -P3212 -p1
+%patch -P3213 -p1
+%patch -P3214 -p1
+%patch -P3215 -p1
+%patch -P3216 -p1
+%patch -P3217 -p1
+%patch -P3218 -p1
+%patch -P3219 -p1
+%patch -P3220 -p1
+%patch -P3221 -p1
+%patch -P3222 -p1
+%patch -P3223 -p1
+%patch -P3224 -p1
+%patch -P3225 -p1
+%patch -P3226 -p1
+%patch -P3227 -p1
+%patch -P3228 -p1
+%patch -P3229 -p1
+%patch -P3230 -p1
+%patch -P3231 -p1
+%patch -P3232 -p1
+%patch -P3233 -p1
+%patch -P3234 -p1
+%patch -P3235 -p1
+%patch -P3236 -p1
+%patch -P3237 -p1
+%patch -P3238 -p1
+%patch -P3239 -p1
+%patch -P3240 -p1
+%patch -P3241 -p1
+%patch -P3242 -p1
+%patch -P3243 -p1
+%patch -P3244 -p1
+%patch -P3245 -p1
+%patch -P3246 -p1
+%patch -P3247 -p1
+%patch -P3248 -p1
+%patch -P3249 -p1
+%patch -P3250 -p1
+%patch -P3251 -p1
+%patch -P3252 -p1
+%patch -P3253 -p1
+%patch -P3254 -p1
+%patch -P3255 -p1
+%patch -P3256 -p1
+%patch -P3257 -p1
+%patch -P3258 -p1
+%patch -P3259 -p1
+%patch -P3260 -p1
+%patch -P3261 -p1
+%patch -P3262 -p1
+%patch -P3263 -p1
+%patch -P3264 -p1
+%patch -P3265 -p1
+%patch -P3266 -p1
+%patch -P3267 -p1
+%patch -P3268 -p1
+%patch -P3269 -p1
+%patch -P3270 -p1
+%patch -P3271 -p1
+%patch -P3272 -p1
+%patch -P3273 -p1
+%patch -P3274 -p1
+%patch -P3275 -p1
+%patch -P3276 -p1
+%patch -P3277 -p1
+%patch -P3278 -p1
+%patch -P3279 -p1
+%patch -P3280 -p1
+%patch -P3281 -p1
+%patch -P3282 -p1
+%patch -P3283 -p1
+%patch -P3284 -p1
+%patch -P3285 -p1
+%patch -P3286 -p1
+%patch -P3287 -p1
+%patch -P3288 -p1
+%patch -P3289 -p1
+%patch -P3290 -p1
+%patch -P3291 -p1
+%patch -P3292 -p1
+%patch -P3293 -p1
+%patch -P3294 -p1
+%patch -P3295 -p1
+%patch -P3296 -p1
+%patch -P3297 -p1
+%patch -P3298 -p1
+%patch -P3299 -p1
+%patch -P3300 -p1
+%patch -P3301 -p1
+%patch -P3302 -p1
+%patch -P3303 -p1
+%patch -P3304 -p1
+%patch -P3305 -p1
+%patch -P3306 -p1
+%patch -P3307 -p1
+%patch -P3308 -p1
+%patch -P3309 -p1
+%patch -P3310 -p1
+%patch -P3311 -p1
+%patch -P3312 -p1
+%patch -P3313 -p1
+%patch -P3314 -p1
+%patch -P3315 -p1
+%patch -P3316 -p1
+%patch -P3317 -p1
+%patch -P3318 -p1
+%patch -P3319 -p1
+%patch -P3320 -p1
+%patch -P3321 -p1
+%patch -P3322 -p1
 %endif

 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
@@ -2435,10 +3199,13 @@
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amxtileintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amxint8intrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amxbf16intrin.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amxfp16intrin.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/amxcomplexintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/x86gprintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/uintrintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/hresetintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/keylockerintrin.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/prfchiintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/avxvnniintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mwaitintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/avx512fp16intrin.h
@@ -2487,6 +3254,7 @@
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_fp16.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_bf16.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_sve.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_sme.h
 %endif
 %ifarch loongarch64
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/larchintrin.h
@@ -3310,6 +4078,80 @@
 %doc rpm.doc/changelogs/libcc1/ChangeLog*

 %changelog
+* Mon Nov 25 2024 Peng Fan <fanpeng@loongson.cn> - 12.3.1-45
+- Type: BUGFIX
+- DESC:
+- Fix indentation and numbering errors in makefile.
+
+* Thu Nov 21 2024 jchzhou <zhoujiacheng@iscas.ac.cn> - 12.3.1-44
+- Type: Sync
+- DESC: Sync patches for fixing building issues with clang
+- Source: https://gitee.com/openeuler/gcc/pulls/239
+
+* Thu Nov 21 2024 YunQiang Su <yunqiang@isrc.iscas.ac.cn> - 12.3.1-43
+- Type: Sync
+- DESC: RISC-V: Install libstdc++/libcc1 etc to /lib64 instead of lib
+
+* Thu Nov 21 2024 Feiyang Liu <liufeiyang6@huawei.com> - 12.3.1-42
+- Type:bugfix
+- ID:NA
+- SUG:NA
+- DESC:Sync backport patch varasm-Handle-private-COMDAT from upstream.
+
+* Thu Nov 21 2024 liyancheng <412998149@qq.com> - 12.3.1-41
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Multi-version lto symbol parse and lto units ipa-inline extension
+
+* Thu Nov 21 2024 liyancheng <412998149@qq.com> - 12.3.1-40
+- Type:bugfix
+- ID:NA
+- SUG:NA
+- DESC:Add missing header file for x86
+
+* Thu Nov 21 2024 huangzifeng <huangzifeng6@huawei.com> - 12.3.1-39
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Sync patches from openeuler/gcc
+
+* Thu Nov 21 2024 huangzifeng <huangzifeng6@huawei.com> - 12.3.1-38
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Sync patches from branch openEuler-24.09
+
+* Wed Nov 20 2024 Hu,Lin1 <lin1.hu@inte.com> - 12.3.1-37
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC: Sync some patch from src-openEuler/gcc's openeuler-24.09
+
+* Tue Nov 19 2024 eastb233 <xiezhiheng@huawei.com> - 12.3.1-36
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Recover CVE-2023-4039
+
+* Mon Nov 18 2024 eastb233 <xiezhiheng@huawei.com> - 12.3.1-35
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Apply SME patches
+
+* Mon Nov 18 2024 eastb233 <xiezhiheng@huawei.com> - 12.3.1-34
+- Type:Revert
+- ID:NA
+- SUG:NA
+- DESC:Revert CVE-2023-4039 to apply SME patches
+
+* Tue Nov 5 2024 Peng Fan <fanpeng@loongson.cn> - 12.3.1-33
+- Type: Sync
+- DESC:
+- LoongArch: Sync patch from upstream
+- Tweaks OSDIR are consistent with most other distributions.
+
 * Fri Sep 20 2024 fuanan <fuanan3@h-partners.com> - 12.3.1-32
 - Type:Sync
 - ID:NA
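A note on the %prep rewrite above: this revision moves every patch application from the numbered %patchN shorthand to the explicit %patch -PN form, which newer rpm releases require (the shorthand was deprecated around rpm 4.18 and later removed). A minimal sketch of the two spellings, using hypothetical patch entries rather than ones from this spec:

    Patch0:    0000-example.patch
    Patch3001: 3001-example-loongarch-only.patch

    %prep
    %setup -q -n example-1.0
    # Old spelling, rejected by newer rpm: %patch0 -p1
    %patch -P0 -p1
    %ifarch loongarch64
    # Arch-specific patches apply only inside the matching %ifarch block,
    # mirroring the structure of the spec diffed above.
    %patch -P3001 -p1
    %endif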
_service:tar_scm:0001-LoongArch-Reimplement-multilib-build-option-handling.patch
Added
@@ -0,0 +1,464 @@
+From d394a9ac68674b40e0d2b436c09e23dd29d8b5d0 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 13 Sep 2023 17:52:14 +0800
+Subject: [PATCH 001/188] LoongArch: Reimplement multilib build option
+ handling.
+
+Library build options from --with-multilib-list used to be processed with
+*self_spec, which missed the driver's initial canonicalization.  This
+caused limitations on CFLAGS override and the use of driver-only options
+like -mno-lsx.
+
+The problem is solved by promoting the injection rules of --with-multilib-list
+options to the first element of DRIVER_SELF_SPECS, to make them execute before
+the canonialization.  The library-build options are also hard-coded in
+the driver and can be used conveniently by the builders of other non-gcc
+libraries via the use of -fmultiflags.
+
+Bootstrapped and tested on loongarch64-linux-gnu.
+
+ChangeLog:
+
+	* config-ml.in: Remove unneeded loongarch clause.
+	* configure.ac: Register custom makefile fragments mt-loongarch-*
+	  for loongarch targets.
+	* configure: Regenerate.
+
+config/ChangeLog:
+
+	* mt-loongarch-mlib: New file.  Pass -fmultiflags when building
+	  target libraries (FLAGS_FOR_TARGET).
+	* mt-loongarch-elf: New file.
+	* mt-loongarch-gnu: New file.
+
+gcc/ChangeLog:
+
+	* config.gcc: Pass the default ABI via TM_MULTILIB_CONFIG.
+	* config/loongarch/loongarch-driver.h: Invoke MLIB_SELF_SPECS
+	  before the driver canonicalization routines.
+	* config/loongarch/loongarch.h: Move definitions of CC1_SPEC etc.
+	  to loongarch-driver.h
+	* config/loongarch/t-linux: Move multilib-related definitions to
+	  t-multilib.
+	* config/loongarch/t-multilib: New file.  Inject library build
+	  options obtained from --with-multilib-list.
+	* config/loongarch/t-loongarch: Same.
+---
+ config-ml.in                            | 10 ----
+ config/mt-loongarch-elf                 |  1 +
+ config/mt-loongarch-gnu                 |  2 +
+ config/mt-loongarch-mlib                |  1 +
+ configure                               |  6 +++
+ configure.ac                            |  6 +++
+ gcc/config.gcc                          |  6 +--
+ gcc/config/loongarch/loongarch-driver.h | 42 +++++++++++++++
+ gcc/config/loongarch/loongarch.h        | 50 ------------------
+ gcc/config/loongarch/t-linux            | 66 +++--------------------
+ gcc/config/loongarch/t-loongarch        |  2 +-
+ gcc/config/loongarch/t-multilib         | 68 +++++++++++++++++++++++++
+ 12 files changed, 137 insertions(+), 123 deletions(-)
+ create mode 100644 config/mt-loongarch-elf
+ create mode 100644 config/mt-loongarch-gnu
+ create mode 100644 config/mt-loongarch-mlib
+ create mode 100644 gcc/config/loongarch/t-multilib
+
+diff --git a/config-ml.in b/config-ml.in
+index ad0db0781..68854a4f1 100644
+--- a/config-ml.in
++++ b/config-ml.in
+@@ -301,16 +301,6 @@ arm-*-*)
+ 	  done
+ 	fi
+ 	;;
+-loongarch*-*)
+-	old_multidirs="${multidirs}"
+-	multidirs=""
+-	for x in ${old_multidirs}; do
+-	  case "$x" in
+-	  `${CC-gcc} --print-multi-directory`) : ;;
+-	  *) multidirs="${multidirs} ${x}" ;;
+-	  esac
+-	done
+-	;;
+ m68*-*-*)
+ 	if x$enable_softfloat = xno
+ 	then
+diff --git a/config/mt-loongarch-elf b/config/mt-loongarch-elf
+new file mode 100644
+index 000000000..bbf29bb57
+--- /dev/null
++++ b/config/mt-loongarch-elf
+@@ -0,0 +1 @@
++include $(srcdir)/config/mt-loongarch-mlib
+diff --git a/config/mt-loongarch-gnu b/config/mt-loongarch-gnu
+new file mode 100644
+index 000000000..dfefb44ed
+--- /dev/null
++++ b/config/mt-loongarch-gnu
+@@ -0,0 +1,2 @@
++include $(srcdir)/config/mt-gnu
++include $(srcdir)/config/mt-loongarch-mlib
+diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib
+new file mode 100644
+index 000000000..4cfe568f1
+--- /dev/null
++++ b/config/mt-loongarch-mlib
+@@ -0,0 +1 @@
++FLAGS_FOR_TARGET += -fmultiflags
+diff --git a/configure b/configure
+index aff62c464..81b4a3cec 100755
+--- a/configure
++++ b/configure
+@@ -9548,6 +9548,12 @@ case "${target}" in
+   spu-*-*)
+     target_makefile_frag="config/mt-spu"
+     ;;
++  loongarch*-*linux* | loongarch*-*gnu*)
++    target_makefile_frag="config/mt-loongarch-gnu"
++    ;;
++  loongarch*-*elf*)
++    target_makefile_frag="config/mt-loongarch-elf"
++    ;;
+   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
+     target_makefile_frag="config/mt-sde"
+     ;;
+diff --git a/configure.ac b/configure.ac
+index f310d75ca..9f8dbd319 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -2729,6 +2729,12 @@ case "${target}" in
+   spu-*-*)
+     target_makefile_frag="config/mt-spu"
+     ;;
++  loongarch*-*linux* | loongarch*-*gnu*)
++    target_makefile_frag="config/mt-loongarch-gnu"
++    ;;
++  loongarch*-*elf*)
++    target_makefile_frag="config/mt-loongarch-elf"
++    ;;
+   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
+     target_makefile_frag="config/mt-sde"
+     ;;
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 3f870e966..e34a5fbb9 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2510,7 +2510,7 @@ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
+ 	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
+ 	extra_options="${extra_options} linux-android.opt"
+-	tmake_file="${tmake_file} loongarch/t-linux"
++	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+
+@@ -2522,7 +2522,7 @@
+ loongarch*-*-elf*)
+ 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
+ 	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
+-	tmake_file="${tmake_file} loongarch/t-linux"
++	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+
+@@ -5241,7 +5241,7 @@ case "${target}" in
+ 		loongarch_multilib_list_sane=no
+
+ 		# This one goes to TM_MULTILIB_CONFIG, for use in t-linux.
+-		loongarch_multilib_list_make=""
++		loongarch_multilib_list_make="${abi_base},"
+
+ 		# This one goes to tm_defines, for use in loongarch-driver.c.
+ 		loongarch_multilib_list_c=""
+diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
+index 6cfe0efb5..e7d083677 100644
+--- a/gcc/config/loongarch/loongarch-driver.h
++++ b/gcc/config/loongarch/loongarch-driver.h
+@@ -23,6 +23,39 @@ along with GCC; see the file COPYING3.  If not see
+
+ #include "loongarch-str.h"
+
++#ifndef SUBTARGET_CPP_SPEC
++#define SUBTARGET_CPP_SPEC ""
++#endif
++
++#ifndef SUBTARGET_CC1_SPEC
++#define SUBTARGET_CC1_SPEC ""
++#endif
++
++#ifndef SUBTARGET_ASM_SPEC
++#define SUBTARGET_ASM_SPEC ""
++#endif
++
++#define EXTRA_SPECS \
++  {"early_self_spec", ""}, \
++  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
++  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
++  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
++
++
++#undef CPP_SPEC
++#define CPP_SPEC \
++  "%(subtarget_cpp_spec)"
++
++#undef CC1_SPEC
++#define CC1_SPEC \
++  "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \
++  "%(subtarget_cc1_spec)"
++
++#undef ASM_SPEC
++#define ASM_SPEC \
++  "%{mabi=*} %(subtarget_asm_spec)"
++
++
+ extern const char*
+ la_driver_init (int argc, const char **argv);
+
+@@ -45,7 +78,16 @@ driver_get_normalized_m_opts (int argc, const char **argv);
+ #define LA_SET_PARM_SPEC(NAME) \
+   " %{m" OPTSTR_##NAME "=*: %:set_m_parm(" OPTSTR_##NAME " %*)}" \
+
++/* For MLIB_SELF_SPECS.  */
++#include "loongarch-multilib.h"
++
++#ifndef MLIB_SELF_SPECS
++#define MLIB_SELF_SPECS ""
++#endif
++
+ #define DRIVER_HANDLE_MACHINE_OPTIONS \
++  " %(early_self_spec)", \
++  MLIB_SELF_SPECS \
+   " %:driver_init()" \
+   " %{c|S|E|nostdlib: %:set_no_link()}" \
+   " %{nostartfiles: %{nodefaultlibs: %:set_no_link()}}" \
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index c7e91a06d..a443a6427 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -64,56 +64,6 @@ along with GCC; see the file COPYING3.  If not see
+ #define NM_FLAGS "-Bn"
+ #endif
+
+-/* SUBTARGET_ASM_SPEC is always passed to the assembler.  It may be
+-   overridden by subtargets.  */
+-
+-#ifndef SUBTARGET_ASM_SPEC
+-#define SUBTARGET_ASM_SPEC ""
+-#endif
+-
+-#undef ASM_SPEC
+-#define ASM_SPEC "%{mabi=*} %{subtarget_asm_spec}"
+-
+-/* Extra switches sometimes passed to the linker.  */
+-
+-#ifndef LINK_SPEC
+-#define LINK_SPEC ""
+-#endif /* LINK_SPEC defined */
+-
+-/* Specs for the compiler proper.  */
+-
+-/* CC1_SPEC is the set of arguments to pass to the compiler proper.  */
+-
+-#undef CC1_SPEC
+-#define CC1_SPEC "%{,ada:-gnatea} %{m*} \
+-%{G*} \
+-%(subtarget_cc1_spec) %{,ada:-gnatez}"
+-
+-/* Preprocessor specs.  */
+-
+-/* SUBTARGET_CPP_SPEC is passed to the preprocessor.  It may be
+-   overridden by subtargets.  */
+-#ifndef SUBTARGET_CPP_SPEC
+-#define SUBTARGET_CPP_SPEC ""
+-#endif
+-
+-#define CPP_SPEC "%(subtarget_cpp_spec)"
+-
+-/* This macro defines names of additional specifications to put in the specs
+-   that can be used in various specifications like CC1_SPEC.  Its definition
+-   is an initializer with a subgrouping for each command option.
+-
+-   Each subgrouping contains a string constant, that defines the
+-   specification name, and a string constant that used by the GCC driver
+-   program.
+-
+-   Do not define this macro if it does not need to do anything.  */
+-
+-#define EXTRA_SPECS \
+-  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
+-  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
+-  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
+-
+ /* Registers may have a prefix which can be ignored when matching
+    user asm and register definitions.  */
+ #ifndef REGISTER_PREFIX
+diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
+index 62a870b66..7cd7cde25 100644
+--- a/gcc/config/loongarch/t-linux
++++ b/gcc/config/loongarch/t-linux
+@@ -16,68 +16,16 @@
+ # along with GCC; see the file COPYING3.  If not see
+ # <http://www.gnu.org/licenses/>.
+
+-# Multilib
+-MULTILIB_OPTIONS = mabi=lp64d/mabi=lp64f/mabi=lp64s
+-MULTILIB_DIRNAMES = base/lp64d base/lp64f base/lp64s
+-
+-# The GCC driver always gets all abi-related options on the command line.
+-# (see loongarch-driver.c:driver_get_normalized_m_opts)
+-comma=,
+-MULTILIB_REQUIRED = $(foreach mlib,$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\
+-  $(firstword $(subst /, ,$(mlib))))
+-
+-SPECS = specs.install
+-
+-# temporary self_spec when building libraries (e.g. libgcc)
+-gen_mlib_spec = $(if $(word 2,$1),\
+-  %{$(firstword $1):$(patsubst %,-%,$(wordlist 2,$(words $1),$1))})
+-
+-# clean up the result of DRIVER_SELF_SPEC to avoid conflict
+-lib_build_self_spec = %<march=* %<mtune=* %<mcmodel=* %<mfpu=* %<msimd=*
+-
+-# append user-specified build options from --with-multilib-list
+-lib_build_self_spec += $(foreach mlib,\
+-  $(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\
+-  $(call gen_mlib_spec,$(subst /, ,$(mlib))))
+-
+-specs: specs.install
+-	sed '/^*self_spec:$$/{ n;s/^$$/$(lib_build_self_spec)/g; }' $< > $@
+-
+-# Do some preparation before regression tests:
+-# remove lib-build-specs / make symlinks for the toplevel multilib variant
+-
+-LA_DEFAULT_MULTISUBDIR = $(shell $(GCC_FOR_TARGET) --print-multi-dir)
+-.PHONY: remove-lib-specs
+-check check-host check-target $(CHECK_TARGETS) $(lang_checks): remove-lib-specs
+-remove-lib-specs:
+-	-mv -f specs.install specs 2>/dev/null
+-	-mv $(LA_DEFAULT_MULTISUBDIR)/* ./
+-	-mkdir -p ../$(target_noncanonical)/`dirname $(LA_DEFAULT_MULTISUBDIR)`
+-	-$(LN_S) .. ../$(target_noncanonical)/$(LA_DEFAULT_MULTISUBDIR)
+-
+-# Multiarch
+-ifneq ($(call if_multiarch,yes),yes)
+-  # Define LA_DISABLE_MULTIARCH if multiarch is disabled.
+-  tm_defines += LA_DISABLE_MULTIARCH
+-else
+-  # Only define MULTIARCH_DIRNAME when multiarch is enabled,
+-  # or it would always introduce ${target} into the search path.
+-  MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET)
+-endif
++MULTIOSDIR_lp64d := ../lib64$(call if_multiarch,:loongarch64-linux-gnu)
++MULTIOSDIR_lp64f := ../lib64/f32$(call if_multiarch,:loongarch64-linux-gnuf32)
++MULTIOSDIR_lp64s := ../lib64/sf$(call if_multiarch,:loongarch64-linux-gnusf)
+
+ # Don't define MULTILIB_OSDIRNAMES if multilib is disabled.
+ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
+
+-  MULTILIB_OSDIRNAMES = \
+-    mabi.lp64d=../lib64$\
+-    $(call if_multiarch,:loongarch64-linux-gnu)
+-
+-  MULTILIB_OSDIRNAMES += \
+-    mabi.lp64f=../lib64/f32$\
+-    $(call if_multiarch,:loongarch64-linux-gnuf32)
+-
+-  MULTILIB_OSDIRNAMES += \
+-    mabi.lp64s=../lib64/sf$\
+-    $(call if_multiarch,:loongarch64-linux-gnusf)
++  MULTILIB_OSDIRNAMES  = .=$(MULTIOSDIR_$(mlib_default))
++  MULTILIB_OSDIRNAMES += mabi.lp64d=$(MULTIOSDIR_lp64d)
++  MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f)
++  MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s)
+
+ endif
+diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
+index e73f4f437..28cfb49df 100644
+--- a/gcc/config/loongarch/t-loongarch
++++ b/gcc/config/loongarch/t-loongarch
+@@ -16,7 +16,7 @@
+ # along with GCC; see the file COPYING3.  If not see
+ # <http://www.gnu.org/licenses/>.
+
+-TM_H += $(srcdir)/config/loongarch/loongarch-driver.h
++TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
+ OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
+ 	$(srcdir)/config/loongarch/loongarch-tune.h
+
+diff --git a/gcc/config/loongarch/t-multilib b/gcc/config/loongarch/t-multilib
+new file mode 100644
+index 000000000..bf6c18298
+--- /dev/null
++++ b/gcc/config/loongarch/t-multilib
+@@ -0,0 +1,68 @@
++# Copyright (C) 2023 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.
++ ++# Helper definitions ++comma=, ++null := ++space := $(null) # ++exclude_1st = $(wordlist 2,$(words $1),$1) ++ ++# Common definitions ++mlib_all := lp64d lp64f lp64s ++$(foreach i,$(mlib_all),$(eval MULTISUBDIR_$i := base/$i)) ++ ++mlib_default := $(firstword $(subst $(comma), ,$(TM_MULTILIB_CONFIG))) ++mlib_all := $(filter-out $(mlib_default),$(mlib_all)) ++ ++MULTILIB_OPTIONS := $(subst $(space),/,$(foreach i,$(mlib_all),mabi=$(i))) ++MULTILIB_DIRNAMES := $(foreach i,$(mlib_all),$(MULTISUBDIR_$(i))) ++ ++# Customize builds with --with-multilib-list ++MULTILIB_REQUIRED := $(foreach i,$(call exclude_1st,\ ++ $(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\ ++ $(firstword $(subst /, ,$(i)))) ++ ++## spec rules for building libraries, triggered by -fmultiflags ++gen_mlib_spec = $(if $(word 2,$1),\ ++ %{$(firstword $1):$(patsubst %,-%,$(call exclude_1st,$1)})) ++ ++lib_build_spec = $(foreach mlib,\ ++ $(call exclude_1st,$(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\ ++ $(call gen_mlib_spec,$(subst /, ,$(mlib)))) ++ ++default_mlib_spec := %{fmultiflags:%{!mabi=*:-mabi=$(mlib_default)}} ++lib_build_spec := %{fmultiflags:$(lib_build_spec)} ++ ++ifneq ($(TM_MULTILIB_CONFIG),) ++loongarch-multilib.h: ++ @echo "#define MLIB_SELF_SPECS" \ ++ "\"$(default_mlib_spec)\"," \ ++ "\"$(lib_build_spec)\"," > $@ ++else ++loongarch-multilib.h: ; @touch $@ ++endif ++ ++# Multiarch ++ifneq ($(call if_multiarch,yes),yes) ++ # Define LA_DISABLE_MULTIARCH if multiarch is disabled. ++ tm_defines += LA_DISABLE_MULTIARCH ++else ++ # Only define MULTIARCH_DIRNAME when multiarch is enabled, ++ # or it would always introduce ${target} into the search path. ++ MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET) ++endif +-- +2.43.0 +
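Note on the multilib rework above: the moving part is the generated header. The t-multilib rule writes loongarch-multilib.h, and loongarch-driver.h splices its MLIB_SELF_SPECS into the driver self-specs (see DRIVER_HANDLE_MACHINE_OPTIONS). A minimal sketch of what the generated header could look like, assuming a hypothetical configuration where TM_MULTILIB_CONFIG expands to "lp64d,lp64f,lp64s" with no per-ABI extra options (illustrative only, not actual build output):

    /* loongarch-multilib.h (sketch).  With mlib_default = lp64d and no
       per-variant options, default_mlib_spec injects -mabi=lp64d whenever
       -fmultiflags is given without an explicit -mabi=, and lib_build_spec
       degenerates to an empty %{fmultiflags:} group.  */
    #define MLIB_SELF_SPECS \
      "%{fmultiflags:%{!mabi=*:-mabi=lp64d}}", \
      "%{fmultiflags:}",

Because config/mt-loongarch-mlib adds -fmultiflags to FLAGS_FOR_TARGET, these specs fire only while target libraries are being built, which is what lets the patch drop the old specs.install rewriting from t-linux.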
_service:tar_scm:0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
Added
@@ -0,0 +1,192 @@ +From 13c33536900709bf1f33171d5ae2b2af97789601 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 15 Sep 2023 10:22:49 +0800 +Subject: PATCH 002/188 LoongArch: Check whether binutils supports the relax + function. If supported, explicit relocs are turned off by default. + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/genopts/loongarch.opt.in: Add compilation option + mrelax. And set the initial value of explicit-relocs according to the + detection status. + * config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the + --no-relax option to the linker. + * config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with + -mno-relax, pass the -mno-relax option to the assembler. + * config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define macro. + * config/loongarch/loongarch.opt: Regenerate. + * configure: Regenerate. + * configure.ac: Add detection of support for binutils relax function. +--- + gcc/config.in | 6 ++++ + gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++++- + gcc/config/loongarch/gnu-user.h | 3 +- + gcc/config/loongarch/loongarch-driver.h | 2 +- + gcc/config/loongarch/loongarch-opts.h | 4 +++ + gcc/config/loongarch/loongarch.opt | 7 ++++- + gcc/configure | 31 +++++++++++++++++++ + gcc/configure.ac | 4 +++ + 8 files changed, 60 insertions(+), 4 deletions(-) + +diff --git a/gcc/config.in b/gcc/config.in +index 0dff36199..0c55e67e7 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -637,6 +637,12 @@ + #endif + + ++/* Define if your assembler supports -mrelax option. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_MRELAX_OPTION ++#endif ++ ++ + /* Define if your assembler supports .mspabi_attribute. */ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_MSPABI_ATTRIBUTE +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 2ef1b1e3b..f18733c24 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -181,7 +181,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS) ++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) + Use %reloc() assembly operators. + + ; The code model option names for -mcmodel. +@@ -214,3 +214,8 @@ Specify the code model. + mdirect-extern-access + Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. ++ ++mrelax ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Take advantage of linker relaxations to reduce the number of instructions ++required to materialize symbol addresses. +diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h +index 44e4f2575..60ef75601 100644 +--- a/gcc/config/loongarch/gnu-user.h ++++ b/gcc/config/loongarch/gnu-user.h +@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3. If not see + "%{!shared: %{static} " \ + "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \ + "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \ +- "%{static-pie: -static -pie --no-dynamic-linker -z text}}" ++ "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \ ++ "%{mno-relax: --no-relax}" + + + /* Similar to standard Linux, but adding -ffast-math support. 
*/ +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index e7d083677..59fa3263d 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -53,7 +53,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ASM_SPEC + #define ASM_SPEC \ +- "%{mabi=*} %(subtarget_asm_spec)" ++ "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)" + + + extern const char* +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 624e246bb..f2b59abe6 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -99,4 +99,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_EXPLICIT_RELOCS 0 + #endif + ++#ifndef HAVE_AS_MRELAX_OPTION ++#define HAVE_AS_MRELAX_OPTION 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index f2d21c9f3..78f2baf3a 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -188,7 +188,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS) ++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) + Use %reloc() assembly operators. + + ; The code model option names for -mcmodel. +@@ -221,3 +221,8 @@ Specify the code model. + mdirect-extern-access + Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. ++ ++mrelax ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Take advantage of linker relaxations to reduce the number of instructions ++required to materialize symbol addresses. +diff --git a/gcc/configure b/gcc/configure +index 2a5d3aaf3..8ae8a924a 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28830,6 +28830,37 @@ if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then + + $as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax option" >&5 ++$as_echo_n "checking assembler for -mrelax option... " >&6; } ++if ${gcc_cv_as_loongarch_relax+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_relax=no ++ if test x$gcc_cv_as != x; then ++ $as_echo '.text' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mrelax -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5
++ test $ac_status = 0; }; }
++ then
++ gcc_cv_as_loongarch_relax=yes
++ else
++ echo "configure: failed program was" >&5
++ cat conftest.s >&5
++ fi
++ rm -f conftest.o conftest.s
++ fi
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_relax" >&5
++$as_echo "$gcc_cv_as_loongarch_relax" >&6; }
++if test $gcc_cv_as_loongarch_relax = yes; then
++
++$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
++
+ fi
+
+ ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index ba2bf1ffc..f7161e66e 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -5322,6 +5322,10 @@ x:
+ .cfi_endproc],,
+ [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
+ [Define if your assembler supports eh_frame pcrel encoding.])])
++ gcc_GAS_CHECK_FEATURE([-mrelax option], gcc_cv_as_loongarch_relax,
++ [-mrelax], [.text],,
++ [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
++ [Define if your assembler supports -mrelax option.])])
+ ;;
+ s390*-*-*)
+ gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
+--
+2.43.0
+
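The net effect on -mexplicit-relocs is easiest to see restated in C. A sketch (not GCC source; only the two configure-time macros above are real) of the default computed by Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) in loongarch.opt:

    /* Both probe results are 0 or 1, so the bitwise & acts as a
       logical AND here.  */
    #ifndef HAVE_AS_EXPLICIT_RELOCS
    #define HAVE_AS_EXPLICIT_RELOCS 0
    #endif
    #ifndef HAVE_AS_MRELAX_OPTION
    #define HAVE_AS_MRELAX_OPTION 0
    #endif

    int
    default_explicit_relocs (void)
    {
      /* Explicit %reloc() operators stay on by default only when the
         assembler supports them but cannot relax; once the assembler
         accepts -mrelax, the la.* pseudo-instructions are preferred
         so the linker can relax them away.  */
      return HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION;
    }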
_service:tar_scm:0003-Modify-gas-uleb128-support-test.patch
Added
@@ -0,0 +1,115 @@ +From 38c338555e64da83fd35c608a1a89d738e1ca356 Mon Sep 17 00:00:00 2001 +From: mengqinggang <mengqinggang@loongson.cn> +Date: Fri, 15 Sep 2023 12:04:04 +0800 +Subject: PATCH 003/188 Modify gas uleb128 support test + +Some assemblers (GNU as for LoongArch) generates relocations for leb128 +symbol arithmetic for relaxation, we need to disable relaxation probing +leb128 support then. + +gcc/ChangeLog: + + * configure: Regenerate. + * configure.ac: Checking assembler for -mno-relax support. + Disable relaxation when probing leb128 support. + +co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/configure | 42 +++++++++++++++++++++++++++++++++++++++++- + gcc/configure.ac | 17 ++++++++++++++++- + 2 files changed, 57 insertions(+), 2 deletions(-) + +diff --git a/gcc/configure b/gcc/configure +index 8ae8a924a..430d44dc3 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -24441,6 +24441,46 @@ _ACEOF + + + ++# Some assemblers (GNU as for LoongArch) generates relocations for ++# leb128 symbol arithmetic for relaxation, we need to disable relaxation ++# probing leb128 support then. ++case $target in ++ loongarch*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mno-relax support" >&5 ++$as_echo_n "checking assembler for -mno-relax support... " >&6; } ++if ${gcc_cv_as_mno_relax+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_mno_relax=no ++ if test x$gcc_cv_as != x; then ++ $as_echo '.text' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mno-relax -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_mno_relax=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mno_relax" >&5 ++$as_echo "$gcc_cv_as_mno_relax" >&6; } ++if test $gcc_cv_as_mno_relax = yes; then ++ check_leb128_asflags=-mno-relax ++fi ++ ++ ;; ++ *) ++ check_leb128_asflags= ++ ;; ++esac ++ + # Check if we have .usleb128, and support symbol arithmetic with it. + # Older versions of GAS and some non-GNU assemblers, have a bugs handling + # these directives, even when they appear to accept them. +@@ -24459,7 +24499,7 @@ L1: + L2: + .uleb128 0x8000000000000000 + ' > conftest.s +- if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags $check_leb128_asflags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +diff --git a/gcc/configure.ac b/gcc/configure.ac +index f7161e66e..4b24db190 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -3185,10 +3185,25 @@ AC_MSG_RESULT($gcc_cv_ld_ro_rw_mix) + + gcc_AC_INITFINI_ARRAY + ++# Some assemblers (GNU as for LoongArch) generates relocations for ++# leb128 symbol arithmetic for relaxation, we need to disable relaxation ++# probing leb128 support then. ++case $target in ++ loongarch*-*-*) ++ gcc_GAS_CHECK_FEATURE(-mno-relax support, ++ gcc_cv_as_mno_relax,-mno-relax,.text,, ++ check_leb128_asflags=-mno-relax) ++ ;; ++ *) ++ check_leb128_asflags= ++ ;; ++esac ++ + # Check if we have .usleb128, and support symbol arithmetic with it. 
+ # Older versions of GAS and some non-GNU assemblers, have a bugs handling
+ # these directives, even when they appear to accept them.
+-gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,,
++gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,
++[$check_leb128_asflags],
+ .data
+ .uleb128 L2 - L1
+ L1:
+--
+2.43.0
+
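For reproducing the probe by hand outside of configure, here is a standalone C re-creation (hypothetical: it assumes a LoongArch-targeted "as" on PATH and abridges the conftest to the directives shown above):

    #include <stdio.h>
    #include <stdlib.h>

    /* Write the leb128 conftest and assemble it with -mno-relax, as
       $check_leb128_asflags arranges.  Without it, GNU as for LoongArch
       emits relaxation relocations for the L2 - L1 difference and the
       .uleb128 feature test fails even though the directive works.  */
    int
    main (void)
    {
      FILE *f = fopen ("conftest.s", "w");
      if (f == NULL)
        return 1;
      fputs (".data\n"
             ".uleb128 L2 - L1\n"
             "L1:\n"
             "L2:\n"
             ".uleb128 0x8000000000000000\n", f);
      fclose (f);
      return system ("as -mno-relax -o conftest.o conftest.s") != 0;
    }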
_service:tar_scm:0004-LoongArch-Optimizations-of-vector-construction.patch
Added
@@ -0,0 +1,1310 @@ +From b74895b8b723a64bc136c4c560661abed81e013a Mon Sep 17 00:00:00 2001 +From: Guo Jie <guojie@loongson.cn> +Date: Thu, 21 Sep 2023 09:19:18 +0800 +Subject: PATCH 004/188 LoongArch: Optimizations of vector construction. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_vecinit_merge_<LASX:mode>): New + pattern for vector construction. + (vec_set<mode>_internal): Ditto. + (lasx_xvinsgr2vr_<mode256_i_half>_internal): Ditto. + (lasx_xvilvl_<lasxfmt_f>_internal): Ditto. + * config/loongarch/loongarch.cc (loongarch_expand_vector_init): + Optimized the implementation of vector construction. + (loongarch_expand_vector_init_same): New function. + * config/loongarch/lsx.md (lsx_vilvl_<lsxfmt_f>_internal): New + pattern for vector construction. + (lsx_vreplvei_mirror_<lsxfmt_f>): New pattern for vector + construction. + (vec_concatv2df): Ditto. + (vec_concatv4sf): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c: New test. +--- + gcc/config/loongarch/lasx.md | 69 ++ + gcc/config/loongarch/loongarch.cc | 716 +++++++++--------- + gcc/config/loongarch/lsx.md | 134 ++++ + .../vector/lasx/lasx-vec-construct-opt.c | 102 +++ + .../vector/lsx/lsx-vec-construct-opt.c | 85 +++ + 5 files changed, 732 insertions(+), 374 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 8111c8bb7..2bc5d47ed 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -186,6 +186,9 @@ + UNSPEC_LASX_XVLDI + UNSPEC_LASX_XVLDX + UNSPEC_LASX_XVSTX ++ UNSPEC_LASX_VECINIT_MERGE ++ UNSPEC_LASX_VEC_SET_INTERNAL ++ UNSPEC_LASX_XVILVL_INTERNAL + ) + + ;; All vector modes with 256 bits. +@@ -255,6 +258,15 @@ + (V8SF "V4SF") + (V4DF "V2DF")) + ++;; The attribute gives half int/float modes for vector modes. ++(define_mode_attr VHMODE256_ALL ++ (V32QI "V16QI") ++ (V16HI "V8HI") ++ (V8SI "V4SI") ++ (V4DI "V2DI") ++ (V8SF "V4SF") ++ (V4DF "V2DF")) ++ + ;; The attribute gives double modes for vector modes in LASX. + (define_mode_attr VDMODE256 + (V8SI "V4DI") +@@ -312,6 +324,11 @@ + (V4DI "v4df") + (V8SI "v8sf")) + ++;; This attribute gives V32QI mode and V16HI mode with half size. ++(define_mode_attr mode256_i_half ++ (V32QI "v16qi") ++ (V16HI "v8hi")) ++ + ;; This attribute gives suffix for LASX instructions. HOW? + (define_mode_attr lasxfmt + (V4DF "d") +@@ -756,6 +773,20 @@ + (set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Support a LSX-mode input op2. ++(define_insn "lasx_vecinit_merge_<LASX:mode>" ++ (set (match_operand:LASX 0 "register_operand" "=f") ++ (unspec:LASX ++ (match_operand:LASX 1 "register_operand" "0") ++ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f") ++ (match_operand 3 "const_uimm8_operand") ++ UNSPEC_LASX_VECINIT_MERGE)) ++ "ISA_HAS_LASX" ++ "xvpermi.q\t%u0,%u2,%3" ++ (set_attr "type" "simd_splat") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvpickve2gr_d<u>" + (set (match_operand:DI 0 "register_operand" "=r") + (any_extend:DI +@@ -779,6 +810,33 @@ + DONE; + }) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Simulate missing instructions xvinsgr2vr.b and xvinsgr2vr.h. 
++(define_expand "vec_set<mode>_internal" ++ (match_operand:ILASX_HB 0 "register_operand") ++ (match_operand:<UNITMODE> 1 "reg_or_0_operand") ++ (match_operand 2 "const_<indeximm256>_operand") ++ "ISA_HAS_LASX" ++{ ++ rtx index = GEN_INT (1 << INTVAL (operands2)); ++ emit_insn (gen_lasx_xvinsgr2vr_<mode256_i_half>_internal ++ (operands0, operands1, operands0, index)); ++ DONE; ++}) ++ ++(define_insn "lasx_xvinsgr2vr_<mode256_i_half>_internal" ++ (set (match_operand:ILASX_HB 0 "register_operand" "=f") ++ (unspec:ILASX_HB (match_operand:<UNITMODE> 1 "reg_or_0_operand" "rJ") ++ (match_operand:ILASX_HB 2 "register_operand" "0") ++ (match_operand 3 "const_<bitmask256>_operand" "") ++ UNSPEC_LASX_VEC_SET_INTERNAL)) ++ "ISA_HAS_LASX" ++{ ++ return "vinsgr2vr.<lasxfmt>\t%w0,%z1,%y3"; ++} ++ (set_attr "type" "simd_insert") ++ (set_attr "mode" "<MODE>")) ++ + (define_expand "vec_set<mode>" + (match_operand:FLASX 0 "register_operand") + (match_operand:<UNITMODE> 1 "reg_or_0_operand") +@@ -1567,6 +1625,17 @@ + (set_attr "type" "simd_flog2") + (set_attr "mode" "<MODE>")) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Merge two scalar floating-point op1 and op2 into a LASX op0. ++(define_insn "lasx_xvilvl_<lasxfmt_f>_internal" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:<UNITMODE> 1 "register_operand" "f") ++ (match_operand:<UNITMODE> 2 "register_operand" "f") ++ UNSPEC_LASX_XVILVL_INTERNAL)) ++ "ISA_HAS_LASX" ++ "xvilvl.<lasxfmt>\t%u0,%u2,%u1" ++ (set_attr "type" "simd_permute") ++ (set_attr "mode" "<MODE>")) + + (define_insn "smax<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index f2e796a6b..760b12268 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10193,300 +10193,344 @@ loongarch_expand_vector_group_init (rtx target, rtx vals) + ops1))); + } + ++/* Expand initialization of a vector which has all same elements. 
*/ ++ + void +-loongarch_expand_vector_init (rtx target, rtx vals) ++loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + { + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); +- unsigned i, nelt = GET_MODE_NUNITS (vmode); +- unsigned nvar = 0; +- bool all_same = true; +- rtx x; ++ rtx same = XVECEXP (vals, 0, 0); ++ rtx temp, temp2; + +- for (i = 0; i < nelt; ++i) ++ if (CONST_INT_P (same) && nvar == 0 ++ && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) ++ { ++ switch (vmode) ++ { ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)); ++ emit_move_insn (target, temp); ++ return; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ temp = gen_reg_rtx (imode); ++ if (imode == GET_MODE (same)) ++ temp2 = same; ++ else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { +- x = XVECEXP (vals, 0, i); +- if (!loongarch_constant_elt_p (x)) +- nvar++; +- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) +- all_same = false; ++ if (GET_CODE (same) == MEM) ++ { ++ rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); ++ loongarch_emit_move (reg_tmp, same); ++ temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); ++ } ++ else ++ temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + } +- +- if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32) ++ else + { +- if (all_same) ++ if (GET_CODE (same) == MEM) + { +- rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; ++ rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); ++ loongarch_emit_move (reg_tmp, same); ++ temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ } ++ else ++ temp2 = lowpart_subreg (imode, same, GET_MODE (same)); ++ } ++ emit_move_insn (temp, temp2); + +- if (CONST_INT_P (same) && nvar == 0 +- && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +- { +- switch (vmode) +- { +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)); +- emit_move_insn (target, temp); +- return; ++ switch (vmode) ++ { ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp)); ++ break; + +- default: +- gcc_unreachable (); +- } +- } ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp)); ++ break; + +- temp = gen_reg_rtx (imode); +- if (imode == GET_MODE (same)) +- temp2 = same; +- else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +- { +- if (GET_CODE (same) == MEM) +- { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, +- GET_MODE (reg_tmp), 0); +- } +- else +- temp2 = simplify_gen_subreg (imode, same, +- GET_MODE (same), 0); +- } +- else +- { +- if (GET_CODE (same) == MEM) +- { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, +- GET_MODE (reg_tmp)); +- } +- else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); +- } +- emit_move_insn (temp, temp2); ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp)); ++ break; + +- switch (vmode) +- { +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- loongarch_emit_move (target, 
+- gen_rtx_VEC_DUPLICATE (vmode, temp)); +- break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp)); ++ break; + +- case E_V8SFmode: +- emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp)); +- break; ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp)); ++ break; + +- case E_V4DFmode: +- emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp)); +- break; ++ default: ++ gcc_unreachable (); ++ } ++} + +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- rtvec vec = shallow_copy_rtvec (XVEC (vals, 0)); ++/* Expand a vector initialization. */ + +- for (i = 0; i < nelt; ++i) +- RTVEC_ELT (vec, i) = CONST0_RTX (imode); ++void ++loongarch_expand_vector_init (rtx target, rtx vals) ++{ ++ machine_mode vmode = GET_MODE (target); ++ machine_mode imode = GET_MODE_INNER (vmode); ++ unsigned i, nelt = GET_MODE_NUNITS (vmode); ++ /* VALS is divided into high and low half-part. */ ++ /* Number of non constant elements in corresponding parts of VALS. */ ++ unsigned nvar = 0, hi_nvar = 0, lo_nvar = 0; ++ /* all_same : true if all elements of VALS are the same. ++ hi_same : true if all elements of the high half-part are the same. ++ lo_same : true if all elements of the low half-part are the same. ++ half_same : true if the high half-part is the same as the low one. */ ++ bool all_same = false, hi_same = true, lo_same = true, half_same = true; ++ rtx val32, val_hi32, val_lo16; ++ rtx x, op0, op1; ++ /* Copy one element of vals to per element of target vector. */ ++ typedef rtx (*loongarch_vec_repl1_fn) (rtx, rtx); ++ /* Copy two elements of vals to target vector. */ ++ typedef rtx (*loongarch_vec_repl2_fn) (rtx, rtx, rtx); ++ /* Insert scalar operands into the specified position of the vector. */ ++ typedef rtx (*loongarch_vec_set_fn) (rtx, rtx, rtx); ++ /* Copy 64bit lowpart to highpart. */ ++ typedef rtx (*loongarch_vec_mirror_fn) (rtx, rtx, rtx); ++ /* Merge lowpart and highpart into target. */ ++ typedef rtx (*loongarch_vec_merge_fn) (rtx, rtx, rtx, rtx); ++ ++ loongarch_vec_repl1_fn loongarch_vec_repl1_128 = NULL, ++ loongarch_vec_repl1_256 = NULL; ++ loongarch_vec_repl2_fn loongarch_vec_repl2_128 = NULL, ++ loongarch_vec_repl2_256 = NULL; ++ loongarch_vec_set_fn loongarch_vec_set128 = NULL, loongarch_vec_set256 = NULL; ++ loongarch_vec_mirror_fn loongarch_vec_mirror = NULL; ++ loongarch_vec_merge_fn loongarch_lasx_vecinit_merge = NULL; ++ machine_mode half_mode = VOIDmode; ++ ++ /* Check whether elements of each part are the same. */ ++ for (i = 0; i < nelt / 2; ++i) ++ { ++ val_hii = val_hii + nelt / 2 = vali + nelt / 2 ++ = XVECEXP (vals, 0, i + nelt / 2); ++ val_loi = vali = XVECEXP (vals, 0, i); ++ if (!loongarch_constant_elt_p (val_hii)) ++ hi_nvar++; ++ if (!loongarch_constant_elt_p (val_loi)) ++ lo_nvar++; ++ if (i > 0 && !rtx_equal_p (val_hii, val_hi0)) ++ hi_same = false; ++ if (i > 0 && !rtx_equal_p (val_loi, val_lo0)) ++ lo_same = false; ++ if (!rtx_equal_p (val_hii, val_loi)) ++ half_same = false; ++ } ++ ++ /* If all elements are the same, set all_same true. */ ++ if (hi_same && lo_same && half_same) ++ all_same = true; ++ ++ nvar = hi_nvar + lo_nvar; + +- emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec)); ++ switch (vmode) ++ { ++ case E_V32QImode: ++ half_mode = E_V16QImode; ++ loongarch_vec_set256 = gen_vec_setv32qi_internal; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_b; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v32qi : gen_lasx_vecinit_merge_v32qi; ++ /* FALLTHRU. 
*/ ++ case E_V16QImode: ++ loongarch_vec_set128 = gen_vec_setv16qi; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_b; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_b; ++ break; + +- machine_mode half_mode = VOIDmode; +- rtx target_hi, target_lo; ++ case E_V16HImode: ++ half_mode = E_V8HImode; ++ loongarch_vec_set256 = gen_vec_setv16hi_internal; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_h; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v16hi : gen_lasx_vecinit_merge_v16hi; ++ /* FALLTHRU. */ ++ case E_V8HImode: ++ loongarch_vec_set128 = gen_vec_setv8hi; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_h; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_h; ++ break; + +- switch (vmode) +- { +- case E_V32QImode: +- half_mode=E_V16QImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_b_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_b_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv16qi (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv16qi (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V8SImode: ++ half_mode = V4SImode; ++ loongarch_vec_set256 = gen_vec_setv8si; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_w; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v8si : gen_lasx_vecinit_merge_v8si; ++ /* FALLTHRU. */ ++ case E_V4SImode: ++ loongarch_vec_set128 = gen_vec_setv4si; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_w; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w; ++ break; + +- case E_V16HImode: +- half_mode=E_V8HImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_h_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_h_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv8hi (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv8hi (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V4DImode: ++ half_mode = E_V2DImode; ++ loongarch_vec_set256 = gen_vec_setv4di; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_d; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v4di : gen_lasx_vecinit_merge_v4di; ++ /* FALLTHRU. 
*/ ++ case E_V2DImode: ++ loongarch_vec_set128 = gen_vec_setv2di; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_d; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d; ++ break; + +- case E_V8SImode: +- half_mode=V4SImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_w_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_w_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv4si (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv4si (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V8SFmode: ++ half_mode = E_V4SFmode; ++ loongarch_vec_set256 = gen_vec_setv8sf; ++ loongarch_vec_repl1_128 = gen_lsx_vreplvei_w_f_scalar; ++ loongarch_vec_repl2_256 = gen_lasx_xvilvl_w_f_internal; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v8sf : gen_lasx_vecinit_merge_v8sf; ++ /* FALLTHRU. */ ++ case E_V4SFmode: ++ loongarch_vec_set128 = gen_vec_setv4sf; ++ loongarch_vec_repl2_128 = gen_lsx_vilvl_w_f_internal; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w_f; ++ break; + +- case E_V4DImode: +- half_mode=E_V2DImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_d_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_d_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv2di (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv2di (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V4DFmode: ++ half_mode = E_V2DFmode; ++ loongarch_vec_set256 = gen_vec_setv4df; ++ loongarch_vec_repl1_128 = gen_lsx_vreplvei_d_f_scalar; ++ loongarch_vec_repl2_256 = gen_lasx_xvilvl_d_f_internal; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v4df : gen_lasx_vecinit_merge_v4df; ++ /* FALLTHRU. */ ++ case E_V2DFmode: ++ loongarch_vec_set128 = gen_vec_setv2df; ++ loongarch_vec_repl2_128 = gen_lsx_vilvl_d_f_internal; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d_f; ++ break; + +- case E_V8SFmode: +- half_mode=E_V4SFmode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32) ++ { ++ /* If all elements are the same, just do a broadcost. */ ++ if (all_same) ++ loongarch_expand_vector_init_same (target, vals, nvar); ++ else ++ { ++ gcc_assert (nelt >= 4); ++ ++ rtx target_hi, target_lo; ++ /* Write elements of high half-part in target directly. */ ++ target_hi = target; ++ target_lo = gen_reg_rtx (half_mode); ++ ++ /* If all elements of high half-part are the same, ++ just do a broadcost. Also applicable to low half-part. 
*/ ++ if (hi_same) ++ { ++ rtx vtmp = gen_rtx_PARALLEL (vmode, gen_rtvec_v (nelt, val_hi)); ++ loongarch_expand_vector_init_same (target_hi, vtmp, hi_nvar); ++ } ++ if (lo_same) ++ { ++ rtx vtmp ++ = gen_rtx_PARALLEL (half_mode, gen_rtvec_v (nelt / 2, val_lo)); ++ loongarch_expand_vector_init_same (target_lo, vtmp, lo_nvar); ++ } ++ ++ for (i = 0; i < nelt / 2; ++i) ++ { ++ if (!hi_same) + { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) ++ if (vmode == E_V8SFmode || vmode == E_V4DFmode) + { +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target_lo, +- temp_lo)); ++ /* Using xvilvl to load lowest 2 elements simultaneously ++ to reduce the number of instructions. */ ++ if (i == 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hi0); ++ op1 = gen_reg_rtx (imode); ++ emit_move_insn (op1, val_hi1); ++ emit_insn ( ++ loongarch_vec_repl2_256 (target_hi, op0, op1)); ++ } ++ else if (i > 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hii); ++ emit_insn ( ++ loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); ++ } + } + else + { +- emit_insn (gen_vec_setv4sf (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv4sf (target_lo, temp_lo, +- GEN_INT (i))); ++ /* Assign the lowest element of val_hi to all elements ++ of target_hi. */ ++ if (i == 0) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hi0); ++ emit_insn (loongarch_vec_repl1_256 (target_hi, op0)); ++ } ++ else if (!rtx_equal_p (val_hii, val_hi0)) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hii); ++ emit_insn ( ++ loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); ++ } + } + } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; +- +- case E_V4DFmode: +- half_mode=E_V2DFmode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) ++ if (!lo_same && !half_same) + { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); ++ /* Assign the lowest element of val_lo to all elements ++ of target_lo. 
*/ + if (i == 0) + { +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target_lo, +- temp_lo)); ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_lo0); ++ emit_insn (loongarch_vec_repl1_128 (target_lo, op0)); + } +- else ++ else if (!rtx_equal_p (val_loi, val_lo0)) + { +- emit_insn (gen_vec_setv2df (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv2df (target_lo, temp_lo, +- GEN_INT (i))); ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_loi); ++ emit_insn ( ++ loongarch_vec_set128 (target_lo, op0, GEN_INT (i))); + } + } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; +- +- default: +- gcc_unreachable (); + } +- ++ if (half_same) ++ { ++ emit_insn (loongarch_lasx_vecinit_merge (target, target_hi, ++ target_hi, const0_rtx)); ++ return; ++ } ++ emit_insn (loongarch_lasx_vecinit_merge (target, target_hi, target_lo, ++ GEN_INT (0x20))); + } + return; + } +@@ -10494,130 +10538,54 @@ loongarch_expand_vector_init (rtx target, rtx vals) + if (ISA_HAS_LSX) + { + if (all_same) ++ loongarch_expand_vector_init_same (target, vals, nvar); ++ else + { +- rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; +- +- if (CONST_INT_P (same) && nvar == 0 +- && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +- { +- switch (vmode) +- { +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)); +- emit_move_insn (target, temp); +- return; +- +- default: +- gcc_unreachable (); +- } +- } +- temp = gen_reg_rtx (imode); +- if (imode == GET_MODE (same)) +- temp2 = same; +- else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +- { +- if (GET_CODE (same) == MEM) +- { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, +- GET_MODE (reg_tmp), 0); +- } +- else +- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); +- } +- else ++ for (i = 0; i < nelt; ++i) + { +- if (GET_CODE (same) == MEM) ++ if (vmode == E_V4SFmode || vmode == E_V2DFmode) + { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ /* Using vilvl to load lowest 2 elements simultaneously to ++ reduce the number of instructions. 
*/ ++ if (i == 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val0); ++ op1 = gen_reg_rtx (imode); ++ emit_move_insn (op1, val1); ++ emit_insn (loongarch_vec_repl2_128 (target, op0, op1)); ++ } ++ else if (i > 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, vali); ++ emit_insn ( ++ loongarch_vec_set128 (target, op0, GEN_INT (i))); ++ } + } + else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); +- } +- emit_move_insn (temp, temp2); +- +- switch (vmode) +- { +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp)); +- break; +- +- case E_V4SFmode: +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp)); +- break; +- +- case E_V2DFmode: +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp)); +- break; +- +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- emit_move_insn (target, CONST0_RTX (vmode)); +- +- for (i = 0; i < nelt; ++i) +- { +- rtx temp = gen_reg_rtx (imode); +- emit_move_insn (temp, XVECEXP (vals, 0, i)); +- switch (vmode) + { +- case E_V16QImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_b_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv16qi (target, temp, GEN_INT (i))); +- break; +- +- case E_V8HImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_h_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv8hi (target, temp, GEN_INT (i))); +- break; +- +- case E_V4SImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_w_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv4si (target, temp, GEN_INT (i))); +- break; +- +- case E_V2DImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_d_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv2di (target, temp, GEN_INT (i))); +- break; +- +- case E_V4SFmode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv4sf (target, temp, GEN_INT (i))); +- break; +- +- case E_V2DFmode: ++ if (half_same && i == nelt / 2) ++ { ++ emit_insn ( ++ loongarch_vec_mirror (target, target, const0_rtx)); ++ return; ++ } ++ /* Assign the lowest element of val to all elements of ++ target. */ + if (i == 0) +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv2df (target, temp, GEN_INT (i))); +- break; +- +- default: +- gcc_unreachable (); ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val0); ++ emit_insn (loongarch_vec_repl1_128 (target, op0)); ++ } ++ else if (!rtx_equal_p (vali, val0)) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, vali); ++ emit_insn ( ++ loongarch_vec_set128 (target, op0, GEN_INT (i))); ++ } + } + } + } +@@ -10634,8 +10602,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + /* For two-part initialization, always use CONCAT. */ + if (nelt == 2) + { +- rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0)); +- rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1)); ++ rtx op0 = force_reg (imode, val0); ++ rtx op1 = force_reg (imode, val1); + x = gen_rtx_VEC_CONCAT (vmode, op0, op1); + emit_insn (gen_rtx_SET (target, x)); + return; +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index fb4d228ba..075f6ba56 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -176,6 +176,8 @@ + UNSPEC_LSX_VSSRARNI + UNSPEC_LSX_VSSRARNI2 + UNSPEC_LSX_VPERMI ++ UNSPEC_LSX_VILVL_INTERNAL ++ UNSPEC_LSX_VREPLVEI_MIRROR + ) + + ;; This attribute gives suffix for integers in VHMODE. 
+@@ -1551,6 +1553,18 @@ + (set_attr "type" "simd_flog2") + (set_attr "mode" "<MODE>")) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Merge two scalar floating-point op1 and op2 into a LSX op0. ++(define_insn "lsx_vilvl_<lsxfmt_f>_internal" ++ (set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX (match_operand:<UNITMODE> 1 "register_operand" "f") ++ (match_operand:<UNITMODE> 2 "register_operand" "f") ++ UNSPEC_LSX_VILVL_INTERNAL)) ++ "ISA_HAS_LSX" ++ "vilvl.<lsxfmt>\t%w0,%w2,%w1" ++ (set_attr "type" "simd_permute") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "smax<mode>3" + (set (match_operand:FLSX 0 "register_operand" "=f") + (smax:FLSX (match_operand:FLSX 1 "register_operand" "f") +@@ -2289,6 +2303,16 @@ + (set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")) + ++(define_insn "lsx_vreplvei_mirror_<lsxfmt_f>" ++ (set (match_operand:LSX 0 "register_operand" "=f") ++ (unspec: LSX (match_operand:LSX 1 "register_operand" "f") ++ (match_operand 2 "const_<indeximm>_operand" "") ++ UNSPEC_LSX_VREPLVEI_MIRROR)) ++ "ISA_HAS_LSX" ++ "vreplvei.d\t%w0,%w1,%2" ++ (set_attr "type" "simd_splat") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lsx_vreplvei_<lsxfmt_f>" + (set (match_operand:LSX 0 "register_operand" "=f") + (vec_duplicate:LSX +@@ -2450,6 +2474,99 @@ + DONE; + }) + ++;; Implement vec_concatv2df by vilvl.d. ++(define_insn_and_split "vec_concatv2df" ++ (set (match_operand:V2DF 0 "register_operand" "=f") ++ (vec_concat:V2DF ++ (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f"))) ++ "ISA_HAS_LSX" ++ "" ++ "&& reload_completed" ++ (const_int 0) ++{ ++ emit_insn (gen_lsx_vilvl_d_f (operands0, ++ gen_rtx_REG (V2DFmode, REGNO (operands1)), ++ gen_rtx_REG (V2DFmode, REGNO (operands2)))); ++ DONE; ++} ++ (set_attr "mode" "V2DF")) ++ ++;; Implement vec_concatv4sf. ++;; Optimize based on hardware register allocation of operands. ++(define_insn_and_split "vec_concatv4sf" ++ (set (match_operand:V4SF 0 "register_operand" "=f") ++ (vec_concat:V4SF ++ (vec_concat:V2SF ++ (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f")) ++ (vec_concat:V2SF ++ (match_operand:SF 3 "register_operand" "f") ++ (match_operand:SF 4 "register_operand" "f")))) ++ "ISA_HAS_LSX" ++ "" ++ "&& reload_completed" ++ (const_int 0) ++{ ++ operands5 = GEN_INT (1); ++ operands6 = GEN_INT (2); ++ operands7 = GEN_INT (4); ++ operands8 = GEN_INT (8); ++ ++ /* If all input are same, use vreplvei.w to broadcast. */ ++ if (REGNO (operands1) == REGNO (operands2) ++ && REGNO (operands1) == REGNO (operands3) ++ && REGNO (operands1) == REGNO (operands4)) ++ { ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (operands0, operands1)); ++ } ++ /* If op0 is equal to op3, use vreplvei.w to set each element of op0 as op3. ++ If other input is different from op3, use vextrins.w to insert. */ ++ else if (REGNO (operands0) == REGNO (operands3)) ++ { ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (operands0, operands3)); ++ if (REGNO (operands1) != REGNO (operands3)) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands1, ++ operands0, operands5)); ++ if (REGNO (operands2) != REGNO (operands3)) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands2, ++ operands0, operands6)); ++ if (REGNO (operands4) != REGNO (operands3)) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands4, ++ operands0, operands8)); ++ } ++ /* If op0 is equal to op4, use vreplvei.w to set each element of op0 as op4. 
++ If other input is different from op4, use vextrins.w to insert. */ ++ else if (REGNO (operands0) == REGNO (operands4)) ++ { ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (operands0, operands4)); ++ if (REGNO (operands1) != REGNO (operands4)) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands1, ++ operands0, operands5)); ++ if (REGNO (operands2) != REGNO (operands4)) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands2, ++ operands0, operands6)); ++ if (REGNO (operands3) != REGNO (operands4)) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands3, ++ operands0, operands7)); ++ } ++ /* Otherwise, use vilvl.w to merge op1 and op2 first. ++ If op3 is different from op1, use vextrins.w to insert. ++ If op4 is different from op2, use vextrins.w to insert. */ ++ else ++ { ++ emit_insn ( ++ gen_lsx_vilvl_w_f (operands0, ++ gen_rtx_REG (V4SFmode, REGNO (operands1)), ++ gen_rtx_REG (V4SFmode, REGNO (operands2)))); ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands3, ++ operands0, operands7)); ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands0, operands4, ++ operands0, operands8)); ++ } ++ DONE; ++} ++ (set_attr "mode" "V4SF")) + + (define_insn "vandn<mode>3" + (set (match_operand:LSX 0 "register_operand" "=f") +@@ -4465,3 +4582,20 @@ + "vpermi.w\t%w0,%w2,%3" + (set_attr "type" "simd_bit") + (set_attr "mode" "V4SI")) ++ ++;; Delete one of two instructions that exactly play the same role. ++(define_peephole2 ++ (set (match_operand:V2DI 0 "register_operand") ++ (vec_duplicate:V2DI (match_operand:DI 1 "register_operand"))) ++ (set (match_operand:V2DI 2 "register_operand") ++ (vec_merge:V2DI ++ (vec_duplicate:V2DI (match_operand:DI 3 "register_operand")) ++ (match_operand:V2DI 4 "register_operand") ++ (match_operand 5 "const_int_operand"))) ++ "operands0 == operands2 && ++ operands1 == operands3 && ++ operands2 == operands4 && ++ INTVAL (operands5) == 2" ++ (set (match_dup 0) ++ (vec_duplicate:V2DI (match_dup 1))) ++ "") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c +new file mode 100644 +index 000000000..487816a48 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c +@@ -0,0 +1,102 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++ ++#include <lasxintrin.h> ++ ++extern long long *x_di; ++extern int *x_si; ++extern short int *x_hi; ++extern char *x_qi; ++extern double *y_df; ++extern float *y_sf; ++ ++/* Remove some unnecessary vinsgr2vr.d as the corresponding elements ++ have already been set. */ ++/* { dg-final { scan-assembler-not "v4i64:.*\tvinsgr2vr\\.d.*v4i64" } } */ ++/* { dg-final { scan-assembler-times "v4i64:.*\txvldrepl\\.d.*v4i64" 1 } } */ ++v4i64 ++vec_construct_v4i64 () ++{ ++ v4i64 res = ++ { x_di0, x_di0, x_di1, x_di1 } ++ ; ++ return res; ++} ++ ++/* Remove some unnecessary vinsgr2vr.w as the corresponding elements ++ have already been set. */ ++/* { dg-final { scan-assembler-not "v8i32:.*\tvinsgr2vr\\.w.*v8i32" } } */ ++/* { dg-final { scan-assembler-times "v8i32:.*\txvreplgr2vr\\.w.*v8i32" 1 } } */ ++v8i32 ++vec_construct_v8i32 () ++{ ++ v8i32 res = ++ { x_si0, x_si0, x_si0, x_si0, ++ x_si0, x_si2, x_si0, x_si0 } ++ ; ++ return res; ++} ++ ++/* Remove some unnecessary vinsgr2vr.h as the corresponding elements ++ have already been set. 
*/ ++/* { dg-final { scan-assembler-not "v16i16:.*\tvori\\.b.*v16i16" } } */ ++/* { dg-final { scan-assembler-times "v16i16:.*\txvreplgr2vr\\.h.*v16i1" 1 } } */ ++v16i16 ++vec_construct_v16i16 () ++{ ++ v16i16 res = ++ { x_hi1, x_hi2, x_hi1, x_hi1, ++ x_hi1, x_hi1, x_hi1, x_hi1, ++ x_hi1, x_hi1, x_hi1, x_hi1, ++ x_hi1, x_hi1, x_hi1, x_hi2 } ++ ; ++ return res; ++} ++ ++/* Remove some unnecessary vinsgr2vr.b as the corresponding elements ++ have already been set. */ ++/* { dg-final { scan-assembler-not "v32i8:.*\tvori\\.b.*v32i8" } } */ ++/* { dg-final { scan-assembler-times "v32i8:.*\txvreplgr2vr\\.b.*v32i8" 1 } } */ ++v32i8 ++vec_construct_v32i8 () ++{ ++ v32i8 res = ++ { x_qi0, x_qi0, x_qi0, x_qi0, ++ x_qi0, x_qi0, x_qi0, x_qi0, ++ x_qi0, x_qi0, x_qi0, x_qi0, ++ x_qi0, x_qi0, x_qi0, x_qi2, ++ x_qi0, x_qi0, x_qi0, x_qi0, ++ x_qi0, x_qi0, x_qi0, x_qi0, ++ x_qi0, x_qi0, x_qi0, x_qi0, ++ x_qi0, x_qi0, x_qi0, x_qi3 } ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.d ++ and reducing more vextrins.d. */ ++/* { dg-final { scan-assembler-not "v4f64:.*\tvori\\.b.*v4f64" } } */ ++/* { dg-final { scan-assembler-not "v4f64:.*\tvextrins\\.d.*v4f64" } } */ ++/* { dg-final { scan-assembler-times "v4f64:.*\tvilvl\\.d.*v4f64" 1 } } */ ++v4f64 ++vec_construct_v4f64 () ++{ ++ v4f64 res = ++ { y_df0, y_df2, y_df0, y_df0} ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.w ++ and reducing more vextrins.w. */ ++/* { dg-final { scan-assembler-not "v8f32:.*\tvextrins\\.w.*v8f32" } } */ ++/* { dg-final { scan-assembler-times "v8f32:.*\txvilvl\\.w.*v8f32" 1 } } */ ++v8f32 ++vec_construct_v8f32 () ++{ ++ v8f32 res = ++ { y_sf2, y_sf1, y_sf2, y_sf3, ++ y_sf2, y_sf1, y_sf2, y_sf3 } ++ ; ++ return res; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c +new file mode 100644 +index 000000000..92da1c8af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -O3" } */ ++ ++#include <lsxintrin.h> ++ ++extern long long *x_di; ++extern int *x_si; ++extern short int *x_hi; ++extern char *x_qi; ++extern double *y_df; ++extern float *y_sf; ++ ++/* No change for V2DI mode. */ ++v2i64 ++vec_construct_v2i64 () ++{ ++ v2i64 res = ++ { x_di1, x_di0} ++ ; ++ return res; ++} ++ ++/* Only load the lowest 2 elements and directly copy them to high half-part, ++ reducing more vinsgr2vr.w. */ ++/* { dg-final { scan-assembler-times "v4i32:.*\tvreplvei\\.d.*v4i32" 1 } } */ ++v4i32 ++vec_construct_v4i32 () ++{ ++ v4i32 res = ++ { x_si0, x_si1, x_si0, x_si1} ++ ; ++ return res; ++} ++ ++/* Only load the lowest 4 elements and directly copy them to high half-part, ++ reducing more vinsgr2vr.h. */ ++/* { dg-final { scan-assembler-times "v8i16:.*\tvreplvei\\.d.*v8i16" 1 } } */ ++v8i16 ++vec_construct_v8i16 () ++{ ++ v8i16 res = ++ { x_hi0, x_hi0, x_hi0, x_hi1, ++ x_hi0, x_hi0, x_hi0, x_hi1 } ++ ; ++ return res; ++} ++ ++/* Only load the lowest 8 elements and directly copy them to high half-part, ++ reducing more vinsgr2vr.b. */ ++/* { dg-final { scan-assembler-times "v16i8:.*\tvreplvei\\.d.*v16i8" 1 } } */ ++v16i8 ++vec_construct_v16i8 () ++{ ++ v16i8 res = ++ { x_qi0, x_qi1, x_qi0, x_qi2, ++ x_qi0, x_qi0, x_qi0, x_qi3, ++ x_qi0, x_qi1, x_qi0, x_qi2, ++ x_qi0, x_qi0, x_qi0, x_qi3 } ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.d. 
*/ ++/* { dg-final { scan-assembler-not "v2f64:.*\tvextrins\\.d.*v2f64" } } */ ++/* { dg-final { scan-assembler-times "v2f64:.*\tvilvl\\.d.*v2f64" 1 } } */ ++v2f64 ++vec_construct_v2f64 () ++{ ++ v2f64 res = ++ { y_df0, y_df2 } ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.w ++ and reducing more vextrins.w. */ ++/* { dg-final { scan-assembler-times "v4f32:.*\tvilvl\\.w.*v4f32" 1 } } */ ++v4f32 ++vec_construct_v4f32 () ++{ ++ v4f32 res = ++ { y_sf0, y_sf1, y_sf0, y_sf0 } ++ ; ++ return res; ++} +-- +2.43.0 +
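The shape of source code that benefits is worth spelling out. A standalone version of the v4i32 case from the new lsx test (typedef written out here so the example is self-contained; assumed flags -mlsx -O3):

    /* The high 64-bit half of the constructor repeats the low half, so
       the expander only inserts the low two elements and then mirrors
       them with a single vreplvei.d instead of issuing two further
       vinsgr2vr.w instructions.  */
    typedef int v4i32 __attribute__ ((vector_size (16)));

    extern int *x_si;

    v4i32
    vec_construct_v4i32 (void)
    {
      v4i32 res = { x_si[0], x_si[1], x_si[0], x_si[1] };
      return res;
    }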
_service:tar_scm:0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch
Added
@@ -0,0 +1,51 @@
+From 9b2cbf361e38ea1ad672c2b8c8cf1dda4f6f7d72 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 2 Oct 2023 18:51:00 +0800
+Subject: [PATCH 005/188] LoongArch: Replace UNSPEC_FCOPYSIGN with copysign RTL
+
+When I added copysign support for LoongArch (r13-3702), we did not have
+a copysign RTL insn, so I had to use UNSPEC to represent the copysign
+instruction. Now the copysign RTX code has been added in r14-1586, so
+this patch removes those UNSPECs, and it uses the native RTL copysign
+insn.
+
+Inspired by rs6000 patch "Cleanup: Replace UNSPEC_COPYSIGN with copysign
+RTL" [1] from Michael Meissner.
+
+[1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631701.html
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): Delete.
+ (copysign<mode>3): Use copysign RTL instead of UNSPEC.
+---
+ gcc/config/loongarch/loongarch.md | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 63ff32e75..73e2cbe0b 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -37,7 +37,6 @@
+ UNSPEC_FCLASS
+ UNSPEC_FMAX
+ UNSPEC_FMIN
+- UNSPEC_FCOPYSIGN
+ UNSPEC_FTINT
+ UNSPEC_FTINTRM
+ UNSPEC_FTINTRP
+@@ -1129,9 +1128,8 @@
+
+ (define_insn "copysign<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+- (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+- (match_operand:ANYF 2 "register_operand" "f")]
+- UNSPEC_FCOPYSIGN))]
++ (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f")
++ (match_operand:ANYF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fcopysign.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fcopysign")
+--
+2.43.0
+
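Code generation is unchanged by this cleanup; only the RTL representation is. A quick check on a LoongArch target (assumed flags -O2, 64-bit double-float ABI):

    /* Should still assemble to a single fcopysign.d, now matched through
       the generic copysign optab instead of an UNSPEC.  */
    double
    flip_sign (double magnitude, double sign)
    {
      return __builtin_copysign (magnitude, sign);
    }

Using the copysign RTX code rather than an UNSPEC also lets generic RTL simplifications see through the operation, which an UNSPEC deliberately blocks.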
View file
_service:tar_scm:0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch
Added
@@ -0,0 +1,71 @@
+From 746109cb61d6f3db4c25a9a107f30996c17f11db Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 11 Oct 2023 17:59:53 +0800
+Subject: [PATCH 006/188] LoongArch: Adjust makefile dependency for loongarch
+ headers.
+
+gcc/ChangeLog:
+
+	* config.gcc: Add loongarch-driver.h to tm_files.
+	* config/loongarch/loongarch.h: Do not include loongarch-driver.h.
+	* config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H)
+	  instead of $(TM_H) for building generator programs.
+---
+ gcc/config.gcc                   | 4 ++--
+ gcc/config/loongarch/loongarch.h | 3 ---
+ gcc/config/loongarch/t-loongarch | 3 ++-
+ 3 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index e34a5fbb9..11ab620d0 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2508,7 +2508,7 @@ riscv*-*-freebsd*)
+
+ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
+-	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
++	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	extra_options="${extra_options} linux-android.opt"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+@@ -2521,7 +2521,7 @@ loongarch*-*-linux*)
+
+ loongarch*-*-elf*)
+ 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
+-	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
++	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+ 	gnu_ld=yes
+ 	gas=yes
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a443a6427..a2dc4ba8c 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -49,9 +49,6 @@ along with GCC; see the file COPYING3.  If not see
+
+ #define TARGET_LIBGCC_SDATA_SECTION ".sdata"
+
+-/* Driver native functions for SPEC processing in the GCC driver.  */
+-#include "loongarch-driver.h"
+-
+ /* This definition replaces the formerly used 'm' constraint with a
+    different constraint letter in order to avoid changing semantics of
+    the 'm' constraint when accepting new address formats in
+diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
+index 28cfb49df..12734c37b 100644
+--- a/gcc/config/loongarch/t-loongarch
++++ b/gcc/config/loongarch/t-loongarch
+@@ -16,7 +16,8 @@
+ # along with GCC; see the file COPYING3.  If not see
+ # <http://www.gnu.org/licenses/>.
+
+-TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
++
++GTM_H += loongarch-multilib.h
+ OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
+ 		   $(srcdir)/config/loongarch/loongarch-tune.h
+
+--
+2.43.0
+
View file
_service:tar_scm:0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
Added
@@ -0,0 +1,65 @@
+From b75f00086e863ac7e9e1ee37f8107b199cf62550 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 25 Oct 2024 00:58:01 +0000
+Subject: [PATCH 007/188] LoongArch: Enable vect.exp for LoongArch. [PR111424]
+
+gcc/testsuite/ChangeLog:
+
+	PR target/111424
+	* lib/target-supports.exp: Enable vect.exp for LoongArch.
+---
+ gcc/testsuite/lib/target-supports.exp | 31 +++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index 192e0aded..bbe145c1c 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -10535,6 +10535,13 @@ proc check_vect_support_and_set_flags { } {
+ 	}
+     } elseif { [istarget amdgcn-*-*] } {
+         set dg-do-what-default run
++    } elseif { [istarget loongarch*-*-*] } {
++	lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
++	if { [check_effective_target_loongarch_asx_hw] } {
++	    set dg-do-what-default run
++	} else {
++	    set dg-do-what-default compile
++	}
+     } else {
+         return 0
+     }
+@@ -10542,6 +10549,30 @@ proc check_vect_support_and_set_flags { } {
+     return 1
+ }
+
++proc check_effective_target_loongarch_sx_hw { } {
++    return [check_runtime loongarch_sx_hw {
++	#include <lsxintrin.h>
++	int main (void)
++	{
++	    __m128i a, b, c;
++	    c = __lsx_vand_v (a, b);
++	    return 0;
++	}
++    } "-mlsx"]
++}
++
++proc check_effective_target_loongarch_asx_hw { } {
++    return [check_runtime loongarch_asx_hw {
++	#include <lasxintrin.h>
++	int main (void)
++	{
++	    __m256i a, b, c;
++	    c = __lasx_xvand_v (a, b);
++	    return 0;
++	}
++    } "-mlasx"]
++}
++
+ # Return 1 if the target does *not* require strict alignment.
+
+ proc check_effective_target_non_strict_align {} {
+--
+2.43.0
+
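A hypothetical testcase header (not part of the patch) showing how the new effective-target keyword is consumed: the test runs only where LASX hardware support was probed successfully, and degrades to compile-only elsewhere.

    /* { dg-do run { target loongarch_asx_hw } } */
    /* { dg-options "-mlasx -O2" } */
    int
    main (void)
    {
      return 0;
    }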
View file
_service:tar_scm:0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch
Added
@@ -0,0 +1,48 @@
+From 3829ad1963a92526201b42233d2bb4facf7ba8d4 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 15 Sep 2023 11:56:01 +0800
+Subject: [PATCH 008/188] LoongArch: Delete macro definition
+ ASM_OUTPUT_ALIGN_WITH_NOP.
+
+There are two reasons for removing this macro definition:
+1. The default in the assembler is to use the nop instruction for filling.
+2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]]
+   The third expression it is the maximum number of bytes that should be
+   skipped by this alignment directive.
+   Therefore, it will affect the display of the specified alignment rules
+   and affect the operating efficiency.
+
+This modification relies on binutils commit 1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
+(Since the assembler will add nop based on the .align information when doing relax,
+it will cause the conditional branch to go out of bounds during the assembly process.
+This submission of binutils solves this problem.)
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
+	Delete.
+
+Co-authored-by: Chenghua Xu <xuchenghua@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.h | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a2dc4ba8c..572b538be 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1058,11 +1058,6 @@ typedef struct {
+
+ #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
+-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
+-   used for padding.  */
+-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
+-  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
+-
+ /* This is how to output an assembler line to advance the location
+    counter by SIZE bytes.  */
+
+--
+2.43.0
+
View file
_service:tar_scm:0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
Added
@@ -0,0 +1,105 @@
+From aa947bf395b5722a23f2edd9d6302e220473d900 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Wed, 11 Oct 2023 16:41:25 +0800
+Subject: [PATCH 009/188] LoongArch: Fix vec_initv32qiv16qi template to avoid
+ ICE.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Following test code triggers unrecognized insn ICE on LoongArch target
+with "-O3 -mlasx":
+
+void
+foo (unsigned char *dst, unsigned char *src)
+{
+  for (int y = 0; y < 16; y++)
+    {
+      for (int x = 0; x < 16; x++)
+	dst[x] = src[x] + 1;
+      dst += 32;
+      src += 32;
+    }
+}
+
+ICE info:
+./test.c: In function ‘foo’:
+./test.c:8:1: error: unrecognizable insn:
+    8 | }
+      | ^
+(insn 15 14 16 4 (set (reg:V32QI 185 [ vect__24.7 ])
+        (vec_concat:V32QI (reg:V16QI 186)
+            (const_vector:V16QI [
+                (const_int 0 [0]) repeated x16
+            ]))) "./test.c":4:19 -1
+     (nil))
+during RTL pass: vregs
+./test.c:8:1: internal compiler error: in extract_insn, at recog.cc:2791
+0x12028023b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
+	/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:108
+0x12028026f _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
+	/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:116
+0x120a03c5b extract_insn(rtx_insn*)
+	/home/panchenghui/upstream/gcc/gcc/recog.cc:2791
+0x12067ff73 instantiate_virtual_regs_in_insn
+	/home/panchenghui/upstream/gcc/gcc/function.cc:1610
+0x12067ff73 instantiate_virtual_regs
+	/home/panchenghui/upstream/gcc/gcc/function.cc:1983
+0x12067ff73 execute
+	/home/panchenghui/upstream/gcc/gcc/function.cc:2030
+
+This RTL is generated inside loongarch_expand_vector_group_init function (related
+to vec_initv32qiv16qi template). Original impl doesn't ensure all vec_concat arguments
+are register type. This patch adds force_reg() to the vec_concat argument generation.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_expand_vector_group_init):
+	fix impl related to vec_initv32qiv16qi template to avoid ICE.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc                  |  3 ++-
+ .../loongarch/vector/lasx/lasx-vec-init-1.c        | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 760b12268..9a629a999 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10188,7 +10188,8 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
+ void
+ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ {
+-  rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
++  rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
++		 force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
+   emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
+						       ops[1])));
+ }
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+new file mode 100644
+index 000000000..28be32982
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++void
++foo (unsigned char *dst, unsigned char *src)
++{
++  for (int y = 0; y < 16; y++)
++    {
++      for (int x = 0; x < 16; x++)
++	dst[x] = src[x] + 1;
++      dst += 32;
++      src += 32;
++    }
++}
+--
+2.43.0
+
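The fix relies on a standard GCC-internals idiom: force_reg copies an arbitrary rtx into a fresh pseudo unless it is already a register, so the following vec_concat pattern, which only accepts register operands, is guaranteed to match. A minimal sketch of the idiom (illustrative wrapper, not from the patch):

    /* Return X itself if it is already a register; otherwise emit a move
       into a new pseudo of MODE and return that pseudo.  */
    static rtx
    ensure_register_operand (machine_mode mode, rtx x)
    {
      return force_reg (mode, x);
    }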
View file
_service:tar_scm:0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch
Added
@@ -0,0 +1,35 @@
+From 35bce671a97b27a41c425109ba92b24ab87ff35b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 17 Oct 2023 21:55:05 +0800
+Subject: [PATCH 010/188] LoongArch: Use fcmp.caf.s instead of movgr2cf for
+ zeroing a fcc
+
+During the review of an LLVM change [1], on LA464 we found that zeroing
+an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0.
+
+[1]: https://github.com/llvm/llvm-project/pull/69300
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for
+	zeroing a fcc.
+---
+ gcc/config/loongarch/loongarch.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 73e2cbe0b..5f9e63d66 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2150,7 +2150,7 @@
+   [(set (match_operand:FCC 0 "register_operand" "=z")
+ 	(const_int 0))]
+   ""
+-  "movgr2cf\t%0,$r0")
++  "fcmp.caf.s\t%0,$f0,$f0")
+
+ ;; Conditional move instructions.
+
+--
+2.43.0
+
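A hedged sketch of the effect (assumed LoongArch assembler syntax; illustrative, not from the patch): "caf" is the compare-always-false condition, so the result written to the condition-flag register is 0 regardless of the floating-point inputs, which makes it a cheap way to clear an fcc.

    /* Inline-asm equivalent of the sequence the compiler now emits
       internally when it needs $fcc0 == 0 (clobbers not modeled).  */
    void
    zero_fcc0 (void)
    {
      __asm__ volatile ("fcmp.caf.s\t$fcc0,$f0,$f0");
    }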
View file
_service:tar_scm:0011-LoongArch-Implement-avg-and-sad-standard-names.patch
Added
@@ -0,0 +1,389 @@
+From 159dd069968fae895f1f663ebda6f53970ec34b1 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:36:12 +0800
+Subject: [PATCH 011/188] LoongArch:Implement avg and sad standard names.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md
+	(avg<mode>3_ceil): New patterns.
+	(uavg<mode>3_ceil): Ditto.
+	(avg<mode>3_floor): Ditto.
+	(uavg<mode>3_floor): Ditto.
+	(usadv32qi): Ditto.
+	(ssadv32qi): Ditto.
+	* config/loongarch/lsx.md
+	(avg<mode>3_ceil): New patterns.
+	(uavg<mode>3_ceil): Ditto.
+	(avg<mode>3_floor): Ditto.
+	(uavg<mode>3_floor): Ditto.
+	(usadv16qi): Ditto.
+	(ssadv16qi): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/avg-ceil-lasx.c: New test.
+	* gcc.target/loongarch/avg-ceil-lsx.c: New test.
+	* gcc.target/loongarch/avg-floor-lasx.c: New test.
+	* gcc.target/loongarch/avg-floor-lsx.c: New test.
+	* gcc.target/loongarch/sad-lasx.c: New test.
+	* gcc.target/loongarch/sad-lsx.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 78 +++++++++++++++++++
+ gcc/config/loongarch/lsx.md                   | 78 +++++++++++++++++++
+ .../gcc.target/loongarch/avg-ceil-lasx.c      | 22 ++++++
+ .../gcc.target/loongarch/avg-ceil-lsx.c       | 22 ++++++
+ .../gcc.target/loongarch/avg-floor-lasx.c     | 22 ++++++
+ .../gcc.target/loongarch/avg-floor-lsx.c      | 22 ++++++
+ gcc/testsuite/gcc.target/loongarch/sad-lasx.c | 20 +++++
+ gcc/testsuite/gcc.target/loongarch/sad-lsx.c  | 20 +++++
+ 8 files changed, 284 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lasx.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lsx.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 2bc5d47ed..c7496d68a 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -5171,3 +5171,81 @@
+ 			      const0_rtx));
+   DONE;
+ })
++
++(define_expand "avg<mode>3_ceil"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavgr_s_<lasxfmt> (operands[0],
++					  operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_ceil"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavgr_u_<lasxfmt_u> (operands[0],
++					    operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "avg<mode>3_floor"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavg_s_<lasxfmt> (operands[0],
++					 operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_floor"
++  [(match_operand:ILASX_WHB 0 "register_operand")
++   (match_operand:ILASX_WHB 1 "register_operand")
++   (match_operand:ILASX_WHB 2 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  emit_insn (gen_lasx_xvavg_u_<lasxfmt_u> (operands[0],
++					   operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "usadv32qi"
++  [(match_operand:V8SI 0 "register_operand")
++   (match_operand:V32QI 1 "register_operand")
++   (match_operand:V32QI 2 "register_operand")
++   (match_operand:V8SI 3 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx t1 = gen_reg_rtx (V32QImode);
++  rtx t2 = gen_reg_rtx (V16HImode);
++  rtx t3 = gen_reg_rtx (V8SImode);
++  emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
++  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
++  DONE;
++})
++
++(define_expand "ssadv32qi"
++  [(match_operand:V8SI 0 "register_operand")
++   (match_operand:V32QI 1 "register_operand")
++   (match_operand:V32QI 2 "register_operand")
++   (match_operand:V8SI 3 "register_operand")]
++  "ISA_HAS_LASX"
++{
++  rtx t1 = gen_reg_rtx (V32QImode);
++  rtx t2 = gen_reg_rtx (V16HImode);
++  rtx t3 = gen_reg_rtx (V8SImode);
++  emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
++  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
++  DONE;
++})
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 075f6ba56..b4e92ae9c 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -3581,6 +3581,84 @@
+   DONE;
+ })
+
++(define_expand "avg<mode>3_ceil"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavgr_s_<lsxfmt> (operands[0],
++				       operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_ceil"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavgr_u_<lsxfmt_u> (operands[0],
++					 operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "avg<mode>3_floor"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavg_s_<lsxfmt> (operands[0],
++				      operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "uavg<mode>3_floor"
++  [(match_operand:ILSX_WHB 0 "register_operand")
++   (match_operand:ILSX_WHB 1 "register_operand")
++   (match_operand:ILSX_WHB 2 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  emit_insn (gen_lsx_vavg_u_<lsxfmt_u> (operands[0],
++					operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "usadv16qi"
++  [(match_operand:V4SI 0 "register_operand")
++   (match_operand:V16QI 1 "register_operand")
++   (match_operand:V16QI 2 "register_operand")
++   (match_operand:V4SI 3 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  rtx t1 = gen_reg_rtx (V16QImode);
++  rtx t2 = gen_reg_rtx (V8HImode);
++  rtx t3 = gen_reg_rtx (V4SImode);
++  emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
++  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
++  DONE;
++})
++
++(define_expand "ssadv16qi"
++  [(match_operand:V4SI 0 "register_operand")
++   (match_operand:V16QI 1 "register_operand")
++   (match_operand:V16QI 2 "register_operand")
++   (match_operand:V4SI 3 "register_operand")]
++  "ISA_HAS_LSX"
++{
++  rtx t1 = gen_reg_rtx (V16QImode);
++  rtx t2 = gen_reg_rtx (V8HImode);
++  rtx t3 = gen_reg_rtx (V4SImode);
++  emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
++  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
++  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
++  emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
++  DONE;
++})
++
+ (define_insn "lsx_v<optab>wev_d_w<u>"
+   [(set (match_operand:V2DI 0 "register_operand" "=f")
+ 	(addsubmul:V2DI
+diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
+new file mode 100644
+index 000000000..16db7bf72
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvavgr.b" } } */
++/* { dg-final { scan-assembler "xvavgr.bu" } } */
++/* { dg-final { scan-assembler "xvavgr.hu" } } */
++/* { dg-final { scan-assembler "xvavgr.h" } } */
++
++#define N 1024
++
++#define TEST(TYPE, NAME) \
++  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
++  void f_##NAME (void) \
++  { \
++    int i; \
++    for (i = 0; i < N; i++) \
++      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1; \
++  }
++
++TEST(char, 1);
++TEST(short, 2);
++TEST(unsigned char, 3);
++TEST(unsigned short, 4);
+diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
+new file mode 100644
+index 000000000..94119c23b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlsx" } */
++/* { dg-final { scan-assembler "vavgr.b" } } */
++/* { dg-final { scan-assembler "vavgr.bu" } } */
++/* { dg-final { scan-assembler "vavgr.hu" } } */
++/* { dg-final { scan-assembler "vavgr.h" } } */
++
++#define N 1024
++
++#define TEST(TYPE, NAME) \
++  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
++  void f_##NAME (void) \
++  { \
++    int i; \
++    for (i = 0; i < N; i++) \
++      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1; \
++  }
++
++TEST(char, 1);
++TEST(short, 2);
++TEST(unsigned char, 3);
++TEST(unsigned short, 4);
+diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
+new file mode 100644
+index 000000000..da6896531
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvavg.b" } } */
++/* { dg-final { scan-assembler "xvavg.bu" } } */
++/* { dg-final { scan-assembler "xvavg.hu" } } */
++/* { dg-final { scan-assembler "xvavg.h" } } */
++
++#define N 1024
++
++#define TEST(TYPE, NAME) \
++  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
++  void f_##NAME (void) \
++  { \
++    int i; \
++    for (i = 0; i < N; i++) \
++      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1; \
++  }
++
++TEST(char, 1);
++TEST(short, 2);
++TEST(unsigned char, 3);
++TEST(unsigned short, 4);
+diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
+new file mode 100644
+index 000000000..bbb9db527
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlsx" } */
++/* { dg-final { scan-assembler "vavg.b" } } */
++/* { dg-final { scan-assembler "vavg.bu" } } */
++/* { dg-final { scan-assembler "vavg.hu" } } */
++/* { dg-final { scan-assembler "vavg.h" } } */
++
++#define N 1024
++
++#define TEST(TYPE, NAME) \
++  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
++  void f_##NAME (void) \
++  { \
++    int i; \
++    for (i = 0; i < N; i++) \
++      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1; \
++  }
++
++TEST(char, 1);
++TEST(short, 2);
++TEST(unsigned char, 3);
++TEST(unsigned short, 4);
+diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lasx.c b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c
+new file mode 100644
+index 000000000..6c0cdfd97
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++
++#define N 1024
++
++#define TEST(SIGN) \
++  SIGN char a_##SIGN[N], b_##SIGN[N]; \
++  int f_##SIGN (void) \
++  { \
++    int i, sum = 0; \
++    for (i = 0; i < N; i++) \
++      sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);; \
++    return sum; \
++  }
++
++TEST(signed);
++TEST(unsigned);
++
++/* { dg-final { scan-assembler {\txvabsd.bu\t} } } */
++/* { dg-final { scan-assembler {\txvabsd.b\t} } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lsx.c b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c
+new file mode 100644
+index 000000000..b92110a8b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlsx" } */
++
++#define N 1024
++
++#define TEST(SIGN) \
++  SIGN char a_##SIGN[N], b_##SIGN[N]; \
++  int f_##SIGN (void) \
++  { \
++    int i, sum = 0; \
++    for (i = 0; i < N; i++) \
++      sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);; \
++    return sum; \
++  }
++
++TEST(signed);
++TEST(unsigned);
++
++/* { dg-final { scan-assembler {\tvabsd.bu\t} } } */
++/* { dg-final { scan-assembler {\tvabsd.b\t} } } */
+--
+2.43.0
+
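For reference, the scalar idioms that the new standard names let the vectorizer recognize, as a sketch mirroring the shape of the tests above (the comments name the expected LSX selections; assumptions, not quoted compiler output):

    unsigned char a[1024], b[1024], c[1024];

    void
    avg_ceil (void)
    {
      for (int i = 0; i < 1024; i++)
        c[i] = (a[i] + b[i] + 1) >> 1;          /* avg<mode>3_ceil -> vavgr.bu */
    }

    int
    sad (void)
    {
      int sum = 0;
      for (int i = 0; i < 1024; i++)
        sum += __builtin_abs (a[i] - b[i]);     /* usadv16qi expansion */
      return sum;
    }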
View file
_service:tar_scm:0012-LoongArch-Implement-vec_widen-standard-names.patch
Added
@@ -0,0 +1,403 @@
+From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:39:40 +0800
+Subject: [PATCH 012/188] LoongArch:Implement vec_widen standard names.
+
+Add support for vec_widen lo/hi patterns.  These do not directly
+match on Loongarch lasx instructions but can be emulated with
+even/odd + vector merge.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md
+	(vec_widen_<su>mult_even_v8si): New patterns.
+	(vec_widen_<su>add_hi_<mode>): Ditto.
+	(vec_widen_<su>add_lo_<mode>): Ditto.
+	(vec_widen_<su>sub_hi_<mode>): Ditto.
+	(vec_widen_<su>sub_lo_<mode>): Ditto.
+	(vec_widen_<su>mult_hi_<mode>): Ditto.
+	(vec_widen_<su>mult_lo_<mode>): Ditto.
+	* config/loongarch/loongarch.md (u_bool): New iterator.
+	* config/loongarch/loongarch-protos.h
+	(loongarch_expand_vec_widen_hilo): New prototype.
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_vec_interleave): New function.
+	(loongarch_expand_vec_widen_hilo): New function.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-widen-add.c: New test.
+	* gcc.target/loongarch/vect-widen-mul.c: New test.
+	* gcc.target/loongarch/vect-widen-sub.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  |  82 ++++++++---
+ gcc/config/loongarch/loongarch-protos.h       |   1 +
+ gcc/config/loongarch/loongarch.cc             | 137 ++++++++++++++++++
+ gcc/config/loongarch/loongarch.md             |   2 +
+ .../gcc.target/loongarch/vect-widen-add.c     |  24 +++
+ .../gcc.target/loongarch/vect-widen-mul.c     |  24 +++
+ .../gcc.target/loongarch/vect-widen-sub.c     |  24 +++
+ 7 files changed, 277 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index c7496d68a..442fda246 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -5048,23 +5048,71 @@
+   [(set_attr "type" "simd_store")
+    (set_attr "mode" "DI")])
+
+-(define_insn "vec_widen_<su>mult_even_v8si"
+-  [(set (match_operand:V4DI 0 "register_operand" "=f")
+-	(mult:V4DI
+-	  (any_extend:V4DI
+-	    (vec_select:V4SI
+-	      (match_operand:V8SI 1 "register_operand" "%f")
+-	      (parallel [(const_int 0) (const_int 2)
+-			 (const_int 4) (const_int 6)])))
+-	  (any_extend:V4DI
+-	    (vec_select:V4SI
+-	      (match_operand:V8SI 2 "register_operand" "f")
+-	      (parallel [(const_int 0) (const_int 2)
+-			 (const_int 4) (const_int 6)])))))]
+-  "ISA_HAS_LASX"
+-  "xvmulwev.d.w<u>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "V4DI")])
++(define_expand "vec_widen_<su>add_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "add");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>add_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "add");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>sub_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "sub");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>sub_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "sub");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>mult_hi_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, true, "mult");
++  DONE;
++})
++
++(define_expand "vec_widen_<su>mult_lo_<mode>"
++  [(match_operand:<VDMODE256> 0 "register_operand")
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
++   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
++  "ISA_HAS_LASX"
++{
++  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
++				   <u_bool>, false, "mult");
++  DONE;
++})
+
+ ;; Vector reduction operation
+ (define_expand "reduc_plus_scal_v4di"
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index ea61cf567..163162598 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void);
+ extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
+ extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode,
+ 						 rtx *);
++extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *);
+
+ /* Routines implemented in loongarch-c.c.  */
+ void loongarch_cpu_cpp_builtins (cpp_reader *);
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9a629a999..c0f58f9a9 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
+   return loongarch_expand_vec_perm_even_odd_1 (d, odd);
+ }
+
++static void
++loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
++{
++  struct expand_vec_perm_d d;
++  unsigned i, nelt, base;
++  bool ok;
++
++  d.target = target;
++  d.op0 = op0;
++  d.op1 = op1;
++  d.vmode = GET_MODE (target);
++  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
++  d.one_vector_p = false;
++  d.testing_p = false;
++
++  base = high_p ? nelt / 2 : 0;
++  for (i = 0; i < nelt / 2; ++i)
++    {
++      d.perm[i * 2] = i + base;
++      d.perm[i * 2 + 1] = i + base + nelt;
++    }
++
++  ok = loongarch_expand_vec_perm_interleave (&d);
++  gcc_assert (ok);
++}
++
++/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd
++   parts of the double sized result elements in the corresponding elements of
++   the target register.  That's NOT what the vec_widen_umult_lo/hi patterns are
++   expected to do.  We emulate the widening lo/hi multiplies with the even/odd
++   versions followed by a vector merge.  */
++
++void
++loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
++				 bool uns_p, bool high_p, const char *optab)
++{
++  machine_mode wmode = GET_MODE (dest);
++  machine_mode mode = GET_MODE (op1);
++  rtx t1, t2, t3;
++
++  t1 = gen_reg_rtx (wmode);
++  t2 = gen_reg_rtx (wmode);
++  t3 = gen_reg_rtx (wmode);
++  switch (mode)
++    {
++    case V16HImode:
++      if (!strcmp (optab, "add"))
++	{
++	  if (!uns_p)
++	    {
++	      emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2));
++	      emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2));
++	    }
++	  else
++	    {
++	      emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2));
++	      emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2));
++	    }
++	}
++      else if (!strcmp (optab, "mult"))
++	{
++	  if (!uns_p)
++	    {
++	      emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2));
++	      emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2));
++	    }
++	  else
++	    {
++	      emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2));
++	      emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2));
++	    }
++	}
++      else if (!strcmp (optab, "sub"))
++	{
++	  if (!uns_p)
++	    {
++	      emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2));
++	      emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2));
++	    }
++	  else
++	    {
++	      emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2));
++	      emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2));
++	    }
++	}
++      break;
++
++    case V32QImode:
++      if (!strcmp (optab, "add"))
++	{
++	  if (!uns_p)
++	    {
++	      emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2));
++	      emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2));
++	    }
++	  else
++	    {
++	      emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2));
++	      emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2));
++	    }
++	}
++      else if (!strcmp (optab, "mult"))
++	{
++	  if (!uns_p)
++	    {
++	      emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2));
++	      emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2));
++	    }
++	  else
++	    {
++	      emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2));
++	      emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2));
++	    }
++	}
++      else if (!strcmp (optab, "sub"))
++	{
++	  if (!uns_p)
++	    {
++	      emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2));
++	      emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2));
++	    }
++	  else
++	    {
++	      emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2));
++	      emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2));
++	    }
++	}
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  loongarch_expand_vec_interleave (t3, t1, t2, high_p);
++  emit_move_insn (dest, gen_lowpart (wmode, t3));
++}
++
+ /* Expand a variable vector permutation for LASX.  */
+
+ void
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 5f9e63d66..29ac950bf 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -509,6 +509,8 @@
+ ;; <su> is like <u>, but the signed form expands to "s" rather than "".
+ (define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
++(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")])
++
+ ;; <optab> expands to the name of the optab for a particular code.
+ (define_code_attr optab [(ashift "ashl")
+ 			 (ashiftrt "ashr")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
+new file mode 100644
+index 000000000..0bf832d0e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvaddwev.w.h" } } */
++/* { dg-final { scan-assembler "xvaddwod.w.h" } } */
++/* { dg-final { scan-assembler "xvaddwev.w.hu" } } */
++/* { dg-final { scan-assembler "xvaddwod.w.hu" } } */
++
++#include <stdint.h>
++
++#define SIZE 1024
++
++void
++wide_uadd (uint32_t *foo, uint16_t *a, uint16_t *b)
++{
++  for ( int i = 0; i < SIZE; i++)
++    foo[i] = a[i] + b[i];
++}
++
++void
++wide_sadd (int32_t *foo, int16_t *a, int16_t *b)
++{
++  for ( int i = 0; i < SIZE; i++)
++    foo[i] = a[i] + b[i];
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
+new file mode 100644
+index 000000000..84b020eea
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvmulwev.w.h" } } */
++/* { dg-final { scan-assembler "xvmulwod.w.h" } } */
++/* { dg-final { scan-assembler "xvmulwev.w.hu" } } */
++/* { dg-final { scan-assembler "xvmulwod.w.hu" } } */
++
++#include <stdint.h>
++
++#define SIZE 1024
++
++void
++wide_umul (uint32_t *foo, uint16_t *a, uint16_t *b)
++{
++  for ( int i = 0; i < SIZE; i++)
++    foo[i] = a[i] * b[i];
++}
++
++void
++wide_smul (int32_t *foo, int16_t *a, int16_t *b)
++{
++  for ( int i = 0; i < SIZE; i++)
++    foo[i] = a[i] * b[i];
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
+new file mode 100644
+index 000000000..69fc3a517
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvsubwev.w.h" } } */
++/* { dg-final { scan-assembler "xvsubwod.w.h" } } */
++/* { dg-final { scan-assembler "xvsubwev.w.hu" } } */
++/* { dg-final { scan-assembler "xvsubwod.w.hu" } } */
++
++#include <stdint.h>
++
++#define SIZE 1024
++
++void
++wide_usub (uint32_t *foo, uint16_t *a, uint16_t *b)
++{
++  for ( int i = 0; i < SIZE; i++)
++    foo[i] = a[i] - b[i];
++}
++
++void
++wide_ssub (int32_t *foo, int16_t *a, int16_t *b)
++{
++  for ( int i = 0; i < SIZE; i++)
++    foo[i] = a[i] - b[i];
++}
+--
+2.43.0
+
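A conceptual model of the even/odd-plus-merge emulation implemented above, in plain C (illustrative only; it ignores LASX's per-128-bit-lane element ordering): double-width results are produced separately for even and odd source lanes, then interleaved so they land back in element order.

    #include <stdint.h>

    /* Scalar model of vec_widen_smult_lo: 16 x int16 -> low 8 x int32.  */
    void
    widen_mult_lo (int32_t out[8], const int16_t a[16], const int16_t b[16])
    {
      int32_t even[8], odd[8];
      for (int i = 0; i < 8; i++)
        {
          even[i] = (int32_t) a[2 * i] * b[2 * i];          /* xvmulwev.w.h */
          odd[i] = (int32_t) a[2 * i + 1] * b[2 * i + 1];   /* xvmulwod.w.h */
        }
      /* Merge: out = { even[0], odd[0], even[1], odd[1], ... }.  */
      for (int i = 0; i < 4; i++)
        {
          out[2 * i] = even[i];
          out[2 * i + 1] = odd[i];
        }
    }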
View file
_service:tar_scm:0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
Added
@@ -0,0 +1,354 @@
+From 472890b43d2848a46fa13945279308f0a21c55d9 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 18 Oct 2023 17:43:39 +0800
+Subject: [PATCH 013/188] LoongArch:Implement the new vector cost model
+ framework.
+
+This patch make loongarch use the new vector hooks and implements the costing
+function determine_suggested_unroll_factor, to make it be able to suggest the
+unroll factor for a given loop being vectorized base vec_ops analysis during
+vector costing and the available issue information.  Referring to aarch64 and
+rs6000 port.
+
+The patch also reduces the cost of unaligned stores, making it equal to the
+cost of aligned ones in order to avoid odd alignment peeling.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from
+	vector_costs.  Add a constructor.
+	(loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to
+	adjust the cost for inner loops.
+	(loongarch_vector_costs::count_operations): New function.
+	(loongarch_vector_costs::determine_suggested_unroll_factor): Ditto.
+	(loongarch_vector_costs::finish_cost): Ditto.
+	(loongarch_builtin_vectorization_cost): Adjust.
+	* config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter.
+	(loongarch-vect-issue-info): Ditto.
+	(mmemvec-cost): Delete.
+	* config/loongarch/genopts/loongarch.opt.in
+	(loongarch-vect-unroll-limit): Ditto.
+	(loongarch-vect-issue-info): Ditto.
+	(mmemvec-cost): Delete.
+	* doc/invoke.texi (loongarch-vect-unroll-limit): Document new option.
+---
+ gcc/config/loongarch/genopts/loongarch.opt.in |  15 +-
+ gcc/config/loongarch/loongarch.cc             | 173 ++++++++++++++++--
+ gcc/config/loongarch/loongarch.opt            |  15 +-
+ gcc/doc/invoke.texi                           |   7 +
+ 4 files changed, 188 insertions(+), 22 deletions(-)
+
+diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
+index f18733c24..74cf4a7f7 100644
+--- a/gcc/config/loongarch/genopts/loongarch.opt.in
++++ b/gcc/config/loongarch/genopts/loongarch.opt.in
+@@ -152,10 +152,6 @@ mbranch-cost=
+ Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+ -mbranch-cost=COST	Set the cost of branches to roughly COST instructions.
+
+-mmemvec-cost=
+-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
+--mmemvec-cost=COST	Set the cost of vector memory access instructions.
+-
+ mcheck-zero-division
+ Target Mask(CHECK_ZERO_DIV)
+ Trap on integer divide by zero.
+@@ -219,3 +215,14 @@ mrelax
+ Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+ Take advantage of linker relaxations to reduce the number of instructions
+ required to materialize symbol addresses.
++
++-param=loongarch-vect-unroll-limit=
++Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
++Used to limit unroll factor which indicates how much the autovectorizer may
++unroll a loop.  The default value is 6.
++
++-param=loongarch-vect-issue-info=
++Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
++Indicate how many non memory access vector instructions can be issued per
++cycle, it's used in unroll factor determination for autovectorizer.  The
++default value is 4.
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c0f58f9a9..e22a64600 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "rtl-iter.h"
+ #include "opts.h"
+ #include "function-abi.h"
++#include "cfgloop.h"
++#include "tree-vectorizer.h"
+
+ /* This file should be included last.  */
+ #include "target-def.h"
+@@ -3841,8 +3843,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
+     }
+ }
+
+-/* Vectorizer cost model implementation.  */
+-
+ /* Implement targetm.vectorize.builtin_vectorization_cost.  */
+
+ static int
+@@ -3861,36 +3861,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+     case vector_load:
+     case vec_to_scalar:
+     case scalar_to_vec:
+-    case cond_branch_not_taken:
+-    case vec_promote_demote:
+     case scalar_store:
+     case vector_store:
+       return 1;
+
++    case vec_promote_demote:
+     case vec_perm:
+       return LASX_SUPPORTED_MODE_P (mode)
+ 	&& !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
+
+     case unaligned_load:
+-    case vector_gather_load:
+-      return 2;
+-
+     case unaligned_store:
+-    case vector_scatter_store:
+-      return 10;
++      return 2;
+
+     case cond_branch_taken:
+-      return 3;
++      return 4;
++
++    case cond_branch_not_taken:
++      return 2;
+
+     case vec_construct:
+       elements = TYPE_VECTOR_SUBPARTS (vectype);
+-      return elements / 2 + 1;
++      if (ISA_HAS_LASX)
++	return elements + 1;
++      else
++	return elements;
+
+     default:
+       gcc_unreachable ();
+     }
+ }
+
++class loongarch_vector_costs : public vector_costs
++{
++public:
++  using vector_costs::vector_costs;
++
++  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
++			      stmt_vec_info stmt_info, slp_tree, tree vectype,
++			      int misalign,
++			      vect_cost_model_location where) override;
++  void finish_cost (const vector_costs *) override;
++
++protected:
++  void count_operations (vect_cost_for_stmt, stmt_vec_info,
++			 vect_cost_model_location, unsigned int);
++  unsigned int determine_suggested_unroll_factor (loop_vec_info);
++  /* The number of vectorized stmts in loop.  */
++  unsigned m_stmts = 0;
++  /* The number of load and store operations in loop.  */
++  unsigned m_loads = 0;
++  unsigned m_stores = 0;
++  /* Reduction factor for suggesting unroll factor.  */
++  unsigned m_reduc_factor = 0;
++  /* True if the loop contains an average operation.  */
++  bool m_has_avg = false;
++};
++
++/* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
++static vector_costs *
++loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
++{
++  return new loongarch_vector_costs (vinfo, costing_for_scalar);
++}
++
++void
++loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
++					  stmt_vec_info stmt_info,
++					  vect_cost_model_location where,
++					  unsigned int count)
++{
++  if (!m_costing_for_scalar
++      && is_a<loop_vec_info> (m_vinfo)
++      && where == vect_body)
++    {
++      m_stmts += count;
++
++      if (kind == scalar_load
++	  || kind == vector_load
++	  || kind == unaligned_load)
++	m_loads += count;
++      else if (kind == scalar_store
++	       || kind == vector_store
++	       || kind == unaligned_store)
++	m_stores += count;
++      else if ((kind == scalar_stmt
++		|| kind == vector_stmt
++		|| kind == vec_to_scalar)
++	       && stmt_info && vect_is_reduction (stmt_info))
++	{
++	  tree lhs = gimple_get_lhs (stmt_info->stmt);
++	  unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
++	  m_reduc_factor = MAX (base * count, m_reduc_factor);
++	}
++    }
++}
++
++unsigned int
++loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
++{
++  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
++
++  if (m_has_avg)
++    return 1;
++
++  /* Don't unroll if it's specified explicitly not to be unrolled.  */
++  if (loop->unroll == 1
++      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
++      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
++    return 1;
++
++  unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores;
++  /* Don't unroll if no vector instructions excepting for memory access.  */
++  if (nstmts_nonldst == 0)
++    return 1;
++
++  /* Use this simple hardware resource model that how many non vld/vst
++     vector instructions can be issued per cycle.  */
++  unsigned int issue_info = loongarch_vect_issue_info;
++  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
++  unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst);
++  uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf);
++
++  return 1 << ceil_log2 (uf);
++}
++
++unsigned
++loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
++				       stmt_vec_info stmt_info, slp_tree,
++				       tree vectype, int misalign,
++				       vect_cost_model_location where)
++{
++  unsigned retval = 0;
++
++  if (flag_vect_cost_model)
++    {
++      int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
++							    misalign);
++      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
++      m_costs[where] += retval;
++
++      count_operations (kind, stmt_info, where, count);
++    }
++
++  if (stmt_info)
++    {
++      /* Detect the use of an averaging operation.  */
++      gimple *stmt = stmt_info->stmt;
++      if (is_gimple_call (stmt)
++	  && gimple_call_internal_p (stmt))
++	{
++	  switch (gimple_call_internal_fn (stmt))
++	    {
++	    case IFN_AVG_FLOOR:
++	    case IFN_AVG_CEIL:
++	      m_has_avg = true;
++	    default:
++	      break;
++	    }
++	}
++    }
++
++  return retval;
++}
++
++void
++loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
++{
++  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
++  if (loop_vinfo)
++    {
++      m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
++    }
++
++  vector_costs::finish_cost (scalar_costs);
++}
++
+ /* Implement TARGET_ADDRESS_COST.  */
+
+ static int
+@@ -7261,9 +7407,6 @@ loongarch_option_override_internal (struct gcc_options *opts,
+   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
+     error ("%qs cannot be used for compiling a shared library",
+ 	   "-mdirect-extern-access");
+-  if (loongarch_vector_access_cost == 0)
+-    loongarch_vector_access_cost = 5;
+-
+
+   switch (la_target.cmodel)
+     {
+@@ -11275,6 +11418,8 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
+ #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+ #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+   loongarch_builtin_vectorization_cost
++#undef TARGET_VECTORIZE_CREATE_COSTS
++#define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs
+
+
+ #undef TARGET_IN_SMALL_DATA_P
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 78f2baf3a..34bd832bd 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -159,10 +159,6 @@ mbranch-cost=
+ Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+ -mbranch-cost=COST	Set the cost of branches to roughly COST instructions.
+
+-mmemvec-cost=
+-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
+--mmemvec-cost=COST	Set the cost of vector memory access instructions.
+-
+ mcheck-zero-division
+ Target Mask(CHECK_ZERO_DIV)
+ Trap on integer divide by zero.
+@@ -226,3 +222,14 @@ mrelax
+ Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+ Take advantage of linker relaxations to reduce the number of instructions
+ required to materialize symbol addresses.
++
++-param=loongarch-vect-unroll-limit=
++Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
++Used to limit unroll factor which indicates how much the autovectorizer may
++unroll a loop.  The default value is 6.
++
++-param=loongarch-vect-issue-info=
++Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
++Indicate how many non memory access vector instructions can be issued per
++cycle, it's used in unroll factor determination for autovectorizer.  The
++default value is 4.
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 7eed77836..168f3d0db 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -24632,6 +24632,13 @@ environments where no dynamic link is performed, like firmwares, OS
+ kernels, executables linked with @option{-static} or @option{-static-pie}.
+ @option{-mdirect-extern-access} is not compatible with @option{-fPIC} or
+ @option{-fpic}.
++
++@item loongarch-vect-unroll-limit
++The vectorizer will use available tuning information to determine whether it
++would be beneficial to unroll the main vectorized loop and by how much.  This
++parameter sets the upper bound of how much the vectorizer will unroll the main
++loop.  The default value is six.
++
+ @end table
+
+ @node M32C Options
+--
+2.43.0
+
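The suggested-unroll-factor heuristic above reduces to a small formula; the sketch below restates it in standalone C (same logic under the stated names; reduc_factor is already clamped to at least 1, and issue_info/limit correspond to the two new --param values):

    static unsigned
    suggested_uf (unsigned stmts, unsigned loads, unsigned stores,
                  unsigned reduc_factor, unsigned issue_info, unsigned limit)
    {
      unsigned nonldst = stmts - loads - stores;
      if (nonldst == 0)
        return 1;
      /* CEIL (reduc_factor * issue_info, nonldst), capped at the limit.  */
      unsigned uf = (reduc_factor * issue_info + nonldst - 1) / nonldst;
      if (uf > limit)
        uf = limit;
      /* 1 << ceil_log2 (uf): round up to a power of two.  */
      unsigned p = 1;
      while (p < uf)
        p <<= 1;
      return p;
    }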
View file
_service:tar_scm:0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
Added
@@ -0,0 +1,34 @@
+From 7f4a912da99f5787c88b275b83ee547c9e1aa3d7 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 23 Oct 2023 09:07:32 +0800
+Subject: [PATCH 014/188] LoongArch: Define macro CLEAR_INSN_CACHE.
+
+LoongArch's microstructure ensures cache consistency by hardware.
+Due to out-of-order execution, "ibar" is required to ensure the visibility of the
+store (invalidated icache) executed by this CPU before "ibar" (to the instance).
+"ibar" will not invalidate the icache, so the start and end parameters are not affect
+"ibar" performance.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CLEAR_INSN_CACHE): New definition.
+---
+ gcc/config/loongarch/loongarch.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 572b538be..f7ddfc452 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1231,3 +1231,8 @@ struct GTY (()) machine_function
+   (TARGET_HARD_FLOAT_ABI ? (TARGET_DOUBLE_FLOAT_ABI ? 8 : 4) : 0)
+
+ #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)
++
++/* LoongArch maintains ICache/DCache coherency by hardware,
++   we just need "ibar" to avoid instruction hazard here.  */
++#undef CLEAR_INSN_CACHE
++#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
+--
+2.43.0
+
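A minimal usage sketch (not from the patch): CLEAR_INSN_CACHE backs libgcc's __clear_cache, which __builtin___clear_cache falls back to, so code that publishes freshly written instructions, a JIT for example, now costs a single "ibar 0" on LoongArch.

    void
    publish_code (char *beg, char *end)
    {
      /* BEG/END are effectively ignored on LoongArch, since hardware
         keeps the instruction and data caches coherent.  */
      __builtin___clear_cache (beg, end);
    }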
View file
_service:tar_scm:0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
Added
@@ -0,0 +1,233 @@ +From 56403837a7859f0a7ccbc56c055261c9adf22fb8 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 23 Oct 2023 15:23:11 +0800 +Subject: PATCH 015/188 LoongArch: Add enum-style -mexplicit-relocs= option + +To take a better balance between scheduling and relaxation when -flto is +enabled, add three-way -mexplicit-relocs={auto,none,always} options. +The old -mexplicit-relocs and -mno-explicit-relocs options are still +supported, they are mapped to -mexplicit-relocs=always and +-mexplicit-relocs=none. + +The default choice is determined by probing assembler capabilities at +build time. If the assembler does not supports explicit relocs at all, +the default will be none; if it supports explicit relocs but not +relaxation, the default will be always; if both explicit relocs and +relaxation are supported, the default will be auto. + +Currently auto is same as none. We will make auto more clever in +following changes. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Add strings for + -mexplicit-relocs={auto,none,always}. + * config/loongarch/genopts/loongarch.opt.in: Add options for + -mexplicit-relocs={auto,none,always}. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-def.h + (EXPLICIT_RELOCS_AUTO): Define. + (EXPLICIT_RELOCS_NONE): Define. + (EXPLICIT_RELOCS_ALWAYS): Define. + (N_EXPLICIT_RELOCS_TYPES): Define. + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Error out if the old-style + -mno-explicit-relocs option is used with + -mexplicit-relocs={auto,none,always} together. Map + -mno-explicit-relocs to -mexplicit-relocs=none and + -mexplicit-relocs to -mexplicit-relocs=always for backward + compatibility. Set a proper default for -mexplicit-relocs= + based on configure-time probed linker capability. Update a + diagnostic message to mention -mexplicit-relocs=always instead + of the old-style -mexplicit-relocs. + (loongarch_handle_model_attribute): Update a diagnostic message + to mention -mexplicit-relocs=always instead of the old-style + -mexplicit-relocs. + * config/loongarch/loongarch.h (TARGET_EXPLICIT_RELOCS): Define. 
+--- + .../loongarch/genopts/loongarch-strings | 6 +++++ + gcc/config/loongarch/genopts/loongarch.opt.in | 21 ++++++++++++++-- + gcc/config/loongarch/loongarch-def.h | 6 +++++ + gcc/config/loongarch/loongarch-str.h | 5 ++++ + gcc/config/loongarch/loongarch.cc | 24 +++++++++++++++++-- + gcc/config/loongarch/loongarch.h | 3 +++ + gcc/config/loongarch/loongarch.opt | 21 ++++++++++++++-- + 7 files changed, 80 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index eb5086fe3..6c8a42af2 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -65,3 +65,9 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme ++ ++# -mexplicit-relocs ++OPTSTR_EXPLICIT_RELOCS explicit-relocs ++STR_EXPLICIT_RELOCS_AUTO auto ++STR_EXPLICIT_RELOCS_NONE none ++STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 74cf4a7f7..e7df1964a 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -176,10 +176,27 @@ mmax-inline-memcpy-size= + Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + +-mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) ++Enum ++Name(explicit_relocs) Type(int) ++The code model option names for -mexplicit-relocs: ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++ ++mexplicit-relocs= ++Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + ++mexplicit-relocs ++Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Use %reloc() assembly operators (for backward compatibility). ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index eb8e53b20..4757de14b 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -100,6 +100,12 @@ extern const char* loongarch_cmodel_strings; + #define CMODEL_EXTREME 5 + #define N_CMODEL_TYPES 6 + ++/* enum explicit_relocs */ ++#define EXPLICIT_RELOCS_AUTO 0 ++#define EXPLICIT_RELOCS_NONE 1 ++#define EXPLICIT_RELOCS_ALWAYS 2 ++#define N_EXPLICIT_RELOCS_TYPES 3 ++ + /* The common default value for variables whose assignments + are triggered by command-line options. */ + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index ecfebf9db..037e9e583 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -64,4 +64,9 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + ++#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" ++#define STR_EXPLICIT_RELOCS_AUTO "auto" ++#define STR_EXPLICIT_RELOCS_NONE "none" ++#define STR_EXPLICIT_RELOCS_ALWAYS "always" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e22a64600..3258c8655 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7383,6 +7383,25 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + ++ if (la_opt_explicit_relocs != M_OPT_UNSET ++ && la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ error ("do not use %qs (with %qs) and %qs (without %qs) together", ++ "-mexplicit-relocs=", "=", ++ la_opt_explicit_relocs_backward ? "-mexplicit-relocs" ++ : "-mno-explicit-relocs", "="); ++ ++ if (la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ la_opt_explicit_relocs = (la_opt_explicit_relocs_backward ++ ? EXPLICIT_RELOCS_ALWAYS ++ : EXPLICIT_RELOCS_NONE); ++ ++ if (la_opt_explicit_relocs == M_OPT_UNSET) ++ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (HAVE_AS_MRELAX_OPTION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); ++ + if (TARGET_ABI_LP64) + flag_pcc_struct_return = 0; + +@@ -7413,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + case CMODEL_EXTREME: + if (!TARGET_EXPLICIT_RELOCS) + error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs"); ++ "extreme", "-mexplicit-relocs=always"); + + if (opts->x_flag_plt) + { +@@ -7717,7 +7736,8 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + if (!TARGET_EXPLICIT_RELOCS) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, "-mexplicit-relocs"); ++ "%qE attribute requires %s", name, ++ "-mexplicit-relocs=always"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index f7ddfc452..6e8ac293a 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1236,3 +1236,6 @@ struct GTY (()) machine_function + we just need "ibar" to avoid instruction hazard here. */ + #undef CLEAR_INSN_CACHE + #define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0) ++ ++#define TARGET_EXPLICIT_RELOCS \ ++ (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 34bd832bd..44376fd77 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -183,10 +183,27 @@ mmax-inline-memcpy-size= + Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. 
+ +-mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) ++Enum ++Name(explicit_relocs) Type(int) ++The code model option names for -mexplicit-relocs: ++ ++EnumValue ++Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO) ++ ++EnumValue ++Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE) ++ ++EnumValue ++Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS) ++ ++mexplicit-relocs= ++Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + ++mexplicit-relocs ++Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Use %reloc() assembly operators (for backward compatibility). ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +-- +2.43.0 +
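The patch above replaces the boolean -mexplicit-relocs with a three-valued -mexplicit-relocs={auto,none,always}, keeping the old spelling only as a backward-compatibility alias. A minimal sketch of how the three values differ for a simple global access, assuming a loongarch64-linux-gnu cross toolchain (the driver name and exact registers are illustrative, not taken from the patch):

    /* explicit-relocs-demo.c
         loongarch64-linux-gnu-gcc -O2 -S -mexplicit-relocs=none   explicit-relocs-demo.c
           -> the address of `x' is formed with the la.local assembler
              macro, leaving any relaxation to the assembler and linker;
         loongarch64-linux-gnu-gcc -O2 -S -mexplicit-relocs=always explicit-relocs-demo.c
           -> the compiler itself emits the pcalau12i/%pc_hi20 style
              instruction pair so the two halves can be scheduled;
       =auto (the new default when the assembler supports both explicit
       relocs and -mrelax) chooses between the two per access.  */
    int x;

    int
    get_x (void)
    {
      return x;
    }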
View file
_service:tar_scm:0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
Added
@@ -0,0 +1,212 @@
+From 8539e5560e7bf11473cc7c386043b7019264236a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 30 Sep 2023 18:46:28 +0800
+Subject: [PATCH 016/188] LoongArch: Use explicit relocs for GOT access when
+ -mexplicit-relocs=auto and LTO during a final link with linker plugin
+
+If we are performing LTO for a final link and linker plugin is enabled,
+then we are sure any GOT access may resolve to a symbol out of the link
+unit (otherwise the linker plugin will tell us the symbol should be
+resolved locally and we'll use PC-relative access instead).
+
+Produce machine instructions with explicit relocs instead of la.global
+for better scheduling.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-protos.h
+	(loongarch_explicit_relocs_p): Declare new function.
+	* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
+	Implement.
+	(loongarch_symbol_insns): Call loongarch_explicit_relocs_p for
+	SYMBOL_GOT_DISP, instead of using TARGET_EXPLICIT_RELOCS.
+	(loongarch_split_symbol): Call loongarch_explicit_relocs_p for
+	deciding if return early, instead of using
+	TARGET_EXPLICIT_RELOCS.
+	(loongarch_output_move): Call loongarch_explicit_relocs_p
+	instead of using TARGET_EXPLICIT_RELOCS.
+	* config/loongarch/loongarch.md (*low<mode>): Remove
+	TARGET_EXPLICIT_RELOCS from insn condition.
+	(@ld_from_got<mode>): Likewise.
+	* config/loongarch/predicates.md (move_operand): Call
+	loongarch_explicit_relocs_p instead of using
+	TARGET_EXPLICIT_RELOCS.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/explicit-relocs-auto-lto.c: New test.
+---
+ gcc/config/loongarch/loongarch-protos.h       |  1 +
+ gcc/config/loongarch/loongarch.cc             | 34 +++++++++++++++----
+ gcc/config/loongarch/loongarch.md             |  4 +--
+ gcc/config/loongarch/predicates.md            |  8 ++---
+ .../loongarch/explicit-relocs-auto-lto.c      | 26 ++++++++++++++
+ 5 files changed, 59 insertions(+), 14 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
+
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index 163162598..51d38177b 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -220,4 +220,5 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int);
+ extern tree loongarch_build_builtin_va_list (void);
+ 
+ extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool);
++extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
+ #endif /* ! GCC_LOONGARCH_PROTOS_H */
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 3258c8655..1d20577e7 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -1922,6 +1922,29 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
+   gcc_unreachable ();
+ }
+ 
++/* If -mexplicit-relocs=auto, we use machine operations with reloc hints
++   for cases where the linker is unable to relax so we can schedule the
++   machine operations, otherwise use an assembler pseudo-op so the
++   assembler will generate R_LARCH_RELAX.
*/ ++ ++bool ++loongarch_explicit_relocs_p (enum loongarch_symbol_type type) ++{ ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) ++ return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; ++ ++ /* If we are performing LTO for a final link, and we have the linker ++ plugin so we know the resolution of the symbols, then all GOT ++ references are binding to external symbols or preemptable symbols. ++ So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin) ++ && type == SYMBOL_GOT_DISP); ++} ++ + /* Returns the number of instructions necessary to reference a symbol. */ + + static int +@@ -1937,7 +1960,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + case SYMBOL_GOT_DISP: + /* The constant will have to be loaded from the GOT before it + is used in an address. */ +- if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE) ++ if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE) + return 0; + + return 3; +@@ -3034,7 +3057,7 @@ loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + If so, and if LOW_OUT is nonnull, emit the high part and store the + low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. + +- Return false if build with '-mno-explicit-relocs'. ++ Return false if build with '-mexplicit-relocs=none'. + + TEMP is as for loongarch_force_temporary and is used to load the high + part into a register. +@@ -3048,12 +3071,9 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + { + enum loongarch_symbol_type symbol_type; + +- /* If build with '-mno-explicit-relocs', don't split symbol. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE) + || !loongarch_symbolic_constant_p (addr, &symbol_type) ++ || !loongarch_explicit_relocs_p (symbol_type) + || loongarch_symbol_insns (symbol_type, mode) == 0 + || !loongarch_split_symbol_type (symbol_type)) + return false; +@@ -4793,7 +4813,7 @@ loongarch_output_move (rtx dest, rtx src) + } + } + +- if (!TARGET_EXPLICIT_RELOCS ++ if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src)) + && dest_code == REG && symbolic_operand (src, VOIDmode)) + { + if (loongarch_classify_symbol (src) == SYMBOL_PCREL) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 29ac950bf..81c97393b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2247,7 +2247,7 @@ + (set (match_operand:P 0 "register_operand" "=r") + (lo_sum:P (match_operand:P 1 "register_operand" " r") + (match_operand:P 2 "symbolic_operand" ""))) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.<d>\t%0,%1,%L2" + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) +@@ -2275,7 +2275,7 @@ + (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand"))) + UNSPEC_LOAD_FROM_GOT)) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "ld.<d>\t%0,%1,%L2" + (set_attr "type" "move") + ) +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index ad6cee5c4..6b50b3a4d 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -541,16 +541,14 @@ + case SYMBOL_REF: + case LABEL_REF: + return (loongarch_symbolic_constant_p (op, &symbol_type) +- && (!TARGET_EXPLICIT_RELOCS ++ && (!loongarch_explicit_relocs_p (symbol_type) + || !loongarch_split_symbol_type 
(symbol_type))); + + case HIGH: +- /* '-mno-explicit-relocs' don't generate high/low pairs. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + op = XEXP (op, 0); ++ + return (loongarch_symbolic_constant_p (op, &symbol_type) ++ && loongarch_explicit_relocs_p (symbol_type) + && loongarch_split_symbol_type (symbol_type)); + + default: +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +new file mode 100644 +index 000000000..f53b54689 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +@@ -0,0 +1,26 @@ ++/* { dg-do link } */ ++/* { dg-require-effective-target lto } */ ++/* { dg-require-linker-plugin "" } */ ++/* { dg-options "-fpic -shared -O2 --save-temps -mexplicit-relocs=auto -flto -fuse-linker-plugin -flto-partition=one" } */ ++ ++int pcrel __attribute__ ((visibility ("hidden"))); ++int got __attribute__ ((visibility ("default"))); ++ ++int ++*addr_pcrel (void) ++{ ++ return &pcrel; ++} ++ ++int ++*addr_got (void) ++{ ++ return &got; ++} ++ ++/* With linker plugin we should use la.local (it can be relaxed to pcaddi), ++ but not la.global (we are pretty sure the linker cannot relax la.global ++ got). */ ++/* { dg-final { scan-lto-assembler "la.local.*pcrel" } } */ ++/* { dg-final { scan-lto-assembler "pcalau12i.*%got_pc_hi20\\\(got\\\)" } } */ ++/* { dg-final { scan-lto-assembler "ld.*%got_pc_lo12\\\(got\\\)" } } */ +-- +2.43.0 +
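The new test above captures the whole point of the change; a condensed usage sketch follows (everything is taken from the test itself, only the file name differs, and a loongarch64-linux-gnu cross driver is assumed):

    /* got-lto-demo.c -- build as a shared object with LTO and the linker
       plugin enabled:
         loongarch64-linux-gnu-gcc -fpic -shared -O2 -flto \
             -fuse-linker-plugin -mexplicit-relocs=auto --save-temps \
             got-lto-demo.c
       In the LTO assembly, the hidden symbol is expected to be reached
       via la.local (relaxable to pcaddi), while the preemptible symbol
       should use the explicit %got_pc_hi20/%got_pc_lo12 operators instead
       of la.global, since a true GOT load cannot be relaxed anyway.  */
    int pcrel __attribute__ ((visibility ("hidden")));
    int got __attribute__ ((visibility ("default")));

    int *addr_pcrel (void) { return &pcrel; }
    int *addr_got (void) { return &got; }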
View file
_service:tar_scm:0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
Added
@@ -0,0 +1,146 @@ +From 23b4166c6699a1a3063b11fa45497c1a1524bd48 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 2 Oct 2023 13:00:18 +0800 +Subject: PATCH 017/188 LoongArch: Use explicit relocs for TLS access with + -mexplicit-relocs=auto + +The linker does not know how to relax TLS access for LoongArch, so let's +emit machine instructions with explicit relocs for TLS. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for TLS symbol types if -mexplicit-relocs=auto. + (loongarch_call_tls_get_addr): Replace TARGET_EXPLICIT_RELOCS + with la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@tls_low<mode>): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: New + test. + * gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c: New + test. +--- + gcc/config/loongarch/loongarch.cc | 37 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 2 +- + .../explicit-relocs-auto-tls-ld-gd.c | 9 +++++ + .../explicit-relocs-auto-tls-le-ie.c | 6 +++ + 4 files changed, 40 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1d20577e7..fa5c14be6 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1933,16 +1933,27 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + +- /* If we are performing LTO for a final link, and we have the linker +- plugin so we know the resolution of the symbols, then all GOT +- references are binding to external symbols or preemptable symbols. +- So the linker cannot relax them. */ +- return (in_lto_p +- && !flag_incremental_link +- && HAVE_LTO_PLUGIN == 2 +- && (!global_options_set.x_flag_use_linker_plugin +- || global_options.x_flag_use_linker_plugin) +- && type == SYMBOL_GOT_DISP); ++ switch (type) ++ { ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLS_LE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: ++ /* The linker don't know how to relax TLS accesses. */ ++ return true; ++ case SYMBOL_GOT_DISP: ++ /* If we are performing LTO for a final link, and we have the ++ linker plugin so we know the resolution of the symbols, then ++ all GOT references are binding to external symbols or ++ preemptable symbols. So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin)); ++ default: ++ return false; ++ } + } + + /* Returns the number of instructions necessary to reference a symbol. */ +@@ -2749,7 +2760,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2914,7 +2925,7 @@ loongarch_legitimize_tls_address (rtx loc) + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); +@@ -2951,7 +2962,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 81c97393b..3b836d535 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2257,7 +2257,7 @@ + (unspec:P (mem:P (lo_sum:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand" ""))) + UNSPEC_TLS_LOW)) +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.<d>\t%0,%1,%L2" + (set_attr "type" "arith") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..957ff98df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +new file mode 100644 +index 000000000..78898cfc6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mexplicit-relocs=auto" } */ ++ ++#include "explicit-relocs-auto-tls-ld-gd.c" ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +-- +2.43.0 +
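Because the linker cannot relax TLS sequences at all, -mexplicit-relocs=auto now always uses explicit relocation operators for them. A sketch distilled from the two new tests (cross-compiler name assumed; the grep check mirrors the tests' scan-assembler-not pattern):

    /* tls-demo.c
         loongarch64-linux-gnu-gcc -O2 -fPIC -S -mexplicit-relocs=auto tls-demo.c
         grep la.tls tls-demo.s    # expected to match nothing
       Without this patch the same command emits la.tls.* assembler
       macros for these accesses.  */
    __thread int a __attribute__ ((visibility ("hidden")));
    extern __thread int b;

    int
    test (void)
    {
      return a + b;
    }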
View file
_service:tar_scm:0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
Added
@@ -0,0 +1,245 @@ +From c29a4f4fb5ff24ef975ba27688a3da696aa7d006 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 1 Oct 2023 11:14:29 +0800 +Subject: PATCH 018/188 LoongArch: Use explicit relocs for addresses only + used for one load or store with -mexplicit-relocs=auto and + -mcmodel={normal,medium} + +In these cases, if we use explicit relocs, we end up with 2 +instructions: + + pcalau12i t0, %pc_hi20(x) + ld.d t0, t0, %pc_lo12(x) + +If we use la.local pseudo-op, in the best scenario (x is in +/- 2MiB +range) we still have 2 instructions: + + pcaddi t0, %pcrel_20(x) + ld.d t0, t0, 0 + +If x is out of the range we'll have 3 instructions. So for these cases +just emit machine instructions with explicit relocs. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (symbolic_pcrel_operand): New + predicate. + * config/loongarch/loongarch.md (define_peephole2): Optimize + la.local + ld/st to pcalau12i + ld/st if the address is only used + once if -mexplicit-relocs=auto and -mcmodel=normal or medium. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-single-load-store.c: + New test. + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c: + New test. +--- + gcc/config/loongarch/loongarch.md | 122 ++++++++++++++++++ + gcc/config/loongarch/predicates.md | 7 + + ...-relocs-auto-single-load-store-no-anchor.c | 6 + + .../explicit-relocs-auto-single-load-store.c | 14 ++ + 4 files changed, 149 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3b836d535..c4c6baa60 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -65,6 +65,7 @@ + + UNSPEC_LOAD_FROM_GOT + UNSPEC_PCALAU12I ++ UNSPEC_PCALAU12I_GR + UNSPEC_ORI_L_LO12 + UNSPEC_LUI_L_HI20 + UNSPEC_LUI_H_LO20 +@@ -2297,6 +2298,16 @@ + "pcalau12i\t%0,%%pc_hi20(%1)" + (set_attr "type" "move")) + ++;; @pcalau12i may be used for sibcall so it has a strict constraint. This ++;; allows any general register as the operand. ++(define_insn "@pcalau12i_gr<mode>" ++ (set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P (match_operand:P 1 "symbolic_operand" "") ++ UNSPEC_PCALAU12I_GR)) ++ "" ++ "pcalau12i\t%0,%%pc_hi20(%1)" ++ (set_attr "type" "move")) ++ + (define_insn "@ori_l_lo12<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P (match_operand:P 1 "register_operand" "r") +@@ -3748,6 +3759,117 @@ + (set_attr "type" "unknown") + (set_attr "mode" "<MODE>")) + ++;; With normal or medium code models, if the only use of a pc-relative ++;; address is for loading or storing a value, then relying on linker ++;; relaxation is not better than emitting the machine instruction directly. ++;; Even if the la.local pseudo op can be relaxed, we get: ++;; ++;; pcaddi $t0, %pcrel_20(x) ++;; ld.d $t0, $t0, 0 ++;; ++;; There are still two instructions, same as using the machine instructions ++;; and explicit relocs: ++;; ++;; pcalau12i $t0, %pc_hi20(x) ++;; ld.d $t0, $t0, %pc_lo12(x) ++;; ++;; And if the pseudo op cannot be relaxed, we'll get a worse result (with ++;; 3 instructions). 
++(define_peephole2 ++ (set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (mem:GPR (match_dup 0))) ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands0) \ ++ || REGNO (operands0) == REGNO (operands2))" ++ (set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1)))) ++ { ++ emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ }) ++ ++(define_peephole2 ++ (set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (mem:GPR (plus (match_dup 0) ++ (match_operand 3 "const_int_operand")))) ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands0) \ ++ || REGNO (operands0) == REGNO (operands2))" ++ (set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1)))) ++ { ++ operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); ++ emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ }) ++ ++(define_peephole2 ++ (set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (any_extend:GPR (mem:SUBDI (match_dup 0)))) ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands0) \ ++ || REGNO (operands0) == REGNO (operands2))" ++ (set (match_dup 2) ++ (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) ++ (match_dup 1))))) ++ { ++ emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ }) ++ ++(define_peephole2 ++ (set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (any_extend:GPR ++ (mem:SUBDI (plus (match_dup 0) ++ (match_operand 3 "const_int_operand"))))) ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands0) \ ++ || REGNO (operands0) == REGNO (operands2))" ++ (set (match_dup 2) ++ (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) ++ (match_dup 1))))) ++ { ++ operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); ++ emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ }) ++ ++(define_peephole2 ++ (set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (mem:QHWD (match_dup 0)) ++ (match_operand:QHWD 2 "register_operand")) ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands0)) \ ++ && REGNO (operands0) != REGNO (operands2)" ++ (set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2)) ++ { ++ emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ }) ++ ++(define_peephole2 ++ (set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (mem:QHWD (plus (match_dup 0) ++ (match_operand 3 "const_int_operand"))) ++ (match_operand:QHWD 2 "register_operand")) ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands0)) \ ++ && REGNO (operands0) != REGNO (operands2)" ++ (set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2)) ++ { ++ operands1 = 
plus_constant (Pmode, operands1, INTVAL (operands3)); ++ emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ }) ++ + ;; Synchronization instructions. + + (include "sync.md") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 6b50b3a4d..1d669f560 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -563,6 +563,13 @@ + return loongarch_symbolic_constant_p (op, &type); + }) + ++(define_predicate "symbolic_pcrel_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type type; ++ return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL; ++}) ++ + (define_predicate "equality_operator" + (match_code "eq,ne")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c +new file mode 100644 +index 000000000..fb03403d7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto -fno-section-anchors" } */ ++ ++#include "explicit-relocs-auto-single-load-store.c" ++ ++/* { dg-final { scan-assembler-not "la.local" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c +new file mode 100644 +index 000000000..0d53644cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */ ++ ++long a; ++int b; ++unsigned int c; ++ ++long load_a() { return a; } ++long load_b() { return b; } ++long load_c() { return c; } ++void store_a(long x) { a = x; } ++void store_b(int x) { b = x; } ++ ++/* { dg-final { scan-assembler-not "la.local" } } */ +-- +2.43.0 +
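The peepholes only fire when the pc-relative address has a single load or store use, which is exactly the shape of the new tests. A sketch of such code, with the expected instruction sequence quoted from the commit message (register names illustrative):

    /* single-use-address.c
         loongarch64-linux-gnu-gcc -O2 -S -mexplicit-relocs=auto \
             single-use-address.c
         grep la.local single-use-address.s   # expected to match nothing
       Per the commit message, the load becomes:
         pcalau12i  $t0, %pc_hi20(counter)
         ld.d       $t0, $t0, %pc_lo12(counter)  */
    long counter;

    long read_counter (void)   { return counter; }
    void bump_counter (long v) { counter = v; }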
View file
_service:tar_scm:0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch
Added
@@ -0,0 +1,84 @@
+From 619b6081064bf85a19f4659e278a361875e4f9fb Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Tue, 24 Oct 2023 14:40:14 +0800
+Subject: [PATCH 019/188] LoongArch: Implement __builtin_thread_pointer for
+ TLS.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (get_thread_pointer<mode>):Adds the
+	instruction template corresponding to the __builtin_thread_pointer
+	function.
+	* doc/extend.texi:Add the __builtin_thread_pointer function support
+	description to the documentation.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/builtin_thread_pointer.c: New test.
+---
+ gcc/config/loongarch/loongarch.md                 |  7 +++++++
+ gcc/doc/extend.texi                               |  5 +++++
+ .../gcc.target/loongarch/builtin_thread_pointer.c | 10 ++++++++++
+ 3 files changed, 22 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index c4c6baa60..80487488d 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -113,6 +113,7 @@
+ 
+ (define_constants
+   [(RETURN_ADDR_REGNUM		1)
++   (TP_REGNUM			2)
+    (T0_REGNUM			12)
+    (T1_REGNUM			13)
+    (S0_REGNUM			23)
+@@ -3647,6 +3648,12 @@
+   (set_attr "length" "0")
+   (set_attr "type" "ghost")])
+ 
++;; Named pattern for expanding thread pointer reference.
++(define_expand "get_thread_pointer<mode>"
++  [(set (match_operand:P 0 "register_operand" "=r")
++	(reg:P TP_REGNUM))]
++  "HAVE_AS_TLS"
++  {})
+ 
+ (define_split
+   [(match_operand 0 "small_data_pattern")]
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 1d1bac255..497c6de5f 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -16257,6 +16257,11 @@ function you need to include @code{larchintrin.h}.
+ void __break (imm0_32767)
+ @end smallexample
+ 
++Returns the value that is currently set in the @samp{tp} register.
++@smallexample
++ void * __builtin_thread_pointer (void)
++@end smallexample
++
+ @node MIPS DSP Built-in Functions
+ @subsection MIPS DSP Built-in Functions
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+new file mode 100644
+index 000000000..541e3b143
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target tls_native } */
++/* { dg-options "-O2" } */
++/* { dg-final { scan-assembler "or\t\\\$r4,\\\$r2,\\\$r0" } } */
++
++void *
++get_tp ()
++{
++  return __builtin_thread_pointer ();
++}
+-- 
+2.43.0
+
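For reference, the builtin itself is target-independent; this patch only provides the LoongArch expander. A usage sketch matching the new test (the expected "or $r4,$r2,$r0" move comes straight from the test's scan-assembler pattern):

    /* thread-pointer-demo.c -- compile with -O2; the function body is
       expected to be a single move of $r2 (tp) into the return register.  */
    void *
    current_thread_pointer (void)
    {
      return __builtin_thread_pointer ();
    }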
View file
_service:tar_scm:0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
Added
@@ -0,0 +1,189 @@ +From 9b29e6ba10716656ba9b32c33f021e920bb05f3d Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Mon, 23 Oct 2023 10:13:24 +0800 +Subject: PATCH 020/188 LoongArch: Fix vfrint-releated comments in + lsxintrin.h and lasxintrin.h + +The comment of vfrint-related intrinsic functions does not match the return +value type in definition. This patch fixes these comments. + +gcc/ChangeLog: + + * config/loongarch/lasxintrin.h (__lasx_xvftintrnel_l_s): Fix comments. + (__lasx_xvfrintrne_s): Ditto. + (__lasx_xvfrintrne_d): Ditto. + (__lasx_xvfrintrz_s): Ditto. + (__lasx_xvfrintrz_d): Ditto. + (__lasx_xvfrintrp_s): Ditto. + (__lasx_xvfrintrp_d): Ditto. + (__lasx_xvfrintrm_s): Ditto. + (__lasx_xvfrintrm_d): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vftintrneh_l_s): Ditto. + (__lsx_vfrintrne_s): Ditto. + (__lsx_vfrintrne_d): Ditto. + (__lsx_vfrintrz_s): Ditto. + (__lsx_vfrintrz_d): Ditto. + (__lsx_vfrintrp_s): Ditto. + (__lsx_vfrintrp_d): Ditto. + (__lsx_vfrintrm_s): Ditto. + (__lsx_vfrintrm_d): Ditto. +--- + gcc/config/loongarch/lasxintrin.h | 16 ++++++++-------- + gcc/config/loongarch/lsxintrin.h | 16 ++++++++-------- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index d39379927..7bce2c757 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3368,7 +3368,7 @@ __m256i __lasx_xvftintrnel_l_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrne_s (__m256 _1) + { +@@ -3376,7 +3376,7 @@ __m256 __lasx_xvfrintrne_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrne_d (__m256d _1) + { +@@ -3384,7 +3384,7 @@ __m256d __lasx_xvfrintrne_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrz_s (__m256 _1) + { +@@ -3392,7 +3392,7 @@ __m256 __lasx_xvfrintrz_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrz_d (__m256d _1) + { +@@ -3400,7 +3400,7 @@ __m256d __lasx_xvfrintrz_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrp_s (__m256 _1) + { +@@ -3408,7 +3408,7 @@ __m256 __lasx_xvfrintrp_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrp_d (__m256d _1) + { +@@ -3416,7 +3416,7 @@ __m256d __lasx_xvfrintrp_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrm_s (__m256 _1) + { +@@ -3424,7 +3424,7 @@ __m256 __lasx_xvfrintrm_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrm_d (__m256d _1) + { +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index ec4206990..29553c093 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -3412,7 +3412,7 @@ __m128i __lsx_vftintrneh_l_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrne_s (__m128 _1) + { +@@ -3420,7 +3420,7 @@ __m128 __lsx_vfrintrne_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrne_d (__m128d _1) + { +@@ -3428,7 +3428,7 @@ __m128d __lsx_vfrintrne_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrz_s (__m128 _1) + { +@@ -3436,7 +3436,7 @@ __m128 __lsx_vfrintrz_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrz_d (__m128d _1) + { +@@ -3444,7 +3444,7 @@ __m128d __lsx_vfrintrz_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrp_s (__m128 _1) + { +@@ -3452,7 +3452,7 @@ __m128 __lsx_vfrintrp_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrp_d (__m128d _1) + { +@@ -3460,7 +3460,7 @@ __m128d __lsx_vfrintrp_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrm_s (__m128 _1) + { +@@ -3468,7 +3468,7 @@ __m128 __lsx_vfrintrm_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrm_d (__m128d _1) + { +-- +2.43.0 +
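The fix above is comment-only, but it documents a real distinction: the vfrint* intrinsics round in place within the floating-point vector type, while the separate vftint* family converts to integer vectors. A minimal sketch (requires -mlsx; the wrapper name is illustrative):

    #include <lsxintrin.h>

    /* Rounds each double to the nearest even integral value but keeps
       the V2DF type -- the corrected comment's point.  */
    __m128d
    round_to_even (__m128d v)
    {
      return __lsx_vfrintrne_d (v);
    }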
View file
_service:tar_scm:0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
Added
@@ -0,0 +1,418 @@ +From 156d9451a5b20ac336370f1610a949db1bef7a26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 26 Oct 2023 09:34:32 +0800 +Subject: PATCH 021/188 LoongArch:Enable vcond_mask_mn expanders for SF/DF + modes. + +If the vcond_mask patterns don't support fp modes, the vector +FP comparison instructions will not be generated. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vcond_mask_<ILASX:mode><ILASX:mode>): Change to + (vcond_mask_<mode><mode256_i>): this. + * config/loongarch/lsx.md (vcond_mask_<ILSX:mode><ILSX:mode>): Change to + (vcond_mask_<mode><mode_i>): this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: New test. +--- + gcc/config/loongarch/lasx.md | 14 +-- + gcc/config/loongarch/lsx.md | 14 +-- + .../loongarch/vector/lasx/lasx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 87 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-2.c | 87 +++++++++++++++++++ + 6 files changed, 316 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 442fda246..f0f2dd08d 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -906,15 +906,15 @@ + }) + + ;; Same as vcond_ +-(define_expand "vcond_mask_<ILASX:mode><ILASX:mode>" +- (match_operand:ILASX 0 "register_operand") +- (match_operand:ILASX 1 "reg_or_m1_operand") +- (match_operand:ILASX 2 "reg_or_0_operand") +- (match_operand:ILASX 3 "register_operand") ++(define_expand "vcond_mask_<mode><mode256_i>" ++ (match_operand:LASX 0 "register_operand") ++ (match_operand:LASX 1 "reg_or_m1_operand") ++ (match_operand:LASX 2 "reg_or_0_operand") ++ (match_operand:<VIMODE256> 3 "register_operand") + "ISA_HAS_LASX" + { +- loongarch_expand_vec_cond_mask_expr (<ILASX:MODE>mode, +- <ILASX:VIMODE256>mode, operands); ++ loongarch_expand_vec_cond_mask_expr (<MODE>mode, ++ <VIMODE256>mode, operands); + DONE; + }) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index b4e92ae9c..4af32c8df 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -644,15 +644,15 @@ + DONE; + }) + +-(define_expand "vcond_mask_<ILSX:mode><ILSX:mode>" +- (match_operand:ILSX 0 "register_operand") +- (match_operand:ILSX 1 "reg_or_m1_operand") +- (match_operand:ILSX 2 "reg_or_0_operand") +- (match_operand:ILSX 3 "register_operand") ++(define_expand "vcond_mask_<mode><mode_i>" ++ (match_operand:LSX 0 "register_operand") ++ (match_operand:LSX 1 "reg_or_m1_operand") ++ (match_operand:LSX 2 "reg_or_0_operand") ++ (match_operand:<VIMODE> 3 "register_operand") + "ISA_HAS_LSX" + { +- loongarch_expand_vec_cond_mask_expr (<ILSX:MODE>mode, +- <ILSX:VIMODE>mode, operands); ++ loongarch_expand_vec_cond_mask_expr (<MODE>mode, ++ <VIMODE>mode, operands); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c +new file mode 100644 +index 000000000..ee9cb1a1f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c +@@ -0,0 +1,64 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */ ++ ++#include <stdint-gcc.h> ++ ++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ ++ void __attribute__ ((noinline, noclone)) \ ++ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ ++ DATA_TYPE *__restrict__ x, \ ++ DATA_TYPE *__restrict__ y, \ ++ CMP_TYPE *__restrict__ a, \ ++ CMP_TYPE *__restrict__ b, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ DATA_TYPE xval = xi, yval = yi; \ ++ CMP_TYPE aval = ai, bval = bi; \ ++ ri = aval COND bval ? xval : yval; \ ++ } \ ++ } ++ ++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ ++ T (int8_t, int8_t, COND, SUFFIX) \ ++ T (int16_t, int16_t, COND, SUFFIX) \ ++ T (int32_t, int32_t, COND, SUFFIX) \ ++ T (int64_t, int64_t, COND, SUFFIX) \ ++ T (float, int32_t, COND, SUFFIX##_float) \ ++ T (double, int64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ ++ T (uint8_t, uint8_t, COND, SUFFIX) \ ++ T (uint16_t, uint16_t, COND, SUFFIX) \ ++ T (uint32_t, uint32_t, COND, SUFFIX) \ ++ T (uint64_t, uint64_t, COND, SUFFIX) \ ++ T (float, uint32_t, COND, SUFFIX##_float) \ ++ T (double, uint64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ ++ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ ++ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) ++ ++#define TEST_VAR_ALL(T) \ ++ TEST_COND_VAR_ALL (T, >, _gt) \ ++ TEST_COND_VAR_ALL (T, <, _lt) \ ++ TEST_COND_VAR_ALL (T, >=, _ge) \ ++ TEST_COND_VAR_ALL (T, <=, _le) \ ++ TEST_COND_VAR_ALL (T, ==, _eq) \ ++ TEST_COND_VAR_ALL (T, !=, _ne) ++ ++TEST_VAR_ALL (DEF_VCOND_VAR) ++ ++/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +new file mode 100644 +index 000000000..5f40ed44c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlasx" } */ ++ ++#include <stdint-gcc.h> ++ ++#define eq(A, B) ((A) == (B)) ++#define ne(A, B) ((A) != (B)) ++#define olt(A, B) ((A) < (B)) ++#define ole(A, B) ((A) <= (B)) ++#define oge(A, B) ((A) >= (B)) ++#define ogt(A, B) ((A) > (B)) ++#define ordered(A, B) (!__builtin_isunordered (A, B)) ++#define unordered(A, B) (__builtin_isunordered (A, B)) ++#define ueq(A, B) (!__builtin_islessgreater (A, B)) ++#define ult(A, B) (__builtin_isless (A, B)) ++#define ule(A, B) (__builtin_islessequal (A, B)) ++#define uge(A, B) 
(__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define TEST_LOOP(TYPE1, TYPE2, CMP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ ++ TYPE1 *restrict src, \ ++ TYPE1 fallback, \ ++ TYPE2 *restrict a, \ ++ TYPE2 *restrict b, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ {\ ++ TYPE2 aval = ai; \ ++ TYPE2 bval = bi; \ ++ TYPE1 srcval = srci; \ ++ desti = CMP (aval, bval) ? srcval : fallback; \ ++ }\ ++ } ++ ++#define TEST_CMP(CMP) \ ++ TEST_LOOP (int32_t, float, CMP) \ ++ TEST_LOOP (uint32_t, float, CMP) \ ++ TEST_LOOP (float, float, CMP) \ ++ TEST_LOOP (int64_t, double, CMP) \ ++ TEST_LOOP (uint64_t, double, CMP) \ ++ TEST_LOOP (double, double, CMP) ++ ++TEST_CMP (eq) ++TEST_CMP (ne) ++TEST_CMP (olt) ++TEST_CMP (ole) ++TEST_CMP (oge) ++TEST_CMP (ogt) ++TEST_CMP (ordered) ++TEST_CMP (unordered) ++TEST_CMP (ueq) ++TEST_CMP (ult) ++TEST_CMP (ule) ++TEST_CMP (uge) ++TEST_CMP (ugt) ++TEST_CMP (nueq) ++TEST_CMP (nult) ++TEST_CMP (nule) ++TEST_CMP (nuge) ++TEST_CMP (nugt) ++ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +new file mode 100644 +index 000000000..138adccfa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +@@ -0,0 +1,64 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlsx" } */ ++ ++#include <stdint-gcc.h> ++ ++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ ++ void __attribute__ ((noinline, noclone)) \ ++ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ ++ DATA_TYPE *__restrict__ x, \ ++ DATA_TYPE *__restrict__ y, \ ++ CMP_TYPE *__restrict__ a, \ ++ CMP_TYPE *__restrict__ b, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ DATA_TYPE xval = xi, yval = yi; \ ++ CMP_TYPE aval = ai, bval = bi; \ ++ ri = aval COND bval ? 
xval : yval; \ ++ } \ ++ } ++ ++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ ++ T (int8_t, int8_t, COND, SUFFIX) \ ++ T (int16_t, int16_t, COND, SUFFIX) \ ++ T (int32_t, int32_t, COND, SUFFIX) \ ++ T (int64_t, int64_t, COND, SUFFIX) \ ++ T (float, int32_t, COND, SUFFIX##_float) \ ++ T (double, int64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ ++ T (uint8_t, uint8_t, COND, SUFFIX) \ ++ T (uint16_t, uint16_t, COND, SUFFIX) \ ++ T (uint32_t, uint32_t, COND, SUFFIX) \ ++ T (uint64_t, uint64_t, COND, SUFFIX) \ ++ T (float, uint32_t, COND, SUFFIX##_float) \ ++ T (double, uint64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ ++ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ ++ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) ++ ++#define TEST_VAR_ALL(T) \ ++ TEST_COND_VAR_ALL (T, >, _gt) \ ++ TEST_COND_VAR_ALL (T, <, _lt) \ ++ TEST_COND_VAR_ALL (T, >=, _ge) \ ++ TEST_COND_VAR_ALL (T, <=, _le) \ ++ TEST_COND_VAR_ALL (T, ==, _eq) \ ++ TEST_COND_VAR_ALL (T, !=, _ne) ++ ++TEST_VAR_ALL (DEF_VCOND_VAR) ++ ++/* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +new file mode 100644 +index 000000000..e8fe31f8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlsx" } */ ++ ++#include <stdint-gcc.h> ++ ++#define eq(A, B) ((A) == (B)) ++#define ne(A, B) ((A) != (B)) ++#define olt(A, B) ((A) < (B)) ++#define ole(A, B) ((A) <= (B)) ++#define oge(A, B) ((A) >= (B)) ++#define ogt(A, B) ((A) > (B)) ++#define ordered(A, B) (!__builtin_isunordered (A, B)) ++#define unordered(A, B) (__builtin_isunordered (A, B)) ++#define ueq(A, B) (!__builtin_islessgreater (A, B)) ++#define ult(A, B) (__builtin_isless (A, B)) ++#define ule(A, B) (__builtin_islessequal (A, B)) ++#define uge(A, B) (__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define TEST_LOOP(TYPE1, TYPE2, CMP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ ++ TYPE1 *restrict src, \ ++ TYPE1 fallback, \ ++ TYPE2 *restrict a, \ ++ TYPE2 *restrict b, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ {\ ++ TYPE2 aval = ai; \ ++ TYPE2 bval = bi; \ ++ TYPE1 srcval = srci; \ ++ desti = CMP (aval, bval) ? 
srcval : fallback; \ ++ }\ ++ } ++ ++#define TEST_CMP(CMP) \ ++ TEST_LOOP (int32_t, float, CMP) \ ++ TEST_LOOP (uint32_t, float, CMP) \ ++ TEST_LOOP (float, float, CMP) \ ++ TEST_LOOP (int64_t, double, CMP) \ ++ TEST_LOOP (uint64_t, double, CMP) \ ++ TEST_LOOP (double, double, CMP) ++ ++TEST_CMP (eq) ++TEST_CMP (ne) ++TEST_CMP (olt) ++TEST_CMP (ole) ++TEST_CMP (oge) ++TEST_CMP (ogt) ++TEST_CMP (ordered) ++TEST_CMP (unordered) ++TEST_CMP (ueq) ++TEST_CMP (ult) ++TEST_CMP (ule) ++TEST_CMP (uge) ++TEST_CMP (ugt) ++TEST_CMP (nueq) ++TEST_CMP (nult) ++TEST_CMP (nule) ++TEST_CMP (nuge) ++TEST_CMP (nugt) ++ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */ +-- +2.43.0 +
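The new expanders let the vectorizer handle selects whose data or comparison operands are floating-point. A sketch of a loop shape the tests exercise; with the patch it is expected to vectorize into a vfcmp compare feeding a vector bit-select (options taken from the tests, driver name assumed):

    /* fp-select-demo.c
         loongarch64-linux-gnu-gcc -O2 -ftree-vectorize \
             -fno-vect-cost-model -mlsx -S fp-select-demo.c  */
    void
    select_by_cmp (float *r, const float *x, const float *y,
                   const float *a, const float *b, int n)
    {
      for (int i = 0; i < n; i++)
        r[i] = a[i] > b[i] ? x[i] : y[i];
    }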
View file
_service:tar_scm:0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch
Added
@@ -0,0 +1,34 @@
+From 0527589fb1b7b97cff2c441c1219fb9c8a44dd23 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 30 Oct 2023 19:39:27 +0800
+Subject: [PATCH 022/188] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined
+ [PR112299]
+
+Now loongarch.md uses HAVE_AS_TLS, we need this to fix the failure
+building a cross compiler if the cross assembler is not installed yet.
+
+gcc/ChangeLog:
+
+	PR target/112299
+	* config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
+	if not defined yet.
+---
+ gcc/config/loongarch/loongarch-opts.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index f2b59abe6..c4975af00 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
++++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -103,4 +103,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
+ #define HAVE_AS_MRELAX_OPTION 0
+ #endif
+ 
++#ifndef HAVE_AS_TLS
++#define HAVE_AS_TLS 0
++#endif
++
+ #endif /* LOONGARCH_OPTS_H */
+-- 
+2.43.0
+
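The guard follows the usual fallback-define idiom for configure-probed HAVE_AS_* macros, so that uses such as the HAVE_AS_TLS condition in loongarch.md above stay well-formed even before the assembler has been probed. A standalone sketch of the idiom (the wrapper function is illustrative, not part of the patch):

    #ifndef HAVE_AS_TLS
    #define HAVE_AS_TLS 0   /* conservatively assume no TLS support */
    #endif

    int
    assembler_supports_tls (void)
    {
      return HAVE_AS_TLS;
    }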
View file
_service:tar_scm:0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch
Added
@@ -0,0 +1,30 @@
+From bc3ae60454a51b80538b6deba21975d43de23b6a Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 3 Nov 2023 17:01:36 +0800
+Subject: [PATCH 023/188] LoongArch: Fix instruction name typo in
+ lsx_vreplgr2vr_<lsxfmt_f> template
+
+gcc/ChangeLog:
+
+	* config/loongarch/lsx.md: Fix instruction name typo in
+	lsx_vreplgr2vr_<lsxfmt_f> template.
+---
+ gcc/config/loongarch/lsx.md | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 4af32c8df..55c7d79a0 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -1523,7 +1523,7 @@
+   "ISA_HAS_LSX"
+ {
+   if (which_alternative == 1)
+-    return "ldi.<lsxfmt>\t%w0,0";
++    return "vldi.<lsxfmt>\t%w0,0";
+ 
+   if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
+     return "#";
+-- 
+2.43.0
+
View file
_service:tar_scm:0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
Added
@@ -0,0 +1,116 @@
+From b8f47a362000bb51dec88e0a73f885c57a46f568 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 12 Nov 2023 00:55:13 +0800
+Subject: [PATCH 024/188] LoongArch: Use simplify_gen_subreg instead of
+ gen_rtx_SUBREG in loongarch_expand_vec_cond_mask_expr [PR112476]
+
+GCC internal says:
+
+    'subreg's of 'subreg's are not supported.  Using
+    'simplify_gen_subreg' is the recommended way to avoid this problem.
+
+Unfortunately loongarch_expand_vec_cond_mask_expr might create nested
+subreg under certain circumstances, causing an ICE.
+
+Use simplify_gen_subreg as the internal document suggests.
+
+gcc/ChangeLog:
+
+	PR target/112476
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg
+	instead of gen_rtx_SUBREG.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/112476
+	* gcc.target/loongarch/pr112476-1.c: New test.
+	* gcc.target/loongarch/pr112476-2.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc       | 11 ++++++---
+ .../gcc.target/loongarch/pr112476-1.c   | 24 +++++++++++++++++++
+ .../gcc.target/loongarch/pr112476-2.c   |  5 ++++
+ 3 files changed, 37 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index fa5c14be6..65ca1489f 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -11190,7 +11190,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+ 	  if (mode != vimode)
+ 	    {
+ 	      xop1 = gen_reg_rtx (vimode);
+-	      emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
++	      emit_move_insn (xop1,
++			      simplify_gen_subreg (vimode, operands[1],
++						   mode, 0));
+ 	    }
+ 	  emit_move_insn (src1, xop1);
+ 	}
+@@ -11207,7 +11209,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+ 	  if (mode != vimode)
+ 	    {
+ 	      xop2 = gen_reg_rtx (vimode);
+-	      emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
++	      emit_move_insn (xop2,
++			      simplify_gen_subreg (vimode, operands[2],
++						   mode, 0));
+ 	    }
+ 	  emit_move_insn (src2, xop2);
+ 	}
+@@ -11226,7 +11230,8 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
+ 			   gen_rtx_AND (vimode, mask, src1));
+       /* The result is placed back to a register with the mask.  */
+       emit_insn (gen_rtx_SET (mask, bsel));
+-      emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
++      emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
++							vimode, 0));
+     }
+ }
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-1.c b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+new file mode 100644
+index 000000000..4cf133e7a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
+@@ -0,0 +1,24 @@
++/* PR target/112476: ICE with -mlsx */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlsx" } */
++
++int foo, bar;
++float baz, res, a;
++
++void
++apply_adjacent_ternary (float *dst, float *src0)
++{
++  do
++    {
++      __builtin_memcpy (&res, &src0, sizeof (res));
++      *dst = foo ? baz : res;
++      dst++;
++    }
++  while (dst != src0);
++}
++
++void
++xx (void)
++{
++  apply_adjacent_ternary (&a, &a);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-2.c b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+new file mode 100644
+index 000000000..cc0dfbfc9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
+@@ -0,0 +1,5 @@
++/* PR target/112476: ICE with -mlasx */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlasx" } */
++
++#include "pr112476-1.c"
+-- 
+2.43.0
+
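The nested-subreg situation is easiest to see through the reproducer above: the __builtin_memcpy punning yields a value that is already represented as a subreg, and wrapping it in a second subreg through gen_rtx_SUBREG tripped the rule quoted in the commit message. To check a compiler for the ICE, the tests boil down to (condensed sketch; command line and file name assumed):

    /* pr112476-repro.c
         loongarch64-linux-gnu-gcc -O2 -mlsx -c pr112476-repro.c
       ICEs before this fix, compiles cleanly after it.  */
    int foo;
    float baz, res, a;

    void
    fill (float *dst, float *end)
    {
      do
        {
          __builtin_memcpy (&res, &end, sizeof (res));
          *dst = foo ? baz : res;
          dst++;
        }
      while (dst != end);
    }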
View file
_service:tar_scm:0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
Added
@@ -0,0 +1,116 @@ +From b23a89e835962ae7d89e5c6f87a69c021097d715 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 30 Oct 2023 20:24:58 +0800
+Subject: [PATCH 025/188] LoongArch: Optimize single-used address with
+ -mexplicit-relocs=auto for fld/fst
+
+fld and fst have the same address mode as ld.w and st.w, so the same
+optimization as r14-4851 should be applied to them too.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode
+	iterator.
+	(ST_ANY): New mode iterator.
+	(define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and
+	ST_ANY instead of QHWD for applicable patterns.
+---
+ gcc/config/loongarch/loongarch.md | 38 +++++++++++++++++++------------
+ 1 file changed, 24 insertions(+), 14 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 80487488d..ed86c95bd 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -400,6 +400,14 @@
+    (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+    (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
+ 
++;; A mode for anything with 32 bits or more, and able to be loaded with
++;; the same addressing mode as ld.w.
++(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
++
++;; A mode for anything able to be stored with the same addressing mode as
++;; st.w.
++(define_mode_iterator ST_ANY [QHWD ANYF])
++
+ ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
+ ;; 32-bit version and "mul.d" in the 64-bit version.
+ (define_mode_attr d [(SI "w") (DI "d")])
+@@ -3785,13 +3793,14 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+        (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (match_operand:GPR 2 "register_operand")
+-       (mem:GPR (match_dup 0)))]
++   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
++       (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0]) \
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+-  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  [(set (match_dup 2)
++       (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+ {
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+ })
+@@ -3799,14 +3808,15 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+        (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (match_operand:GPR 2 "register_operand")
+-       (mem:GPR (plus (match_dup 0)
+-                      (match_operand 3 "const_int_operand"))))]
++   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
++       (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
++                                     (match_operand 3 "const_int_operand"))))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0]) \
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+-  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
++  [(set (match_dup 2)
++       (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+ {
+   operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+@@ -3850,13 +3860,13 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+        (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (mem:QHWD (match_dup 0))
+-       (match_operand:QHWD 2 "register_operand"))]
++   (set (mem:ST_ANY (match_dup 0))
++       (match_operand:ST_ANY 2 "register_operand"))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0])) \
+    && REGNO (operands[0]) != REGNO (operands[2])"
+-  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+ {
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+ })
+@@ -3864,14 +3874,14 @@
+ (define_peephole2
+   [(set (match_operand:P 0 "register_operand")
+        (match_operand:P 1 "symbolic_pcrel_operand"))
+-   (set (mem:QHWD (plus (match_dup 0)
+-                        (match_operand 3 "const_int_operand")))
+-       (match_operand:QHWD 2 "register_operand"))]
++   (set (mem:ST_ANY (plus (match_dup 0)
++                          (match_operand 3 "const_int_operand")))
++       (match_operand:ST_ANY 2 "register_operand"))]
+   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
+    && (peep2_reg_dead_p (2, operands[0])) \
+    && REGNO (operands[0]) != REGNO (operands[2])"
+-  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
++  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+ {
+   operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
+   emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+--
+2.43.0
+
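For illustration, a minimal sketch of the access pattern the widened peepholes target; the function name, registers, and exact sequence below are assumptions for illustration, not taken from the patch:

/* With -mexplicit-relocs=auto and -mcmodel=normal, the single use of
   'x' below can now be loaded as
       pcalau12i  $t0, %pc_hi20(x)
       fld.d      $f0, $t0, %pc_lo12(x)
   with the %pc_lo12 part folded into the fld itself, mirroring what
   r14-4851 already did for ld.w/ld.d.  */
static double x;

double
load_x (void)
{
  return x;
}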
_service:tar_scm:0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
Added
@@ -0,0 +1,305 @@ +From f1cfdec1602a5a316a9b9022a95143a7385489c2 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 3 Nov 2023 21:19:59 +0800
+Subject: [PATCH 026/188] LoongArch: Disable relaxation if the assembler don't
+ support conditional branch relaxation [PR112330]
+
+As the commit message of r14-4674 has indicated, if the assembler does
+not support conditional branch relaxation, a relocation overflow may
+happen on conditional branches when relaxation is enabled because the
+number of NOP instructions inserted by the assembler will be more than
+the number estimated by GCC.
+
+To work around this issue, disable relaxation by default if the
+assembler is detected incapable of performing conditional branch
+relaxation at GCC build time.  We also need to pass -mno-relax to the
+assembler to really disable relaxation.  But, if the assembler does not
+support the -mrelax option at all, we should not pass -mno-relax to the
+assembler or it will immediately error out.  Also handle this with the
+build-time assembler capability probing, and add a pair of options
+-m[no-]pass-mrelax-to-as to allow using a different assembler from the
+build-time one.
+
+With this change, if GCC is built with GAS 2.41, relaxation will be
+disabled by default.  So the default value of -mexplicit-relocs= is
+also changed to 'always' if -mno-relax is specified or implied by the
+build-time default, because using assembler macros for symbol addresses
+produces no benefit when relaxation is disabled.
+
+gcc/ChangeLog:
+
+	PR target/112330
+	* config/loongarch/genopts/loongarch.opt.in: Add
+	-m[no-]pass-relax-to-as.  Change the default of -m[no-]relax to
+	account for conditional branch relaxation support status.
+	* config/loongarch/loongarch.opt: Regenerate.
+	* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
+	the assembler supports conditional branch relaxation.
+	* configure: Regenerate.
+	* config.in: Regenerate.  Note that there are some unrelated
+	changes introduced by r14-5424 (which does not contain a
+	config.in regeneration).
+	* config/loongarch/loongarch-opts.h
+	(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
+	* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
+	Define.
+	(ASM_MRELAX_SPEC): Define.
+	(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
+	* config/loongarch/loongarch.cc: Take the setting of
+	-mno-relax into account when determining the default of
+	-mexplicit-relocs=.
+	* doc/invoke.texi: Document -m[no-]relax and
+	-m[no-]pass-mrelax-to-as for LoongArch.  Update the default
+	value of -mexplicit-relocs=.
+---
+ gcc/config.in                                 | 35 ++++++++++++++++++-
+ gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
+ gcc/config/loongarch/loongarch-driver.h       | 16 ++++++++-
+ gcc/config/loongarch/loongarch-opts.h         |  4 +++
+ gcc/config/loongarch/loongarch.cc             |  2 +-
+ gcc/config/loongarch/loongarch.opt            |  6 +++-
+ gcc/configure                                 | 35 +++++++++++++++++++
+ gcc/configure.ac                              | 10 ++++++
+ 8 files changed, 109 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config.in b/gcc/config.in
+index 0c55e67e7..04968b53c 100644
+--- a/gcc/config.in
++++ b/gcc/config.in
+@@ -374,6 +374,12 @@
+ #endif
+ 
+ 
++/* Define if your assembler supports conditional branch relaxation. */
++#ifndef USED_FOR_TARGET
++#undef HAVE_AS_COND_BRANCH_RELAXATION
++#endif
++
++
+ /* Define if your assembler supports the --debug-prefix-map option.
*/ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_DEBUG_PREFIX_MAP +@@ -798,6 +804,20 @@ + #endif + + ++/* Define to 1 if you have the Mac OS X function ++ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES ++#endif ++ ++ ++/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in ++ the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFPREFERENCESCOPYAPPVALUE ++#endif ++ ++ + /* Define to 1 if you have the `clearerr_unlocked' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_CLEARERR_UNLOCKED +@@ -822,6 +842,13 @@ + #endif + + ++/* Define if the GNU dcgettext() function is already present or preinstalled. ++ */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_DCGETTEXT ++#endif ++ ++ + /* Define to 1 if we found a declaration for 'abort', otherwise define to 0. + */ + #ifndef USED_FOR_TARGET +@@ -1554,6 +1581,12 @@ + #endif + + ++/* Define if the GNU gettext() function is already present or preinstalled. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_GETTEXT ++#endif ++ ++ + /* Define to 1 if you have the `gettimeofday' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_GETTIMEOFDAY +@@ -1585,7 +1618,7 @@ + #endif + + +-/* Define if you have the iconv() function. */ ++/* Define if you have the iconv() function and it works. */ + #ifndef USED_FOR_TARGET + #undef HAVE_ICONV + #endif +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index e7df1964a..bd3cfaf60 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -229,10 +229,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + ++mpass-mrelax-to-as ++Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Pass -mrelax or -mno-relax option to the assembler. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index 59fa3263d..c8dba2cc4 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3. 
If not see + "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \ + "%(subtarget_cc1_spec)" + ++#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}" ++#else ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}" ++#endif ++ ++#if HAVE_AS_MRELAX_OPTION ++#define ASM_MRELAX_SPEC \ ++ "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#else ++#define ASM_MRELAX_SPEC \ ++ "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#endif ++ + #undef ASM_SPEC + #define ASM_SPEC \ +- "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)" ++ "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)" + + + extern const char* +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index c4975af00..dfbe9dd5c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -103,6 +103,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_MRELAX_OPTION 0 + #endif + ++#ifndef HAVE_AS_COND_BRANCH_RELAXATION ++#define HAVE_AS_COND_BRANCH_RELAXATION 0 ++#endif ++ + #ifndef HAVE_AS_TLS + #define HAVE_AS_TLS 0 + #endif +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 65ca1489f..6d580ee75 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7428,7 +7428,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + + if (la_opt_explicit_relocs == M_OPT_UNSET) + la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +- ? (HAVE_AS_MRELAX_OPTION ++ ? (loongarch_mrelax + ? EXPLICIT_RELOCS_AUTO + : EXPLICIT_RELOCS_ALWAYS) + : EXPLICIT_RELOCS_NONE); +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 44376fd77..d936954b8 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -236,10 +236,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + ++mpass-mrelax-to-as ++Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Pass -mrelax or -mno-relax option to the assembler. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/configure b/gcc/configure +index 430d44dc3..09bacfec3 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28901,6 +28901,41 @@ if test $gcc_cv_as_loongarch_relax = yes; then + + $as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for conditional branch relaxation support" >&5 ++$as_echo_n "checking assembler for conditional branch relaxation support... 
" >&6; } ++if ${gcc_cv_as_loongarch_cond_branch_relax+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_cond_branch_relax=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'a: ++ .rept 32769 ++ nop ++ .endr ++ beq $a0,$a1,a' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_cond_branch_relax=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_cond_branch_relax" >&5 ++$as_echo "$gcc_cv_as_loongarch_cond_branch_relax" >&6; } ++if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then ++ ++$as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h ++ + fi + + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 4b24db190..a0999152e 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5341,6 +5341,16 @@ x: + -mrelax, .text,, + AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1, + Define if your assembler supports -mrelax option.)) ++ gcc_GAS_CHECK_FEATURE(conditional branch relaxation support, ++ gcc_cv_as_loongarch_cond_branch_relax, ++ , ++ a: ++ .rept 32769 ++ nop ++ .endr ++ beq $a0,$a1,a,, ++ AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1, ++ Define if your assembler supports conditional branch relaxation.)) + ;; + s390*-*-*) + gcc_GAS_CHECK_FEATURE(.gnu_attribute support, +-- +2.43.0 +
_service:tar_scm:0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
Added
@@ -0,0 +1,391 @@ +From 4498010fba61c1446286c96cbda24d5ed53c53c7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 6 Nov 2023 16:06:08 +0800
+Subject: [PATCH 027/188] LoongArch: Remove redundant barrier instructions
+ before LL-SC loops
+
+This is isomorphic to the LLVM changes [1]-[2].
+
+On LoongArch, the LL and SC instructions have memory barrier semantics:
+
+- LL: <memory-barrier> + <load-exclusive>
+- SC: <store-conditional> + <memory-barrier>
+
+But the compare and swap operation is allowed to fail, and if it fails
+the SC instruction is not executed, thus the acquire semantics cannot
+be guaranteed.  Therefore, an acquire barrier needs to be generated
+when failure_memorder includes an acquire operation.
+
+On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
+acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
+barrier.  So it's always enough for acquire semantics.  OTOH if an
+acquire semantic is not needed, we still need the "dbar 0x700" as the
+load-load barrier like all LL-SC loops.
+
+[1]: https://github.com/llvm/llvm-project/pull/67391
+[2]: https://github.com/llvm/llvm-project/pull/69339
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_memmodel_needs_release_fence): Remove.
+	(loongarch_cas_failure_memorder_needs_acquire): New static
+	function.
+	(loongarch_print_operand): Redefine 'G' for the barrier on CAS
+	failure.
+	* config/loongarch/sync.md (atomic_cas_value_strong<mode>):
+	Remove the redundant barrier before the LL instruction, and
+	emit an acquire barrier on failure if needed by
+	failure_memorder.
+	(atomic_cas_value_cmp_and_7_<mode>): Likewise.
+	(atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier
+	before the LL instruction.
+	(atomic_cas_value_sub_7_<mode>): Likewise.
+	(atomic_cas_value_and_7_<mode>): Likewise.
+	(atomic_cas_value_xor_7_<mode>): Likewise.
+	(atomic_cas_value_or_7_<mode>): Likewise.
+	(atomic_cas_value_nand_7_<mode>): Likewise.
+	(atomic_cas_value_exchange_7_<mode>): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/cas-acquire.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 30 ++++---
+ gcc/config/loongarch/sync.md                  | 49 +++++------
+ .../gcc.target/loongarch/cas-acquire.c        | 82 +++++++++++++++++++
+ 3 files changed, 119 insertions(+), 42 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 6d580ee75..8467f03cf 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5829,27 +5829,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
+     }
+ }
+ 
+-/* Return true if a FENCE should be emitted to before a memory access to
+-   implement the release portion of memory model MODEL.  */
++/* Return true if a FENCE should be emitted after a failed CAS to
++   implement the acquire semantic of failure_memorder.
*/ + + static bool +-loongarch_memmodel_needs_release_fence (enum memmodel model) ++loongarch_cas_failure_memorder_needs_acquire (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { ++ case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + return true; + +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_RELAXED: ++ case MEMMODEL_RELEASE: + return false; + ++ /* MEMMODEL_CONSUME is deliberately not handled because it's always ++ replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by ++ MEMMODEL_CONSUME, read the change (re)introducing it carefully and ++ decide what to do. See PR 59448 and get_memmodel in builtins.cc. */ + default: + gcc_unreachable (); + } +@@ -5962,7 +5962,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'd' Print CONST_INT OP in decimal. + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. + 'F' Print the FPU branch condition for comparison OP. +- 'G' Print a DBAR insn if the memory model requires a release. ++ 'G' Print a DBAR insn for CAS failure (with an acquire semantic if ++ needed, otherwise a simple load-load barrier). + 'H' Print address 52-61bit relocation associated with OP. + 'h' Print the high-part relocation associated with OP. + 'i' Print i if the operand is not a register. +@@ -6053,8 +6054,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'G': +- if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) +- fputs ("dbar\t0", file); ++ if (loongarch_cas_failure_memorder_needs_acquire ( ++ memmodel_from_int (INTVAL (op)))) ++ fputs ("dbar\t0b10100", file); ++ else ++ fputs ("dbar\t0x700", file); + break; + + case 'h': +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index efa40f24c..dd1f98946 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -162,19 +162,18 @@ + (clobber (match_scratch:GPR 6 "=&r")) + "" + { +- return "%G5\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "bne\\t%0,%z2,2f\\n\\t" + "or%i3\\t%6,$zero,%3\\n\\t" + "sc.<amo>\\t%6,%1\\n\\t" +- "beq\\t$zero,%6,1b\\n\\t" ++ "beqz\\t%6,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G5\\n\\t" + "3:\\n\\t"; + } +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output +@@ -267,8 +266,7 @@ + (clobber (match_scratch:GPR 7 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%2\\n\\t" + "bne\\t%7,%z4,2f\\n\\t" +@@ -278,10 +276,10 @@ + "beq\\t$zero,%7,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G6\\n\\t" + "3:\\n\\t"; + } +- (set (attr "length") (const_int 40))) ++ (set (attr "length") (const_int 36))) + + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output +@@ -336,8 +334,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "add.w\\t%8,%0,%z5\\n\\t" +@@ -347,7 +344,7 @@ + "beq\\t$zero,%7,1b"; + } + +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_sub_7_<mode>" + (set 
(match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -363,8 +360,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "sub.w\\t%8,%0,%z5\\n\\t" +@@ -373,7 +369,7 @@ + "sc.<amo>\\t%7,%1\\n\\t" + "beq\\t$zero,%7,1b"; + } +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_and_7_<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -389,8 +385,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "and\\t%8,%0,%z5\\n\\t" +@@ -399,7 +394,7 @@ + "sc.<amo>\\t%7,%1\\n\\t" + "beq\\t$zero,%7,1b"; + } +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_xor_7_<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -415,8 +410,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "xor\\t%8,%0,%z5\\n\\t" +@@ -426,7 +420,7 @@ + "beq\\t$zero,%7,1b"; + } + +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_or_7_<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -442,8 +436,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "or\\t%8,%0,%z5\\n\\t" +@@ -453,7 +446,7 @@ + "beq\\t$zero,%7,1b"; + } + +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_nand_7_<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -469,8 +462,7 @@ + (clobber (match_scratch:GPR 8 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "and\\t%8,%0,%z5\\n\\t" +@@ -479,7 +471,7 @@ + "sc.<amo>\\t%7,%1\\n\\t" + "beq\\t$zero,%7,1b"; + } +- (set (attr "length") (const_int 32))) ++ (set (attr "length") (const_int 28))) + + (define_insn "atomic_cas_value_exchange_7_<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -494,8 +486,7 @@ + (clobber (match_scratch:GPR 7 "=&r")) + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" + "and\\t%7,%0,%z3\\n\\t" + "or%i5\\t%7,%7,%5\\n\\t" +diff --git a/gcc/testsuite/gcc.target/loongarch/cas-acquire.c b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c +new file mode 100644 +index 000000000..ff7ba866f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c +@@ -0,0 +1,82 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target c99_runtime } */ ++/* { dg-require-effective-target pthread } */ ++/* { dg-options "-std=c99 -pthread" } */ ++ ++/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934 ++ reported that this had failed with GCC and 3A6000. 
*/
++
++#include <pthread.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdio.h>
++
++static unsigned int tags[32];
++static unsigned int vals[32];
++
++static void *
++writer_entry (void *data)
++{
++  atomic_uint *pt = (atomic_uint *)tags;
++  atomic_uint *pv = (atomic_uint *)vals;
++
++  for (unsigned int n = 1; n < 10000; n++)
++    {
++      atomic_store_explicit (&pv[n & 31], n, memory_order_release);
++      atomic_store_explicit (&pt[n & 31], n, memory_order_release);
++    }
++
++  return NULL;
++}
++
++static void *
++reader_entry (void *data)
++{
++  atomic_uint *pt = (atomic_uint *)tags;
++  atomic_uint *pv = (atomic_uint *)vals;
++  int i;
++
++  for (;;)
++    {
++      for (i = 0; i < 32; i++)
++	{
++	  unsigned int tag = 0;
++	  bool res;
++
++	  res = atomic_compare_exchange_weak_explicit (
++	      &pt[i], &tag, 0, memory_order_acquire, memory_order_acquire);
++	  if (!res)
++	    {
++	      unsigned int val;
++
++	      val = atomic_load_explicit (&pv[i], memory_order_relaxed);
++	      if (val < tag)
++		__builtin_trap ();
++	    }
++	}
++    }
++
++  return NULL;
++}
++
++int
++main (int argc, char *argv[])
++{
++  pthread_t writer;
++  pthread_t reader;
++  int res;
++
++  res = pthread_create (&writer, NULL, writer_entry, NULL);
++  if (res < 0)
++    __builtin_trap ();
++
++  res = pthread_create (&reader, NULL, reader_entry, NULL);
++  if (res < 0)
++    __builtin_trap ();
++
++  res = pthread_join (writer, NULL);
++  if (res < 0)
++    __builtin_trap ();
++
++  return 0;
++}
+--
+2.43.0
+
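For reference, a minimal sketch of the semantics being matched here; the function name is an assumption for illustration, not from the patch. The failure memory order of the CAS decides which dbar the 'G' operand letter prints:

#include <stdatomic.h>

/* The failure order below includes acquire, so the failed-compare path
   of the LL-SC loop now ends with "dbar 0b10100" (an acquire barrier on
   LoongArch v1.10+, a full barrier on v1.00); with a relaxed failure
   order it would be the plain load-load barrier "dbar 0x700".  */
_Bool
try_lock (atomic_int *lock)
{
  int expected = 0;
  return atomic_compare_exchange_weak_explicit (lock, &expected, 1,
						memory_order_acquire,
						memory_order_acquire);
}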
_service:tar_scm:0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
Added
@@ -0,0 +1,161 @@ +From 9731abbe19b9fad184dfe728bd9b2cc02b40c543 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Thu, 16 Nov 2023 20:31:09 +0800
+Subject: [PATCH 028/188] LoongArch: Fix scan-assembler-times of lasx/lsx test
+ case.
+
+These tests failed when they were first added; this patch adjusts the
+scan-assembler-times counts to fix them.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: Adjust
+	assembler times.
+	* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto.
+	* gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto.
+	* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto.
+---
+ .../loongarch/vector/lasx/lasx-vcond-1.c      | 12 +++----
+ .../loongarch/vector/lasx/lasx-vcond-2.c      | 36 +++++++++----------
+ .../loongarch/vector/lsx/lsx-vcond-1.c        | 12 +++----
+ .../loongarch/vector/lsx/lsx-vcond-2.c        | 36 +++++++++----------
+ 4 files changed, 48 insertions(+), 48 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+index ee9cb1a1f..57064eac9 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
+@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
+ 
+ /* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvslt\.d} 8 } } */
+ /* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvsle\.d} 8 } } */
+ /* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
+ /* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.w} 8 } } */
++/* { dg-final { scan-assembler-times {\txvseq\.d} 8 } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+index 5f40ed44c..55d5a084c 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
+@@ -67,21 +67,21 @@ TEST_CMP (nule)
+ TEST_CMP (nuge)
+ TEST_CMP (nugt)
+ 
+-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
+-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
+-/* { dg-final {
scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +index 138adccfa..8c69f0d9b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR) + + /* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +index e8fe31f8f..2214afd0a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -67,21 +67,21 @@ TEST_CMP (nule) + TEST_CMP (nuge) + TEST_CMP (nugt) + +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } 
*/ +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 12 } } */ +-- +2.43.0 +
_service:tar_scm:0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch
Added
@@ -0,0 +1,45 @@ +From 526e1effd86cfa0b1afae88890ce4f74f7150d88 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 16 Nov 2023 16:44:36 +0800 +Subject: PATCH 029/188 LoongArch: Increase cost of vector aligned + store/load. + +Based on SPEC2017 performance evaluation results, it's better to make them equal +to the cost of unaligned store/load so as to avoid odd alignment peeling. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_builtin_vectorization_cost): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8467f03cf..b6f0d61ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3889,11 +3889,9 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + case scalar_stmt: + case scalar_load: + case vector_stmt: +- case vector_load: + case vec_to_scalar: + case scalar_to_vec: + case scalar_store: +- case vector_store: + return 1; + + case vec_promote_demote: +@@ -3901,6 +3899,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + return LASX_SUPPORTED_MODE_P (mode) + && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1; + ++ case vector_load: ++ case vector_store: + case unaligned_load: + case unaligned_store: + return 2; +-- +2.43.0 +
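A sketch of the kind of loop this cost change affects; the function below is an illustrative assumption, not from the patch or its evaluation:

/* dst and src have unknown alignment.  With vector_load/vector_store
   now costing 2, the same as unaligned_load/unaligned_store, the
   vectorizer sees no payoff in peeling scalar iterations merely to
   reach an aligned vector access.  */
void
scale (double *restrict dst, const double *restrict src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[i] * 2.0;
}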
_service:tar_scm:0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
Added
@@ -0,0 +1,58 @@ +From bd74cb3e1238e842d15bcd4044c9e2f246cc18bc Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Fri, 17 Nov 2023 10:38:02 +0800
+Subject: [PATCH 030/188] LoongArch: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO
+
+The LoongArch backend has defined ctz and clz, but if we want GCC to
+perform the CTZ transformation optimization in the forwprop2 pass, GCC
+needs to know the value of c[lt]z at zero, which may be beneficial for
+some test cases (like spec2017 deepsjeng_r).
+
+After implementing the macro, we tested the dynamic instruction count
+on deepsjeng_r:
+- before: 1688423249186
+- after:  1660311215745 (1.66% reduction)
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CLZ_DEFINED_VALUE_AT_ZERO):
+	Implement.
+	(CTZ_DEFINED_VALUE_AT_ZERO): Same.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/pr90838.c: Add clz/ctz test support on LoongArch.
+---
+ gcc/config/loongarch/loongarch.h | 5 +++++
+ gcc/testsuite/gcc.dg/pr90838.c   | 5 +++++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 6e8ac293a..19cf6fd33 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -1239,3 +1239,8 @@ struct GTY (()) machine_function
+ 
+ #define TARGET_EXPLICIT_RELOCS \
+   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
++
++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
+index 7502b8463..7aa912525 100644
+--- a/gcc/testsuite/gcc.dg/pr90838.c
++++ b/gcc/testsuite/gcc.dg/pr90838.c
+@@ -82,3 +82,8 @@ int ctz4 (unsigned long x)
+ /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
+ /* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */
+ /* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */
++
++/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "ctz.d\t" 1 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "ctz.w\t" 3 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "andi\t" 4 { target { loongarch64*-*-* } } } } */
+--
+2.43.0
+
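The transformation this enables, sketched on the pattern pr90838.c tests (the function name is illustrative): with CTZ_DEFINED_VALUE_AT_ZERO returning 2 and VALUE set to the mode's bit size, forwprop2 can fold the zero check away.

/* ctz.w of zero yields 32 on LoongArch, and the macro now advertises
   that, so the selection below collapses to a single .CTZ internal
   call, i.e. one ctz.w with no compare-and-branch.  */
int
ctz_or_32 (unsigned int x)
{
  return x ? __builtin_ctz (x) : 32;
}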
_service:tar_scm:0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
Added
@@ -0,0 +1,197 @@ +From 61daf071708947ef8431ac36bc6c6b47339fdd2a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 14 Nov 2023 00:17:19 +0800
+Subject: [PATCH 031/188] LoongArch: Handle vectorized copysign (x, -1)
+ expansion efficiently
+
+With LSX or LASX, copysign (x[i], -1) (or any negative constant) can be
+vectorized using [x]vbitseti.{w/d} instructions to directly set the
+sign bits.
+
+Inspired by Tamar Christina's "AArch64: Handle copysign (x, -1) expansion
+efficiently" (r14-5289).
+
+gcc/ChangeLog:
+
+	* config/loongarch/lsx.md (copysign<mode>3): Allow operand 2 to
+	be a reg_or_vector_same_val_operand.  If it's a const vector
+	with same negative elements, expand the copysign with a bitset
+	instruction.  Otherwise, force it into a register.
+	* config/loongarch/lasx.md (copysign<mode>3): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/loongarch/vect-copysign-negconst.C: New test.
+	* g++.target/loongarch/vect-copysign-negconst-run.C: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 22 ++++++++-
+ gcc/config/loongarch/lsx.md                   | 22 ++++++++-
+ .../loongarch/vect-copysign-negconst-run.C    | 47 +++++++++++++++++++
+ .../loongarch/vect-copysign-negconst.C        | 27 +++++++++++
+ 4 files changed, 116 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index f0f2dd08d..2e11f0612 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -3136,11 +3136,31 @@
+		      (match_operand:FLASX 1 "register_operand")))
+    (set (match_dup 5)
+	(and:FLASX (match_dup 3)
+-		   (match_operand:FLASX 2 "register_operand")))
++		   (match_operand:FLASX 2 "reg_or_vector_same_val_operand")))
+    (set (match_operand:FLASX 0 "register_operand")
+	(ior:FLASX (match_dup 4) (match_dup 5)))]
+   "ISA_HAS_LASX"
+ {
++  /* copysign (x, -1) should instead be expanded as setting the sign
++     bit.  */
++  if (!REG_P (operands[2]))
++    {
++      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
++      if (GET_CODE (op2_elt) == CONST_DOUBLE
++	  && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
++	{
++	  rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
++	  operands[0] = lowpart_subreg (<VIMODE256>mode, operands[0],
++					<MODE>mode);
++	  operands[1] = lowpart_subreg (<VIMODE256>mode, operands[1],
++					<MODE>mode);
++	  emit_insn (gen_lasx_xvbitseti_<lasxfmt> (operands[0],
++						   operands[1], n));
++	  DONE;
++	}
++    }
++
++  operands[2] = force_reg (<MODE>mode, operands[2]);
+   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+ 
+   operands[4] = gen_reg_rtx (<MODE>mode);
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 55c7d79a0..8ea41c85b 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -2873,11 +2873,31 @@
+		     (match_operand:FLSX 1 "register_operand")))
+    (set (match_dup 5)
+	(and:FLSX (match_dup 3)
+-		  (match_operand:FLSX 2 "register_operand")))
++		  (match_operand:FLSX 2 "reg_or_vector_same_val_operand")))
+    (set (match_operand:FLSX 0 "register_operand")
+	(ior:FLSX (match_dup 4) (match_dup 5)))]
+   "ISA_HAS_LSX"
+ {
++  /* copysign (x, -1) should instead be expanded as setting the sign
++     bit.  */
++  if (!REG_P (operands[2]))
++    {
++      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
++      if (GET_CODE (op2_elt) == CONST_DOUBLE
++	  && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
++	{
++	  rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
++	  operands[0] = lowpart_subreg (<VIMODE>mode, operands[0],
++					<MODE>mode);
++	  operands[1] = lowpart_subreg (<VIMODE>mode, operands[1],
++					<MODE>mode);
++	  emit_insn (gen_lsx_vbitseti_<lsxfmt> (operands[0], operands[1],
++						n));
++	  DONE;
++	}
++    }
++
++  operands[2] = force_reg (<MODE>mode, operands[2]);
+   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
+ 
+   operands[4] = gen_reg_rtx (<MODE>mode);
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+new file mode 100644
+index 000000000..d2d5d15c9
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
+@@ -0,0 +1,47 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
++/* { dg-require-effective-target loongarch_asx_hw } */
++
++#include "vect-copysign-negconst.C"
++
++double d[] = {1.2, -3.4, -5.6, 7.8};
++float f[] = {1.2, -3.4, -5.6, 7.8, -9.0, -11.4, 51.4, 1919.810};
++
++double _abs(double x) { return __builtin_fabs (x); }
++float _abs(float x) { return __builtin_fabsf (x); }
++
++template <class T>
++void
++check (T *arr, T *orig, int len)
++{
++  for (int i = 0; i < len; i++)
++    {
++      if (arr[i] > 0)
++	__builtin_trap ();
++      if (_abs (arr[i]) != _abs (orig[i]))
++	__builtin_trap ();
++    }
++}
++
++int
++main()
++{
++  double test_d[4];
++  float test_f[8];
++
++  __builtin_memcpy (test_d, d, sizeof (test_d));
++  force_negative<2> (test_d);
++  check (test_d, d, 2);
++
++  __builtin_memcpy (test_d, d, sizeof (test_d));
++  force_negative<4> (test_d);
++  check (test_d, d, 4);
++
++  __builtin_memcpy (test_f, f, sizeof (test_f));
++  force_negative<4> (test_f);
++  check (test_f, f, 4);
++
++  __builtin_memcpy (test_f, f, sizeof (test_f));
++  force_negative<8> (test_f);
++  check (test_f, f, 8);
++}
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+new file mode 100644
+index 000000000..5e8820d2b
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
+@@ -0,0 +1,27 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
++/* { dg-final { scan-assembler "\txvbitseti.*63" } } */
++/* { dg-final { scan-assembler "\txvbitseti.*31" } } */
++/* { dg-final { scan-assembler "\tvbitseti.*63" } } */
++/* { dg-final { scan-assembler "\tvbitseti.*31" } } */
++
++template <int N>
++__attribute__ ((noipa)) void
++force_negative (float *arr)
++{
++  for (int i = 0; i < N; i++)
++    arr[i] = __builtin_copysignf (arr[i], -2);
++}
++
++template <int N>
++__attribute__ ((noipa)) void
++force_negative (double *arr)
++{
++  for (int i = 0; i < N; i++)
++    arr[i] = __builtin_copysign (arr[i], -3);
++}
++
++template void force_negative<4>(float *);
++template void force_negative<8>(float *);
++template void force_negative<2>(double *);
++template void force_negative<4>(double *);
+--
+2.43.0
+
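A sketch of the source pattern the expanders now special-case, simplified from the new tests (the plain-C function below is illustrative):

/* copysign against a negative constant only needs the sign bit set,
   so with LASX this loop can become one xvbitseti.d per vector (bit 63
   of each lane) instead of an and/and/or masking sequence.  */
void
force_negative (double *a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = __builtin_copysign (a[i], -1.0);
}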
_service:tar_scm:0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
Added
@@ -0,0 +1,561 @@ +From 5ab014701ddd9968855026f0e2ae1af2b165bcd7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 16 Nov 2023 15:06:11 +0800 +Subject: PATCH 032/188 LoongArch: Add code generation support for call36 + function calls. + +When compiling with '-mcmodel=medium', the function call is made through +'pcaddu18i+jirl' if binutils supports call36, otherwise the +native implementation 'pcalau12i+jirl' is used. + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_SUPPORT_CALL36): Define macro. + * config/loongarch/loongarch.cc (loongarch_legitimize_call_address): + If binutils supports call36, the function call is not split over expand. + * config/loongarch/loongarch.md: Add call36 generation code. + * config/loongarch/predicates.md: Likewise. + * configure: Regenerate. + * configure.ac: Check whether binutils supports call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-medium-5.c: If the assembler supports call36, + the test is abandoned. + * gcc.target/loongarch/func-call-medium-6.c: Likewise. + * gcc.target/loongarch/func-call-medium-7.c: Likewise. + * gcc.target/loongarch/func-call-medium-8.c: Likewise. + * lib/target-supports.exp: Added a function to see if the assembler supports + the call36 relocation. + * gcc.target/loongarch/func-call-medium-call36-1.c: New test. + * gcc.target/loongarch/func-call-medium-call36.c: New test. + +Co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/config.in | 6 + + gcc/config/loongarch/loongarch-opts.h | 4 + + gcc/config/loongarch/loongarch.cc | 12 +- + gcc/config/loongarch/loongarch.md | 171 +++++++++++++++--- + gcc/config/loongarch/predicates.md | 7 +- + gcc/configure | 32 ++++ + gcc/configure.ac | 6 + + .../gcc.target/loongarch/func-call-medium-5.c | 1 + + .../gcc.target/loongarch/func-call-medium-6.c | 1 + + .../gcc.target/loongarch/func-call-medium-7.c | 1 + + .../gcc.target/loongarch/func-call-medium-8.c | 1 + + .../loongarch/func-call-medium-call36-1.c | 21 +++ + .../loongarch/func-call-medium-call36.c | 32 ++++ + gcc/testsuite/lib/target-supports.exp | 9 + + 14 files changed, 268 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c + +diff --git a/gcc/config.in b/gcc/config.in +index 04968b53c..033cfb98b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -759,6 +759,12 @@ + #endif + + ++/* Define if your assembler supports call36 relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_SUPPORT_CALL36 ++#endif ++ ++ + /* Define if your assembler and linker support thread-local storage. 
*/ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_TLS +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index dfbe9dd5c..22ce1a122 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -99,6 +99,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_EXPLICIT_RELOCS 0 + #endif + ++#ifndef HAVE_AS_SUPPORT_CALL36 ++#define HAVE_AS_SUPPORT_CALL36 0 ++#endif ++ + #ifndef HAVE_AS_MRELAX_OPTION + #define HAVE_AS_MRELAX_OPTION 0 + #endif +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b6f0d61ef..43f0e82ba 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3002,12 +3002,16 @@ loongarch_legitimize_call_address (rtx addr) + + enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr); + +- /* Split function call insn 'bl sym' or 'bl %plt(sym)' to : +- pcalau12i $rd, %pc_hi20(sym) +- jr $rd, %pc_lo12(sym). */ ++ /* If add the compilation option '-cmodel=medium', and the assembler does ++ not support call36. The following sequence of instructions will be ++ used for the function call: ++ pcalau12i $rd, %pc_hi20(sym) ++ jr $rd, %pc_lo12(sym) ++ */ + + if (TARGET_CMODEL_MEDIUM +- && TARGET_EXPLICIT_RELOCS ++ && !HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr)) + && (symbol_type == SYMBOL_PCREL + || (symbol_type == SYMBOL_GOT_DISP && flag_plt))) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed86c95bd..52e40a208 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3274,7 +3274,13 @@ + XEXP (target, 1), + operands1)); + else +- emit_call_insn (gen_sibcall_internal (target, operands1)); ++ { ++ rtx call = emit_call_insn (gen_sibcall_internal (target, operands1)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + DONE; + }) + +@@ -3282,10 +3288,25 @@ + (call (mem:SI (match_operand 0 "call_insn_operand" "j,c,b")) + (match_operand 1 "" "")) + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%0 +- b\t%0 +- b\t%%plt(%0)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%0"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%0)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@sibcall_internal_1<mode>" +@@ -3318,9 +3339,17 @@ + operands2, + arg2)); + else +- emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target, +- operands2, +- arg2)); ++ { ++ rtx call ++ = emit_call_insn (gen_sibcall_value_multiple_internal (arg1, ++ target, ++ operands2, ++ arg2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + } + else + { +@@ -3334,8 +3363,15 @@ + XEXP (target, 1), + operands2)); + else +- emit_call_insn (gen_sibcall_value_internal (operands0, target, +- operands2)); ++ { ++ rtx call = emit_call_insn (gen_sibcall_value_internal (operands0, ++ target, ++ operands2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG 
(Pmode, T0_REGNUM)); ++ } + } + DONE; + }) +@@ -3345,10 +3381,25 @@ + (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,b")) + (match_operand 2 "" ""))) + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%1 +- b\t%1 +- b\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%1"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@sibcall_value_internal_1<mode>" +@@ -3368,10 +3419,25 @@ + (call (mem:SI (match_dup 1)) + (match_dup 2))) + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%1 +- b\t%1 +- b\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%1"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@sibcall_value_multiple_internal_1<mode>" +@@ -3411,10 +3477,25 @@ + (match_operand 1 "" "")) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + "" +- "@ +- jirl\t$r1,%0,0 +- bl\t%0 +- bl\t%%plt(%0)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$r1,%0,0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%0"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%%plt(%0)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@call_internal_1<mode>" +@@ -3473,10 +3554,25 @@ + (match_operand 2 "" ""))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + "" +- "@ +- jirl\t$r1,%1,0 +- bl\t%1 +- bl\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$r1,%1,0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@call_value_internal_1<mode>" +@@ -3498,10 +3594,25 @@ + (match_dup 2))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + "" +- "@ +- jirl\t$r1,%1,0 +- bl\t%1 +- bl\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$r1,%1,0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + (set_attr "jirl" "indirect,direct,direct")) + + (define_insn "@call_value_multiple_internal_1<mode>" +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 1d669f560..2aae87db4 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -443,7 +443,9 @@ + { + case SYMBOL_PCREL: + if (TARGET_CMODEL_EXTREME +- || (TARGET_CMODEL_MEDIUM && !TARGET_EXPLICIT_RELOCS)) ++ || 
(TARGET_CMODEL_MEDIUM ++ && HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else + return 1; +@@ -452,7 +454,8 @@ + if (TARGET_CMODEL_EXTREME + || !flag_plt + || (flag_plt && TARGET_CMODEL_MEDIUM +- && !TARGET_EXPLICIT_RELOCS)) ++ && HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else + return 1; +diff --git a/gcc/configure b/gcc/configure +index 09bacfec3..5842e7a18 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28836,6 +28836,38 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then + + $as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for call36 relocation support" >&5 ++$as_echo_n "checking assembler for call36 relocation support... " >&6; } ++if ${gcc_cv_as_loongarch_call36+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_call36=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'pcaddu18i $r1, %call36(a) ++ jirl $r1, $r1, 0' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_call36=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_call36" >&5 ++$as_echo "$gcc_cv_as_loongarch_call36" >&6; } ++if test $gcc_cv_as_loongarch_call36 = yes; then ++ ++$as_echo "#define HAVE_AS_SUPPORT_CALL36 1" >>confdefs.h ++ + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5 +diff --git a/gcc/configure.ac b/gcc/configure.ac +index a0999152e..9c3fd3ad6 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5329,6 +5329,12 @@ x: + a:pcalau12i $t0,%pc_hi20(a),, + AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1, + Define if your assembler supports explicit relocation.)) ++ gcc_GAS_CHECK_FEATURE(call36 relocation support, ++ gcc_cv_as_loongarch_call36,, ++ pcaddu18i $r1, %call36(a) ++ jirl $r1, $r1, 0,, ++ AC_DEFINE(HAVE_AS_SUPPORT_CALL36, 1, ++ Define if your assembler supports call36 relocation.)) + gcc_GAS_CHECK_FEATURE(eh_frame pcrel encoding support, + gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,, + .cfi_startproc +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c +index 8a47b5afc..cae880bd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c +index 1e75e60e0..33819542d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c +@@ -1,4 +1,5 @@ + /* { 
dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c +index 9e89085ca..969b59d04 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c +index fde9c6e0e..786ff395f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c +new file mode 100644 +index 000000000..872ff32f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target loongarch_call36_support } */ ++/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */ ++/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r1,%call36\\(func\\)" } } */ ++/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r1,%call36\\(func_value\\)" } } */ ++ ++extern void func (void); ++int ++test (void) ++{ ++ func (); ++} ++ ++ ++extern int func_value (void); ++float ++test_value (void) ++{ ++ func_value (); ++} ++ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c +new file mode 100644 +index 000000000..98ccd260d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target loongarch_call36_support } */ ++/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */ ++/* { dg-final { scan-rtl-dump-times "\\(clobber \\(reg:DI 12 \\\$r12\\)\\)" 3 "final" } } */ ++/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r12,%call36\\(func\\)" } } */ ++/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r12,%call36\\(func_value\\)" } } */ ++/* { dg-final { scan-assembler "test_multi:.*pcaddu18i\t\\\$r12,%call36\\(func_multi\\)" } } */ ++ ++extern 
void func (void); ++void ++test (void) ++{ ++ func(); ++} ++ ++ ++extern int func_value (void); ++int ++test_value (void) ++{ ++ func_value (); ++} ++ ++struct t {float a; float b;}; ++ ++extern struct t func_multi (void); ++struct t ++test_multi (void) ++{ ++ func_multi (); ++} ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index bbe145c1c..b8bff1a31 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10573,6 +10573,15 @@ proc check_effective_target_loongarch_asx_hw { } { + } "-mlasx" + } + ++# Check whether LoongArch binutils supports call36 relocation. ++proc check_effective_target_loongarch_call36_support { } { ++ return check_no_compiler_messages loongarch_call36_support object { ++/* Assembly code */ ++ pcaddu18i $r1,%call36(a) ++ jirl $r1,$r1,0 ++ } "" ++} ++ + # Return 1 if the target does *not* require strict alignment. + + proc check_effective_target_non_strict_align {} { +-- +2.43.0 +
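To make the effect of the call36 support concrete, here is a minimal, hedged example (the function names are illustrative, not taken from the patch). Built with -mcmodel=medium against an assembler that passes the new HAVE_AS_SUPPORT_CALL36 configure probe, a direct call is expected to lower to the pcaddu18i/jirl pair shown in the templates above:

extern void callee (void);

void
caller (void)
{
  /* Expected code under -mcmodel=medium with call36 support:
       pcaddu18i $r1,%call36(callee)
       jirl      $r1,$r1,0
     A sibling (tail) call clobbers $r12 instead and ends with
     jirl $r0,$r12,0.  */
  callee ();
}

A plain bl reaches only about ±128 MiB from the call site; the pcaddu18i/jirl pair extends the direct-call range to roughly ±128 GiB, which is why the medium code model prefers it whenever the assembler understands the %call36 relocation.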
_service:tar_scm:0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
Added
@@ -0,0 +1,362 @@ +From 704e67084fcd7f3ea89321e17dfafa7e907c907c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 17 Nov 2023 15:42:53 +0800 +Subject: PATCH 033/188 LoongArch: Implement atomic operations using + LoongArch1.1 instructions. + +1. short and char type calls for atomic_add_fetch and __atomic_fetch_add are + implemented using amadd{_db}.{b/h}. +2. Use amcas{_db}.{b/h/w/d} to implement __atomic_compare_exchange_n and __atomic_compare_exchange. +3. The short and char types of the functions __atomic_exchange and __atomic_exchange_n are + implemented using amswap{_db}.{b/h}. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Add comments. + * config/loongarch/loongarch-opts.h (ISA_BASE_IS_LA64V110): Define macro. + * config/loongarch/loongarch.cc (loongarch_memmodel_needs_rel_acq_fence): + Remove redundant code implementations. + * config/loongarch/sync.md (d): Added QI, HI support. + (atomic_add<mode>): New template. + (atomic_exchange<mode>_short): Likewise. + (atomic_cas_value_strong<mode>_amcas): Likewise.. + (atomic_fetch_add<mode>_short): Likewise. +--- + gcc/config/loongarch/loongarch-def.h | 2 + + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch.cc | 6 +- + gcc/config/loongarch/sync.md | 186 ++++++++++++++++++++------ + 4 files changed, 147 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 4757de14b..078d8607d 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -54,7 +54,9 @@ extern "C" { + + /* enum isa_base */ + extern const char* loongarch_isa_base_strings; ++/* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 ++/* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 22ce1a122..9b3d023ac 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -86,10 +86,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + || la_target.isa.simd == ISA_EXT_SIMD_LASX) + #define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) + +- + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) + + /* Note: optimize_size may vary across functions, + while -mno-memcpy imposes a global constraint. 
*/ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 43f0e82ba..7bb46a45d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5813,16 +5813,12 @@ loongarch_print_operand_punct_valid_p (unsigned char code) + static bool + loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: + case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + return true; + + case MEMMODEL_RELAXED: +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index dd1f98946..1eabaec04 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -38,7 +38,7 @@ + (plus "add") (ior "or") (xor "xor") (and "and")) + + ;; This attribute gives the format suffix for atomic memory operations. +-(define_mode_attr amo (SI "w") (DI "d")) ++(define_mode_attr amo (QI "b") (HI "h") (SI "w") (DI "d")) + + ;; <amop> expands to the name of the atomic operand that implements a + ;; particular code. +@@ -123,7 +123,18 @@ + UNSPEC_SYNC_OLD_OP)) + "" + "am<amop>%A2.<amo>\t$zero,%z1,%0" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) ++ ++(define_insn "atomic_add<mode>" ++ (set (match_operand:SHORT 0 "memory_operand" "+ZB") ++ (unspec_volatile:SHORT ++ (plus:SHORT (match_dup 0) ++ (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand") ;; model ++ UNSPEC_SYNC_OLD_OP)) ++ "ISA_BASE_IS_LA64V110" ++ "amadd%A2.<amo>\t$zero,%z1,%0" ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_fetch_<atomic_optab><mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -131,12 +142,12 @@ + (set (match_dup 1) + (unspec_volatile:GPR + (any_atomic:GPR (match_dup 1) +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) + "" + "am<amop>%A3.<amo>\t%0,%z2,%1" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_exchange<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -148,7 +159,19 @@ + (match_operand:GPR 2 "register_operand" "r")) + "" + "amswap%A3.<amo>\t%0,%z2,%1" +- (set (attr "length") (const_int 8))) ++ (set (attr "length") (const_int 4))) ++ ++(define_insn "atomic_exchange<mode>_short" ++ (set (match_operand:SHORT 0 "register_operand" "=&r") ++ (unspec_volatile:SHORT ++ (match_operand:SHORT 1 "memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand") ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand" "r")) ++ "ISA_BASE_IS_LA64V110" ++ "amswap%A3.<amo>\t%0,%z2,%1" ++ (set (attr "length") (const_int 4))) + + (define_insn "atomic_cas_value_strong<mode>" + (set (match_operand:GPR 0 "register_operand" "=&r") +@@ -156,25 +179,36 @@ + (set (match_dup 1) + (unspec_volatile:GPR (match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +- (match_operand:SI 4 "const_int_operand") ;; mod_s +- (match_operand:SI 5 "const_int_operand") ;; mod_f ++ (match_operand:SI 4 "const_int_operand") ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- (clobber (match_scratch:GPR 6 "=&r")) ++ (clobber (match_scratch:GPR 5 "=&r")) + "" + { + return "1:\\n\\t" + "ll.<amo>\\t%0,%1\\n\\t" 
+ "bne\\t%0,%z2,2f\\n\\t" +- "or%i3\\t%6,$zero,%3\\n\\t" +- "sc.<amo>\\t%6,%1\\n\\t" +- "beqz\\t%6,1b\\n\\t" ++ "or%i3\\t%5,$zero,%3\\n\\t" ++ "sc.<amo>\\t%5,%1\\n\\t" ++ "beqz\\t%5,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "%G5\\n\\t" ++ "%G4\\n\\t" + "3:\\n\\t"; + } + (set (attr "length") (const_int 28))) + ++(define_insn "atomic_cas_value_strong<mode>_amcas" ++ (set (match_operand:QHWD 0 "register_operand" "=&r") ++ (match_operand:QHWD 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:QHWD (match_operand:QHWD 2 "reg_or_0_operand" "rJ") ++ (match_operand:QHWD 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand") ;; mod_s ++ UNSPEC_COMPARE_AND_SWAP)) ++ "ISA_BASE_IS_LA64V110" ++ "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1" ++ (set (attr "length") (const_int 8))) ++ + (define_expand "atomic_compare_and_swap<mode>" + (match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output +@@ -186,9 +220,29 @@ + (match_operand:SI 7 "const_int_operand" "") ;; mod_f + "" + { +- emit_insn (gen_atomic_cas_value_strong<mode> (operands1, operands2, +- operands3, operands4, +- operands6, operands7)); ++ rtx mod_s, mod_f; ++ ++ mod_s = operands6; ++ mod_f = operands7; ++ ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. */ ++ ++ if (is_mm_acquire (memmodel_base (INTVAL (mod_f))) ++ && is_mm_release (memmodel_base (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands6 = mod_s; ++ ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, ++ operands3, operands4, ++ operands6)); ++ else ++ emit_insn (gen_atomic_cas_value_strong<mode> (operands1, operands2, ++ operands3, operands4, ++ operands6)); + + rtx compare = operands1; + if (operands3 != const0_rtx) +@@ -292,31 +346,53 @@ + (match_operand:SI 7 "const_int_operand" "") ;; mod_f + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; +- loongarch_expand_atomic_qihi (generator, operands1, operands2, +- operands3, operands4, operands7); ++ rtx mod_s, mod_f; + +- rtx compare = operands1; +- if (operands3 != const0_rtx) +- { +- machine_mode mode = GET_MODE (operands3); +- rtx op1 = convert_modes (SImode, mode, operands1, true); +- rtx op3 = convert_modes (SImode, mode, operands3, true); +- rtx difference = gen_rtx_MINUS (SImode, op1, op3); +- compare = gen_reg_rtx (SImode); +- emit_insn (gen_rtx_SET (compare, difference)); +- } ++ mod_s = operands6; ++ mod_f = operands7; + +- if (word_mode != <MODE>mode) ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ ++ ++ if (is_mm_acquire (memmodel_base (INTVAL (mod_f))) ++ && is_mm_release (memmodel_base (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands6 = mod_s; ++ ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, ++ operands3, operands4, ++ operands6)); ++ else + { +- rtx reg = gen_reg_rtx (word_mode); +- emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); +- compare = reg; ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; ++ loongarch_expand_atomic_qihi (generator, operands1, operands2, ++ operands3, operands4, operands6); + } + +- emit_insn (gen_rtx_SET (operands0, +- gen_rtx_EQ (SImode, compare, const0_rtx))); ++ rtx compare = operands1; ++ if (operands3 != const0_rtx) ++ { ++ machine_mode mode = GET_MODE (operands3); ++ rtx op1 = convert_modes (SImode, mode, operands1, true); ++ rtx op3 = convert_modes (SImode, mode, operands3, true); ++ rtx difference = gen_rtx_MINUS (SImode, op1, op3); ++ compare = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (compare, difference)); ++ } ++ ++ if (word_mode != <MODE>mode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); ++ compare = reg; ++ } ++ ++ emit_insn (gen_rtx_SET (operands0, ++ gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; + }) + +@@ -505,13 +581,31 @@ + (match_operand:SHORT 2 "register_operand")) + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_exchange_7_si; +- loongarch_expand_atomic_qihi (generator, operands0, operands1, +- const0_rtx, operands2, operands3); ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_exchange<mode>_short (operands0, operands1, operands2, operands3)); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_exchange_7_si; ++ loongarch_expand_atomic_qihi (generator, operands0, operands1, ++ const0_rtx, operands2, operands3); ++ } + DONE; + }) + ++(define_insn "atomic_fetch_add<mode>_short" ++ (set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ (plus:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand") ;; model ++ UNSPEC_SYNC_OLD_OP)) ++ "ISA_BASE_IS_LA64V110" ++ "amadd%A3.<amo>\t%0,%z2,%1" ++ (set (attr "length") (const_int 4))) ++ + (define_expand "atomic_fetch_add<mode>" + (set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) +@@ -523,10 +617,16 @@ + UNSPEC_SYNC_OLD_OP)) + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_add_7_si; +- loongarch_expand_atomic_qihi (generator, operands0, operands1, +- operands1, operands2, operands3); ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_fetch_add<mode>_short (operands0, operands1, ++ operands2, operands3)); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_add_7_si; ++ loongarch_expand_atomic_qihi (generator, operands0, operands1, ++ operands1, operands2, operands3); ++ } + DONE; + }) + +-- +2.43.0 +
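A hedged sketch of what this patch means at the source level: on a target whose base ISA is LoongArch v1.10 (the ISA_BASE_IS_LA64V110 condition in the templates above), each of the following __atomic builtins should compile to the single-instruction form noted in the comments instead of an ll/sc loop. Function names are illustrative.

#include <stdint.h>

int16_t
fetch_add_h (int16_t *p, int16_t v)
{
  /* atomic_fetch_add<mode>_short: amadd_db.h returning the old value.  */
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}

uint8_t
exchange_b (uint8_t *p, uint8_t v)
{
  /* atomic_exchange<mode>_short: amswap_db.b.  */
  return __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST);
}

_Bool
cas_w (int *p, int *expected, int desired)
{
  /* atomic_cas_value_strong<mode>_amcas: ori plus amcas_db.w.  */
  return __atomic_compare_exchange_n (p, expected, desired, 0,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}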
_service:tar_scm:0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
Added
@@ -0,0 +1,140 @@ +From 61a70e6b6b44bf420eae559d998e109b70e5a9b6 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 17 Nov 2023 16:04:45 +0800 +Subject: PATCH 034/188 LoongArch: atomic_load and atomic_store are + implemented using dbar grading. + +Because the la464 memory model design allows the same address load out of order, +so in the following test example, the Load of 23 lines may be executed first over +the load of 21 lines, resulting in an error. +So when memmodel is MEMMODEL_RELAXED, the load instruction will be followed by +"dbar 0x700" when implementing _atomic_load. + + 1 void * + 2 gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock) + 3 { + 4 int *intptr; + 5 uintptr_t oldval = 1; + 6 + 7 __atomic_compare_exchange_n (ptrlock, &oldval, 2, false, + 8 MEMMODEL_RELAXED, MEMMODEL_RELAXED); + 9 + 10 /* futex works on ints, not pointers. + 11 But a valid work share pointer will be at least + 12 8 byte aligned, so it is safe to assume the low + 13 32-bits of the pointer won't contain values 1 or 2. */ + 14 __asm volatile ("" : "=r" (intptr) : "0" (ptrlock)); + 15 #if __BYTE_ORDER == __BIG_ENDIAN + 16 if (sizeof (*ptrlock) > sizeof (int)) + 17 intptr += (sizeof (*ptrlock) / sizeof (int)) - 1; + 18 #endif + 19 do + 20 do_wait (intptr, 2); + 21 while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2); + 22 __asm volatile ("" : : : "memory"); + 23 return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE); + 24 } + +gcc/ChangeLog: + + * config/loongarch/sync.md (atomic_load<mode>): New template. +--- + gcc/config/loongarch/sync.md | 70 +++++++++++++++++++++++++++++++++--- + 1 file changed, 65 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 1eabaec04..f4673c856 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -30,6 +30,7 @@ + UNSPEC_SYNC_OLD_OP + UNSPEC_SYNC_EXCHANGE + UNSPEC_ATOMIC_STORE ++ UNSPEC_ATOMIC_LOAD + UNSPEC_MEMORY_BARRIER + ) + +@@ -103,16 +104,75 @@ + + ;; Atomic memory operations. + ++(define_insn "atomic_load<mode>" ++ (set (match_operand:QHWD 0 "register_operand" "=r") ++ (unspec_volatile:QHWD ++ (match_operand:QHWD 1 "memory_operand" "+m") ++ (match_operand:SI 2 "const_int_operand") ;; model ++ UNSPEC_ATOMIC_LOAD)) ++ "" ++{ ++ enum memmodel model = memmodel_base (INTVAL (operands2)); ++ ++ switch (model) ++ { ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0x11\\n\\t" ++ "ld.<size>\t%0,%1\\n\\t" ++ "dbar\t0x14\\n\\t"; ++ case MEMMODEL_ACQUIRE: ++ return "ld.<size>\t%0,%1\\n\\t" ++ "dbar\t0x14\\n\\t"; ++ case MEMMODEL_RELAXED: ++ return "ld.<size>\t%0,%1\\n\\t" ++ "dbar\t0x700\\n\\t"; ++ ++ default: ++ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, ++ __ATOMIC_CONSUME and __ATOMIC_ACQUIRE. ++ The expand_builtin_atomic_store function converts all invalid memmodels ++ to MEMMODEL_SEQ_CST. ++ ++ __atomic builtins doc: "Consume is implemented using the ++ stronger acquire memory order because of a deficiency in C++11's ++ semantics." See PR 59448 and get_memmodel in builtins.cc. */ ++ gcc_unreachable (); ++ } ++} ++ (set (attr "length") (const_int 12))) ++ + ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. 
+ (define_insn "atomic_store<mode>" +- (set (match_operand:GPR 0 "memory_operand" "+ZB") +- (unspec_volatile:GPR +- (match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ (set (match_operand:QHWD 0 "memory_operand" "+m") ++ (unspec_volatile:QHWD ++ (match_operand:QHWD 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand") ;; model + UNSPEC_ATOMIC_STORE)) + "" +- "amswap%A2.<amo>\t$zero,%z1,%0" +- (set (attr "length") (const_int 8))) ++{ ++ enum memmodel model = memmodel_base (INTVAL (operands2)); ++ ++ switch (model) ++ { ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0x12\\n\\t" ++ "st.<size>\t%z1,%0\\n\\t" ++ "dbar\t0x18\\n\\t"; ++ case MEMMODEL_RELEASE: ++ return "dbar\t0x12\\n\\t" ++ "st.<size>\t%z1,%0\\n\\t"; ++ case MEMMODEL_RELAXED: ++ return "st.<size>\t%z1,%0"; ++ ++ default: ++ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, ++ and __ATOMIC_RELEASE. ++ The expand_builtin_atomic_store function converts all invalid memmodels ++ to MEMMODEL_SEQ_CST. */ ++ gcc_unreachable (); ++ } ++} ++ (set (attr "length") (const_int 12))) + + (define_insn "atomic_<atomic_optab><mode>" + (set (match_operand:GPR 0 "memory_operand" "+ZB") +-- +2.43.0 +
_service:tar_scm:0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
Added
@@ -0,0 +1,615 @@ +From 535fb5a2d4347801439fbb51fa07cd0317183cee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 02:08:03 +0000 +Subject: PATCH 035/188 LoongArch: genopts: Add infrastructure to generate + code for new features in ISA evolution + +LoongArch v1.10 introduced the concept of ISA evolution. During ISA +evolution, many independent features can be added and enumerated via +CPUCFG. + +Add a data file into genopts storing the CPUCFG word, bit, the name +of the command line option controlling if this feature should be used +for compilation, and the text description. Make genstr.sh process these +info and add the command line options into loongarch.opt and +loongarch-str.h, and generate a new file loongarch-cpucfg-map.h for +mapping CPUCFG output to the corresponding option. When handling +-march=native, use the information in loongarch-cpucfg-map.h to generate +the corresponding option mask. Enable the features implied by -march +setting unless the user has explicitly disabled the feature. + +The added options (-mdiv32 and -mld-seq-sa) are not really handled yet. +They'll be used in the following patches. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: New data file. + * config/loongarch/genopts/genstr.sh: Translate info in + isa-evolution.in when generating loongarch-str.h, loongarch.opt, + and loongarch-cpucfg-map.h. + * config/loongarch/genopts/loongarch.opt.in (isa_evolution): + New variable. + * config/loongarch/t-loongarch: (loongarch-cpucfg-map.h): New + rule. + (loongarch-str.h): Depend on isa-evolution.in. + (loongarch.opt): Depend on isa-evolution.in. + (loongarch-cpu.o): Depend on loongarch-cpucfg-map.h. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch-def.h (loongarch_isa): Add field + for evolution features. Add helper function to enable features + in this field. + Probe native CPU capability and save the corresponding options + into preset. + * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): + Probe native CPU capability and save the corresponding options + into preset. + (cache_cpucfg): Simplify with C++11-style for loop. + (cpucfg_useful_idx, N_CPUCFG_WORDS): Move to ... + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Enable the ISA evolution + feature options implied by -march and not explicitly disabled. + (loongarch_asm_code_end): New function, print ISA information as + comments in the assembly if -fverbose-asm. It makes easier to + debug things like -march=native. + (TARGET_ASM_CODE_END): Define. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Generate. + (cpucfg_useful_idx, N_CPUCFG_WORDS) ... here. 
+--- + gcc/config/loongarch/genopts/genstr.sh | 92 ++++++++++++++++++- + gcc/config/loongarch/genopts/isa-evolution.in | 2 + + gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++ + gcc/config/loongarch/loongarch-cpu.cc | 46 +++++----- + gcc/config/loongarch/loongarch-cpucfg-map.h | 48 ++++++++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-str.h | 6 +- + gcc/config/loongarch/loongarch.cc | 31 +++++++ + gcc/config/loongarch/loongarch.opt | 20 +++- + gcc/config/loongarch/t-loongarch | 21 ++++- + 10 files changed, 244 insertions(+), 36 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/isa-evolution.in + create mode 100644 gcc/config/loongarch/loongarch-cpucfg-map.h + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 972ef125f..bcc616e98 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -25,8 +25,8 @@ cd "$(dirname "$0")" + # Generate a header containing definitions from the string table. + gen_defines() { + cat <<EOF +-/* Generated automatically by "genstr" from "loongarch-strings". +- Please do not edit this file directly. ++/* Generated automatically by "genstr" from "loongarch-strings" and ++ "isa-evolution.in". Please do not edit this file directly. + + Copyright (C) 2021-2022 Free Software Foundation, Inc. + Contributed by Loongson Ltd. +@@ -56,6 +56,15 @@ EOF + loongarch-strings + + echo ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ a=$3 ++ gsub(/-/, "_", a) ++ print("#define OPTSTR_"toupper(a)"\t\""$3"\"") ++ }' isa-evolution.in ++ ++ echo + echo "#endif /* LOONGARCH_STR_H */" + } + +@@ -77,11 +86,12 @@ gen_options() { + # print a header + cat << EOF + ; Generated by "genstr" from the template "loongarch.opt.in" +-; and definitions from "loongarch-strings". ++; and definitions from "loongarch-strings" and "isa-evolution.in". + ; + ; Please do not edit this file directly. + ; It will be automatically updated during a gcc build +-; if you change "loongarch.opt.in" or "loongarch-strings". ++; if you change "loongarch.opt.in", "loongarch-strings", or ++; "isa-evolution.in". + ; + EOF + +@@ -91,13 +101,85 @@ EOF + eval "echo \"$line\"" + done + } ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ print("") ++ print("m"$3) ++ gsub(/-/, "_", $3) ++ print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ $1=""; $2=""; $3="" ++ sub(/^ */, "", $0) ++ print($0) ++ }' isa-evolution.in ++} ++ ++gen_cpucfg_map() { ++ cat <<EOF ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#ifndef LOONGARCH_CPUCFG_MAP_H ++#define LOONGARCH_CPUCFG_MAP_H ++ ++#include "options.h" ++ ++static constexpr struct { ++ int cpucfg_word; ++ unsigned int cpucfg_bit; ++ HOST_WIDE_INT isa_evolution_bit; ++} cpucfg_map = { ++EOF ++ ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ gsub(/-/, "_", $3) ++ print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },") ++ }' isa-evolution.in ++ ++ echo "};" ++ echo ++ echo "static constexpr int cpucfg_useful_idx = {" ++ ++ awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") } ++ {if ($1+0 > max+0) max=$1; print(" "$1",")}' \ ++ isa-evolution.in | sort -n | uniq ++ ++ echo "};" ++ echo "" ++ ++ awk 'BEGIN { max=19 } ++ { if ($1+0 > max+0) max=$1 } ++ END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \ ++ isa-evolution.in ++ ++ echo "#endif /* LOONGARCH_CPUCFG_MAP_H */" + } + + main() { + case "$1" in ++ cpucfg-map) gen_cpucfg_map;; + header) gen_defines;; + opt) gen_options;; +- *) echo "Unknown Command: \"$1\". Available: header, opt"; exit 1;; ++ *) echo "Unknown Command: \"$1\". Available: cpucfg-map, header, opt"; exit 1;; + esac + } + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +new file mode 100644 +index 000000000..e58f0d6a1 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -0,0 +1,2 @@ ++2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. ++3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index bd3cfaf60..a49de07c9 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -247,3 +247,10 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. ++ ++; Features added during ISA evolution. This concept is different from ISA ++; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the ++; explanation. These features may be implemented and enumerated with ++; CPUCFG independantly, so we use bit flags to specify them. ++Variable ++HOST_WIDE_INT isa_evolution = 0 +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index cbe52d7ed..e1cd85d02 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -29,12 +29,11 @@ along with GCC; see the file COPYING3. 
If not see + #include "loongarch-def.h" + #include "loongarch-opts.h" + #include "loongarch-cpu.h" ++#include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + + /* Native CPU detection with "cpucfg" */ +-#define N_CPUCFG_WORDS 0x15 + static uint32_t cpucfg_cacheN_CPUCFG_WORDS = { 0 }; +-static const int cpucfg_useful_idx = {0, 1, 2, 16, 17, 18, 19}; + + static uint32_t + read_cpucfg_word (int wordno) +@@ -56,11 +55,8 @@ read_cpucfg_word (int wordno) + void + cache_cpucfg (void) + { +- for (unsigned int i = 0; i < sizeof (cpucfg_useful_idx) / sizeof (int); i++) +- { +- cpucfg_cachecpucfg_useful_idxi +- = read_cpucfg_word (cpucfg_useful_idxi); +- } ++ for (int idx: cpucfg_useful_idx) ++ cpucfg_cacheidx = read_cpucfg_word (idx); + } + + uint32_t +@@ -125,11 +121,12 @@ fill_native_cpu_config (struct loongarch_target *tgt) + int tmp; + tgt->cpu_arch = native_cpu_type; + ++ auto &preset = loongarch_cpu_default_isatgt->cpu_arch; ++ + /* Fill: loongarch_cpu_default_isatgt->cpu_arch.base + With: base architecture (ARCH) + At: cpucfg_words11:0 */ + +- #define PRESET_ARCH (loongarch_cpu_default_isatgt->cpu_arch.base) + switch (cpucfg_cache1 & 0x3) + { + case 0x02: +@@ -144,19 +141,18 @@ fill_native_cpu_config (struct loongarch_target *tgt) + } + + /* Check consistency with PRID presets. */ +- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_ARCH) ++ if (native_cpu_type != CPU_NATIVE && tmp != preset.base) + warning (0, "base architecture %qs differs from PRID preset %qs", + loongarch_isa_base_stringstmp, +- loongarch_isa_base_stringsPRESET_ARCH); ++ loongarch_isa_base_stringspreset.base); + + /* Use the native value anyways. */ +- PRESET_ARCH = tmp; ++ preset.base = tmp; + + /* Fill: loongarch_cpu_default_isatgt->cpu_arch.fpu + With: FPU type (FP, FP_SP, FP_DP) + At: cpucfg_words22:0 */ + +- #define PRESET_FPU (loongarch_cpu_default_isatgt->cpu_arch.fpu) + switch (cpucfg_cache2 & 0x7) + { + case 0x07: +@@ -179,20 +175,19 @@ fill_native_cpu_config (struct loongarch_target *tgt) + } + + /* Check consistency with PRID presets. */ +- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_FPU) ++ if (native_cpu_type != CPU_NATIVE && tmp != preset.fpu) + warning (0, "floating-point unit %qs differs from PRID preset %qs", + loongarch_isa_ext_stringstmp, +- loongarch_isa_ext_stringsPRESET_FPU); ++ loongarch_isa_ext_stringspreset.fpu); + + /* Use the native value anyways. */ +- PRESET_FPU = tmp; ++ preset.fpu = tmp; + + + /* Fill: loongarch_cpu_default_isaCPU_NATIVE.simd + With: SIMD extension type (LSX, LASX) + At: cpucfg_words27:6 */ + +- #define PRESET_SIMD (loongarch_cpu_default_isatgt->cpu_arch.simd) + switch (cpucfg_cache2 & 0xc0) + { + case 0xc0: +@@ -219,14 +214,19 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Check consistency with PRID presets. */ + + /* +- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_SIMD) ++ if (native_cpu_type != CPU_NATIVE && tmp != preset.simd) + warning (0, "SIMD extension %qs differs from PRID preset %qs", + loongarch_isa_ext_stringstmp, +- loongarch_isa_ext_stringsPRESET_SIMD); ++ loongarch_isa_ext_stringspreset.simd); + */ + + /* Use the native value anyways. */ +- PRESET_SIMD = tmp; ++ preset.simd = tmp; ++ ++ /* Features added during ISA evolution. 
*/ ++ for (const auto &entry: cpucfg_map) ++ if (cpucfg_cacheentry.cpucfg_word & entry.cpucfg_bit) ++ preset.evolution |= entry.isa_evolution_bit; + } + + if (tune_native_p) +@@ -237,7 +237,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + With: cache size info + At: cpucfg_words16:2031:0 */ + +- #define PRESET_CACHE (loongarch_cpu_cachetgt->cpu_tune) ++ auto &preset_cache = loongarch_cpu_cachetgt->cpu_tune; + struct loongarch_cache native_cache; + int l1d_present = 0, l1u_present = 0; + int l2d_present = 0; +@@ -268,8 +268,8 @@ fill_native_cpu_config (struct loongarch_target *tgt) + >> 10; /* in kibibytes */ + + /* Use the native value anyways. */ +- PRESET_CACHE.l1d_line_size = native_cache.l1d_line_size; +- PRESET_CACHE.l1d_size = native_cache.l1d_size; +- PRESET_CACHE.l2d_size = native_cache.l2d_size; ++ preset_cache.l1d_line_size = native_cache.l1d_line_size; ++ preset_cache.l1d_size = native_cache.l1d_size; ++ preset_cache.l2d_size = native_cache.l2d_size; + } + } +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +new file mode 100644 +index 000000000..0c078c397 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -0,0 +1,48 @@ ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef LOONGARCH_CPUCFG_MAP_H ++#define LOONGARCH_CPUCFG_MAP_H ++ ++#include "options.h" ++ ++static constexpr struct { ++ int cpucfg_word; ++ unsigned int cpucfg_bit; ++ HOST_WIDE_INT isa_evolution_bit; ++} cpucfg_map = { ++ { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, ++ { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA }, ++}; ++ ++static constexpr int cpucfg_useful_idx = { ++ 0, ++ 1, ++ 2, ++ 3, ++ 16, ++ 17, ++ 18, ++ 19, ++}; ++ ++static constexpr int N_CPUCFG_WORDS = 20; ++#endif /* LOONGARCH_CPUCFG_MAP_H */ +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 078d8607d..cb99caebe 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_DEF_H + #define LOONGARCH_DEF_H + ++#include <stdint.h> + #include "loongarch-tune.h" + + #ifdef __cplusplus +@@ -121,6 +122,12 @@ struct loongarch_isa + int base; /* ISA_BASE_ */ + int fpu; /* ISA_EXT_FPU_ */ + int simd; /* ISA_EXT_SIMD_ */ ++ ++ /* ISA evolution features implied by -march=, for -march=native probed ++ via CPUCFG. The features implied by base may be not included here. ++ ++ Using int64_t instead of HOST_WIDE_INT for C compatibility. 
*/ ++ int64_t evolution; + }; + + struct loongarch_abi +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 037e9e583..cd9dbb41b 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -1,5 +1,5 @@ +-/* Generated automatically by "genstr" from "loongarch-strings". +- Please do not edit this file directly. ++/* Generated automatically by "genstr" from "loongarch-strings" and ++ "isa-evolution.in". Please do not edit this file directly. + + Copyright (C) 2021-2022 Free Software Foundation, Inc. + Contributed by Loongson Ltd. +@@ -69,4 +69,6 @@ along with GCC; see the file COPYING3. If not see + #define STR_EXPLICIT_RELOCS_NONE "none" + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + ++#define OPTSTR_DIV32 "div32" ++#define OPTSTR_LD_SEQ_SA "ld-seq-sa" + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7bb46a45d..8bd46da62 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7451,6 +7451,10 @@ loongarch_option_override_internal (struct gcc_options *opts, + if (loongarch_branch_cost == 0) + loongarch_branch_cost = loongarch_cost->branch_cost; + ++ /* If the user hasn't disabled a feature added during ISA evolution, ++ use the processor's default. */ ++ isa_evolution |= (la_target.isa.evolution & ++ ~global_options_set.x_isa_evolution); + + /* Enable sw prefetching at -O3 and higher. */ + if (opts->x_flag_prefetch_loop_arrays < 0 +@@ -11427,6 +11431,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + is_packed); + } + ++/* If -fverbose-asm, dump some info for debugging. */ ++static void ++loongarch_asm_code_end (void) ++{ ++#define DUMP_FEATURE(PRED) \ ++ fprintf (asm_out_file, "%s %s: %s\n", ASM_COMMENT_START, #PRED, \ ++ (PRED) ? "enabled" : "disabled") ++ ++ if (flag_verbose_asm) ++ { ++ fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START, ++ loongarch_cpu_strings la_target.cpu_arch); ++ fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START, ++ loongarch_cpu_strings la_target.cpu_tune); ++ fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, ++ loongarch_isa_base_strings la_target.isa.base); ++ DUMP_FEATURE (TARGET_DIV32); ++ DUMP_FEATURE (TARGET_LD_SEQ_SA); ++ } ++ ++ fputs ("\n\n", asm_out_file); ++#undef DUMP_FEATURE ++} ++ + /* Initialize the GCC target structure. */ + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +@@ -11446,6 +11474,9 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + #undef TARGET_ASM_FUNCTION_RODATA_SECTION + #define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section + ++#undef TARGET_ASM_CODE_END ++#define TARGET_ASM_CODE_END loongarch_asm_code_end ++ + #undef TARGET_SCHED_INIT + #define TARGET_SCHED_INIT loongarch_sched_init + #undef TARGET_SCHED_REORDER +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index d936954b8..5251f705d 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -1,9 +1,10 @@ + ; Generated by "genstr" from the template "loongarch.opt.in" +-; and definitions from "loongarch-strings". ++; and definitions from "loongarch-strings" and "isa-evolution.in". + ; + ; Please do not edit this file directly. + ; It will be automatically updated during a gcc build +-; if you change "loongarch.opt.in" or "loongarch-strings". 
++; if you change "loongarch.opt.in", "loongarch-strings", or ++; "isa-evolution.in". + ; + ; Copyright (C) 2021-2022 Free Software Foundation, Inc. + ; +@@ -254,3 +255,18 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. ++ ++; Features added during ISA evolution. This concept is different from ISA ++; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the ++; explanation. These features may be implemented and enumerated with ++; CPUCFG independantly, so we use bit flags to specify them. ++Variable ++HOST_WIDE_INT isa_evolution = 0 ++ ++mdiv32 ++Target Mask(ISA_DIV32) Var(isa_evolution) ++Support div.wu and mod.wu instructions with inputs not sign-extended. ++ ++mld-seq-sa ++Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) ++Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 12734c37b..57b1176bc 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -18,8 +18,9 @@ + + + GTM_H += loongarch-multilib.h +-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ +- $(srcdir)/config/loongarch/loongarch-tune.h ++OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ ++ $(srcdir)/config/loongarch/loongarch-tune.h \ ++ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + + # Canonical target triplet from config.gcc + LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\ +@@ -31,7 +32,8 @@ LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h + # String definition header + $(LA_STR_H): s-loongarch-str ; @true + s-loongarch-str: $(srcdir)/config/loongarch/genopts/genstr.sh \ +- $(srcdir)/config/loongarch/genopts/loongarch-strings ++ $(srcdir)/config/loongarch/genopts/loongarch-strings \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in + $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh header \ + $(srcdir)/config/loongarch/genopts/loongarch-strings > \ + tmp-loongarch-str.h +@@ -58,7 +60,8 @@ loongarch-driver.o : $(srcdir)/config/loongarch/loongarch-driver.cc $(LA_STR_H) + loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +-loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) ++loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ ++ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) +@@ -67,6 +70,7 @@ loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in \ + $(srcdir)/config/loongarch/genopts/loongarch-strings $(LA_STR_H) + $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ +@@ -74,3 +78,12 @@ s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(SHELL) $(srcdir)/../move-if-change tmp-loongarch.opt \ + $(srcdir)/config/loongarch/loongarch.opt + $(STAMP) s-loongarch-opt ++ ++$(srcdir)/config/loongarch/loongarch-cpucfg-map.h: 
s-loongarch-cpucfg-map ++ @true ++s-loongarch-cpucfg-map: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in ++ $(SHELL) $< cpucfg-map > tmp-cpucfg.h ++ $(SHELL) $(srcdir)/../move-if-change tmp-cpucfg.h \ ++ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++ $(STAMP) $@ +-- +2.43.0 +
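The generated loongarch-cpucfg-map.h reduces -march=native probing to a table walk. A hypothetical, self-contained condensation of that loop (the names here are illustrative, not the GCC internals):

#include <stdint.h>

struct cpucfg_entry
{
  int word;       /* CPUCFG word index.  */
  uint32_t bit;   /* Feature bit within that word.  */
  int64_t mask;   /* Matching OPTION_MASK_ISA_* flag.  */
};

/* OR together the option masks of every feature the CPU reports set.  */
static int64_t
probe_evolution (const struct cpucfg_entry *map, int n,
                 const uint32_t *cpucfg_cache)
{
  int64_t evolution = 0;
  for (int i = 0; i < n; i++)
    if (cpucfg_cache[map[i].word] & map[i].bit)
      evolution |= map[i].mask;
  return evolution;
}

Each line of isa-evolution.in supplies the word, bit, and option-name fields of one such entry, so supporting a future ISA feature is a one-line data change plus regeneration.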
_service:tar_scm:0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
Added
@@ -0,0 +1,148 @@ +From 24648180418affbaf044a58ae0b5f79a0cf71155 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 18 Nov 2023 03:19:07 +0800 +Subject: PATCH 036/188 LoongArch: Add evolution features of base ISA + revisions + + * config/loongarch/loongarch-def.h: + (loongarch_isa_base_features): Declare. Define it in ... + * config/loongarch/loongarch-cpu.cc + (loongarch_isa_base_features): ... here. + (fill_native_cpu_config): If we know the base ISA of the CPU + model from PRID, use it instead of la64 (v1.0). Check if all + expected features of this base ISA is available, emit a warning + if not. + * config/loongarch/loongarch-opts.cc (config_target_isa): Enable + the features implied by the base ISA if not -march=native. +--- + gcc/config/loongarch/loongarch-cpu.cc | 62 ++++++++++++++++++-------- + gcc/config/loongarch/loongarch-def.h | 5 +++ + gcc/config/loongarch/loongarch-opts.cc | 3 ++ + 3 files changed, 52 insertions(+), 18 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index e1cd85d02..76d66fa55 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -32,6 +32,19 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + ++/* loongarch_isa_base_features defined here instead of loongarch-def.c ++ because we need to use options.h. Pay attention on the order of elements ++ in the initializer becaue ISO C++ does not allow C99 designated ++ initializers! */ ++ ++#define ISA_BASE_LA64V110_FEATURES \ ++ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA) ++ ++int64_t loongarch_isa_base_featuresN_ISA_BASE_TYPES = { ++ /* ISA_BASE_LA64V100 = */ 0, ++ /* ISA_BASE_LA64V110 = */ ISA_BASE_LA64V110_FEATURES, ++}; ++ + /* Native CPU detection with "cpucfg" */ + static uint32_t cpucfg_cacheN_CPUCFG_WORDS = { 0 }; + +@@ -127,24 +140,22 @@ fill_native_cpu_config (struct loongarch_target *tgt) + With: base architecture (ARCH) + At: cpucfg_words11:0 */ + +- switch (cpucfg_cache1 & 0x3) +- { +- case 0x02: +- tmp = ISA_BASE_LA64V100; +- break; +- +- default: +- fatal_error (UNKNOWN_LOCATION, +- "unknown native base architecture %<0x%x%>, " +- "%qs failed", (unsigned int) (cpucfg_cache1 & 0x3), +- "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); +- } +- +- /* Check consistency with PRID presets. */ +- if (native_cpu_type != CPU_NATIVE && tmp != preset.base) +- warning (0, "base architecture %qs differs from PRID preset %qs", +- loongarch_isa_base_stringstmp, +- loongarch_isa_base_stringspreset.base); ++ if (native_cpu_type != CPU_NATIVE) ++ tmp = loongarch_cpu_default_isanative_cpu_type.base; ++ else ++ switch (cpucfg_cache1 & 0x3) ++ { ++ case 0x02: ++ tmp = ISA_BASE_LA64V100; ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "unknown native base architecture %<0x%x%>, " ++ "%qs failed", ++ (unsigned int) (cpucfg_cache1 & 0x3), ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ } + + /* Use the native value anyways. */ + preset.base = tmp; +@@ -227,6 +238,21 @@ fill_native_cpu_config (struct loongarch_target *tgt) + for (const auto &entry: cpucfg_map) + if (cpucfg_cacheentry.cpucfg_word & entry.cpucfg_bit) + preset.evolution |= entry.isa_evolution_bit; ++ ++ if (native_cpu_type != CPU_NATIVE) ++ { ++ /* Check if the local CPU really supports the features of the base ++ ISA of probed native_cpu_type. If any feature is not detected, ++ either GCC or the hardware is buggy. 
*/ ++ auto base_isa_feature = loongarch_isa_base_featurespreset.base; ++ if ((preset.evolution & base_isa_feature) != base_isa_feature) ++ warning (0, ++ "detected base architecture %qs, but some of its " ++ "features are not detected; the detected base " ++ "architecture may be unreliable, only detected " ++ "features will be enabled", ++ loongarch_isa_base_stringspreset.base); ++ } + } + + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index cb99caebe..ca0a324dd 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,12 +55,17 @@ extern "C" { + + /* enum isa_base */ + extern const char* loongarch_isa_base_strings; ++ + /* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 + /* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + ++/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is ++ we cannot use the C++ header options.h in loongarch-def.c. */ ++extern int64_t loongarch_isa_base_features; ++ + /* enum isa_ext_* */ + extern const char* loongarch_isa_ext_strings; + #define ISA_EXT_NONE 0 +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index f10a9d3ff..390720479 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -284,6 +284,9 @@ config_target_isa: + /* Get default ISA from "-march" or its default value. */ + t.isa = loongarch_cpu_default_isat.cpu_arch; + ++ if (t.cpu_arch != CPU_NATIVE) ++ t.isa.evolution |= loongarch_isa_base_featurest.isa.base; ++ + /* Apply incremental changes. */ + /* "-march=native" overrides the default FPU type. */ + +-- +2.43.0 +
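The patch boils down to two rules, sketched here under illustrative names (this is a condensation, not the actual GCC code): a non-native -march trusts its base ISA and enables the implied features up front, while -march=native keeps only what CPUCFG actually reported and flags a shortfall against the detected base ISA.

#include <stdint.h>

static int64_t
resolve_evolution (int is_native, int64_t probed, int64_t base_features,
                   int *inconsistent)
{
  if (!is_native)
    return probed | base_features;   /* config_target_isa path.  */

  /* fill_native_cpu_config path: only probed bits survive; a partial
     match means either GCC or the hardware is buggy, so warn.  */
  *inconsistent = (probed & base_features) != base_features;
  return probed;
}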
_service:tar_scm:0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
Added
@@ -0,0 +1,156 @@ +From 6b483504c4fbb2a05a17d67e8f51b72149f1bbf9 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 16 Nov 2023 09:21:47 +0800 +Subject: PATCH 037/188 LoongArch: Take the advantage of -mdiv32 if it's + enabled + +With -mdiv32, we can assume div.wu and mod.wu works on low 32 bits +of a 64-bit GPR even if it's not sign-extended. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (DIV): New mode iterator. + (<optab:ANY_DIV><mode:GPR>3): Don't expand if TARGET_DIV32. + (<optab:ANY_DIV>di3_fake): Disable if TARGET_DIV32. + (*<optab:ANY_DIV><mode:GPR>3): Allow SImode if TARGET_DIV32. + (<optab:ANY_DIV>si3_extended): New insn if TARGET_DIV32. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/div-div32.c: New test. + * gcc.target/loongarch/div-no-div32.c: New test. +--- + gcc/config/loongarch/loongarch.md | 31 ++++++++++++++++--- + .../gcc.target/loongarch/div-div32.c | 31 +++++++++++++++++++ + .../gcc.target/loongarch/div-no-div32.c | 11 +++++++ + 3 files changed, 68 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-div32.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-no-div32.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 52e40a208..c4e7af107 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -408,6 +408,10 @@ + ;; st.w. + (define_mode_iterator ST_ANY QHWD ANYF) + ++;; A mode for anything legal as a input of a div or mod instruction. ++(define_mode_iterator DIV (DI "TARGET_64BIT") ++ (SI "!TARGET_64BIT || TARGET_DIV32")) ++ + ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. + (define_mode_attr d (SI "w") (DI "d")) +@@ -914,7 +918,7 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT && !TARGET_DIV32) + { + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); +@@ -934,9 +938,9 @@ + }) + + (define_insn "*<optab><mode>3" +- (set (match_operand:X 0 "register_operand" "=r,&r,&r") +- (any_div:X (match_operand:X 1 "register_operand" "r,r,0") +- (match_operand:X 2 "register_operand" "r,r,r"))) ++ (set (match_operand:DIV 0 "register_operand" "=r,&r,&r") ++ (any_div:DIV (match_operand:DIV 1 "register_operand" "r,r,0") ++ (match_operand:DIV 2 "register_operand" "r,r,r"))) + "" + { + return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands); +@@ -949,6 +953,23 @@ + (const_string "yes") + (const_string "no")))) + ++(define_insn "<optab>si3_extended" ++ (set (match_operand:DI 0 "register_operand" "=r,&r,&r") ++ (sign_extend ++ (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") ++ (match_operand:SI 2 "register_operand" "r,r,r")))) ++ "TARGET_64BIT && TARGET_DIV32" ++{ ++ return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); ++} ++ (set_attr "type" "idiv") ++ (set_attr "mode" "SI") ++ (set (attr "enabled") ++ (if_then_else ++ (match_test "!!which_alternative == loongarch_check_zero_div_p()") ++ (const_string "yes") ++ (const_string "no")))) ++ + (define_insn "<optab>di3_fake" + (set (match_operand:DI 0 "register_operand" "=r,&r,&r") + (sign_extend:DI +@@ -957,7 +978,7 @@ + (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0") + (match_operand:DI 2 "register_operand" "r,r,r")) 0) + UNSPEC_FAKE_ANY_DIV))) +- "TARGET_64BIT" ++ "TARGET_64BIT && !TARGET_DIV32" + { + return 
loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/div-div32.c b/gcc/testsuite/gcc.target/loongarch/div-div32.c +new file mode 100644 +index 000000000..8b1f686ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-div32.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mdiv32" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++/* { dg-final { scan-assembler-not "slli\.w.*,0" } } */ ++ ++int ++divw (long a, long b) ++{ ++ return (int)a / (int)b; ++} ++ ++unsigned int ++divwu (long a, long b) ++{ ++ return (unsigned int)a / (unsigned int)b; ++} ++ ++int ++modw (long a, long b) ++{ ++ return (int)a % (int)b; ++} ++ ++unsigned int ++modwu (long a, long b) ++{ ++ return (unsigned int)a % (unsigned int)b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/div-no-div32.c b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +new file mode 100644 +index 000000000..f0f697ba5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++ ++/* -mno-div32 should be implied by -march=loongarch64. */ ++/* { dg-final { scan-assembler-times "slli\.w\^\n\*0" 8 } } */ ++ ++#include "div-div32.c" +-- +2.43.0 +
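The practical payoff, mirroring the new div-div32.c test: 32-bit operands narrowed from 64-bit registers no longer need an explicit sign-extension before the divide. A hedged example:

/* With -march=loongarch64 -mabi=lp64d -mdiv32, both casts can feed div.w
   directly; without -mdiv32, GCC must sign-extend each input first
   (slli.w ...,0) and go through the <optab>di3_fake path.  */
int
narrow_div (long a, long b)
{
  return (int) a / (int) b;
}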
_service:tar_scm:0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
Added
@@ -0,0 +1,61 @@ +From 42368d6ab1200c157ff473c37889b56b596040e2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 16 Nov 2023 09:30:14 +0800 +Subject: PATCH 038/188 LoongArch: Don't emit dbar 0x700 if -mld-seq-sa + +This option (CPUCFG word 0x3 bit 23) means "the hardware guarantee that +two loads on the same address won't be reordered with each other". Thus +we can omit the "load-load" barrier dbar 0x700. + +This is only a micro-optimization because dbar 0x700 is already treated +as nop if the hardware supports LD_SEQ_SA. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand): Don't + print dbar 0x700 if TARGET_LD_SEQ_SA. + * config/loongarch/sync.md (atomic_load<mode>): Likewise. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/sync.md | 9 +++++---- + 2 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8bd46da62..c86b787c4 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6057,7 +6057,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + if (loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op)))) + fputs ("dbar\t0b10100", file); +- else ++ else if (!TARGET_LD_SEQ_SA) + fputs ("dbar\t0x700", file); + break; + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index f4673c856..65443c899 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -119,13 +119,14 @@ + case MEMMODEL_SEQ_CST: + return "dbar\t0x11\\n\\t" + "ld.<size>\t%0,%1\\n\\t" +- "dbar\t0x14\\n\\t"; ++ "dbar\t0x14"; + case MEMMODEL_ACQUIRE: + return "ld.<size>\t%0,%1\\n\\t" +- "dbar\t0x14\\n\\t"; ++ "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return "ld.<size>\t%0,%1\\n\\t" +- "dbar\t0x700\\n\\t"; ++ return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t" ++ : "ld.<size>\t%0,%1\\n\\t" ++ "dbar\t0x700"; + + default: + /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, +-- +2.43.0 +
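A short sketch of the user-visible effect (function names illustrative): with -mld-seq-sa the relaxed load becomes a bare ld.d, and the dbar 0x700 printed by %G on the failure path of an ll/sc compare-and-swap is likewise omitted.

long
load_relaxed (long *p)
{
  /* -mno-ld-seq-sa: ld.d ; dbar 0x700    -mld-seq-sa: ld.d only.  */
  return __atomic_load_n (p, __ATOMIC_RELAXED);
}

_Bool
cas_relaxed_failure (long *p, long *old, long desired)
{
  /* The failure-path load-load barrier is dropped under -mld-seq-sa.  */
  return __atomic_compare_exchange_n (p, old, desired, 0,
                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}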
View file
_service:tar_scm:0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
Added
@@ -0,0 +1,208 @@ +From 416bdd180a6c0dab4736a6da26de245cb0487c0e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 02:13:53 +0000 +Subject: PATCH 039/188 LoongArch: Add fine-grained control for LAM_BH and + LAMCAS + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: (lam-bh, lamcas): + Add. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-cpu.cc + (ISA_BASE_LA64V110_FEATURES): Include OPTION_MASK_ISA_LAM_BH + and OPTION_MASK_ISA_LAMCAS. + * config/loongarch/sync.md (atomic_add<mode:SHORT>): Use + TARGET_LAM_BH instead of ISA_BASE_IS_LA64V110. Remove empty + lines from assembly output. + (atomic_exchange<mode>_short): Likewise. + (atomic_exchange<mode:SHORT>): Likewise. + (atomic_fetch_add<mode>_short): Likewise. + (atomic_fetch_add<mode:SHORT>): Likewise. + (atomic_cas_value_strong<mode>_amcas): Use TARGET_LAMCAS instead + of ISA_BASE_IS_LA64V110. + (atomic_compare_and_swap<mode>): Likewise. + (atomic_compare_and_swap<mode:GPR>): Likewise. + (atomic_compare_and_swap<mode:SHORT>): Likewise. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump + status if -mlam-bh and -mlamcas if -fverbose-asm. +--- + gcc/config/loongarch/genopts/isa-evolution.in | 2 ++ + gcc/config/loongarch/loongarch-cpu.cc | 3 ++- + gcc/config/loongarch/loongarch-cpucfg-map.h | 2 ++ + gcc/config/loongarch/loongarch-str.h | 2 ++ + gcc/config/loongarch/loongarch.cc | 2 ++ + gcc/config/loongarch/loongarch.opt | 8 ++++++++ + gcc/config/loongarch/sync.md | 18 +++++++++--------- + 7 files changed, 27 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index e58f0d6a1..a6bc3f87f 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,2 +1,4 @@ + 2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. ++2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. ++2 28 lamcas Support amcas_db.{b/h/w/d} instructions. + 3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 76d66fa55..bbce82c9c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3. If not see + initializers! 
*/
+
+ #define ISA_BASE_LA64V110_FEATURES \
+- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
++ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \
++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)
+
+ int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
+ /* ISA_BASE_LA64V100 = */ 0,
+diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
+index 0c078c397..02ff16712 100644
+--- a/gcc/config/loongarch/loongarch-cpucfg-map.h
++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
+@@ -30,6 +30,8 @@ static constexpr struct {
+ HOST_WIDE_INT isa_evolution_bit;
+ } cpucfg_map[] = {
+ { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
++ { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
++ { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
+ { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
+ };
+
+diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
+index cd9dbb41b..0fee9abe5 100644
+--- a/gcc/config/loongarch/loongarch-str.h
++++ b/gcc/config/loongarch/loongarch-str.h
+@@ -70,5 +70,7 @@ along with GCC; see the file COPYING3. If not see
+ #define STR_EXPLICIT_RELOCS_ALWAYS "always"
+
+ #define OPTSTR_DIV32 "div32"
++#define OPTSTR_LAM_BH "lam-bh"
++#define OPTSTR_LAMCAS "lamcas"
+ #define OPTSTR_LD_SEQ_SA "ld-seq-sa"
+ #endif /* LOONGARCH_STR_H */
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c86b787c4..33d23a731 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -11448,6 +11448,8 @@ loongarch_asm_code_end (void)
+ fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
+ loongarch_isa_base_strings[la_target.isa.base]);
+ DUMP_FEATURE (TARGET_DIV32);
++ DUMP_FEATURE (TARGET_LAM_BH);
++ DUMP_FEATURE (TARGET_LAMCAS);
+ DUMP_FEATURE (TARGET_LD_SEQ_SA);
+ }
+
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 5251f705d..ea0d5bb4e 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -267,6 +267,14 @@ mdiv32
+ Target Mask(ISA_DIV32) Var(isa_evolution)
+ Support div.wu and mod.wu instructions with inputs not sign-extended.
+
++mlam-bh
++Target Mask(ISA_LAM_BH) Var(isa_evolution)
++Support am{swap/add}_db.{b/h} instructions.
++
++mlamcas
++Target Mask(ISA_LAMCAS) Var(isa_evolution)
++Support amcas_db.{b/h/w/d} instructions.
++
+ mld-seq-sa
+ Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution)
+ Do not need load-load barriers (dbar 0x700).
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 65443c899..a678e7131 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -124,7 +124,7 @@
+ return "ld.<size>\t%0,%1\\n\\t"
+ "dbar\t0x14";
+ case MEMMODEL_RELAXED:
+- return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t"
++ return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1"
+ : "ld.<size>\t%0,%1\\n\\t"
+ "dbar\t0x700";
+
+@@ -193,7 +193,7 @@
+ (match_operand:SHORT 1 "reg_or_0_operand" "rJ"))
+ (match_operand:SI 2 "const_int_operand") ;; model
+ UNSPEC_SYNC_OLD_OP))
+- "ISA_BASE_IS_LA64V110"
++ "TARGET_LAM_BH"
+ "amadd%A2.<amo>\t$zero,%z1,%0"
+ (set (attr "length") (const_int 4)))
+
+@@ -230,7 +230,7 @@
+ UNSPEC_SYNC_EXCHANGE))
+ (set (match_dup 1)
+ (match_operand:SHORT 2 "register_operand" "r"))
+- "ISA_BASE_IS_LA64V110"
++ "TARGET_LAM_BH"
+ "amswap%A3.<amo>\t%0,%z2,%1"
+ (set (attr "length") (const_int 4)))
+
+@@ -266,7 +266,7 @@
+ (match_operand:QHWD 3 "reg_or_0_operand" "rJ")
+ (match_operand:SI 4 "const_int_operand") ;; mod_s
+ UNSPEC_COMPARE_AND_SWAP))
+- "ISA_BASE_IS_LA64V110"
++ "TARGET_LAMCAS"
+ "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
+ (set (attr "length") (const_int 8)))
+
+@@ -296,7 +296,7 @@
+
+ operands[6] = mod_s;
+
+- if (ISA_BASE_IS_LA64V110)
++ if (TARGET_LAMCAS)
+ emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[6]));
+@@ -422,7 +422,7 @@
+
+ operands[6] = mod_s;
+
+- if (ISA_BASE_IS_LA64V110)
++ if (TARGET_LAMCAS)
+ emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[6]));
+@@ -642,7 +642,7 @@
+ (match_operand:SHORT 2 "register_operand"))
+ ""
+ {
+- if (ISA_BASE_IS_LA64V110)
++ if (TARGET_LAM_BH)
+ emit_insn (gen_atomic_exchange<mode>_short (operands[0], operands[1], operands[2], operands[3]));
+ else
+ {
+@@ -663,7 +663,7 @@
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
+ (match_operand:SI 3 "const_int_operand") ;; model
+ UNSPEC_SYNC_OLD_OP))
+- "ISA_BASE_IS_LA64V110"
++ "TARGET_LAM_BH"
+ "amadd%A3.<amo>\t%0,%z2,%1"
+ (set (attr "length") (const_int 4)))
+
+@@ -678,7 +678,7 @@
+ UNSPEC_SYNC_OLD_OP))
+ ""
+ {
+- if (ISA_BASE_IS_LA64V110)
++ if (TARGET_LAM_BH)
+ emit_insn (gen_atomic_fetch_add<mode>_short (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+--
+2.43.0
+
View file
_service:tar_scm:0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch
Added
@@ -0,0 +1,50 @@ +From 8ca46859ad70fb9473f6dbb1d3069e68ed43ef36 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 01:41:12 +0800 +Subject: PATCH 040/188 LoongArch: Fix "-mexplict-relocs=none + -mcmodel=medium" producing %call36 when the assembler does not support it + +Even if !HAVE_AS_SUPPORT_CALL36, const_call_insn_operand should still +return false when -mexplict-relocs=none -mcmodel=medium to make +loongarch_legitimize_call_address emit la.local or la.global. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (const_call_insn_operand): + Remove buggy "HAVE_AS_SUPPORT_CALL36" conditions. Change "1" to + "true" to make the coding style consistent. +--- + gcc/config/loongarch/predicates.md | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 2aae87db4..30a0dee9f 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -444,21 +444,19 @@ + case SYMBOL_PCREL: + if (TARGET_CMODEL_EXTREME + || (TARGET_CMODEL_MEDIUM +- && HAVE_AS_SUPPORT_CALL36 + && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else +- return 1; ++ return true; + + case SYMBOL_GOT_DISP: + if (TARGET_CMODEL_EXTREME + || !flag_plt + || (flag_plt && TARGET_CMODEL_MEDIUM +- && HAVE_AS_SUPPORT_CALL36 + && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else +- return 1; ++ return true; + + default: + return false; +-- +2.43.0 +
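For reference, a sketch of the case being fixed (the call sequences named here paraphrase the commit message; they are not quoted from the patch): with -mexplicit-relocs=none -mcmodel=medium and an assembler lacking %call36 support, the call below must be expanded by loongarch_legitimize_call_address into a la.local/la.global address load plus an indirect jirl, rather than a direct call carrying a %call36 relocation.

extern void callee (void);

void
caller (void)
{
  callee ();  /* no longer accepted by const_call_insn_operand here */
}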
View file
_service:tar_scm:0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
Added
@@ -0,0 +1,43 @@ +From 4c24f920e52c0dddf4bbbc391d2e5d2524754b4a Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Sat, 18 Nov 2023 11:04:42 +0800 +Subject: PATCH 041/188 LoongArch: Modify MUSL_DYNAMIC_LINKER. + +Use no suffix at all in the musl dynamic linker name for hard +float ABI. Use -sf and -sp suffixes in musl dynamic linker name +for soft float and single precision ABIs. The following table +outlines the musl interpreter names for the LoongArch64 ABI names. + +musl interpreter | LoongArch64 ABI +--------------------------- | ----------------- +ld-musl-loongarch64.so.1 | loongarch64-lp64d +ld-musl-loongarch64-sp.so.1 | loongarch64-lp64f +ld-musl-loongarch64-sf.so.1 | loongarch64-lp64s + +gcc/ChangeLog: + + * config/loongarch/gnu-user.h (MUSL_ABI_SPEC): Modify suffix. +--- + gcc/config/loongarch/gnu-user.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h +index 60ef75601..9fc49dc8f 100644 +--- a/gcc/config/loongarch/gnu-user.h ++++ b/gcc/config/loongarch/gnu-user.h +@@ -34,9 +34,9 @@ along with GCC; see the file COPYING3. If not see + "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1" + + #define MUSL_ABI_SPEC \ +- "%{mabi=lp64d:-lp64d}" \ +- "%{mabi=lp64f:-lp64f}" \ +- "%{mabi=lp64s:-lp64s}" ++ "%{mabi=lp64d:}" \ ++ "%{mabi=lp64f:-sp}" \ ++ "%{mabi=lp64s:-sf}" + + #undef MUSL_DYNAMIC_LINKER + #define MUSL_DYNAMIC_LINKER \ +-- +2.43.0 +
View file
_service:tar_scm:0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch
Added
@@ -0,0 +1,85 @@ +From 0f65e5ebe60d9ad5141115661ed71c321156cd95 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 21 Nov 2023 09:09:25 +0800 +Subject: PATCH 042/188 LoongArch: Fix libgcc build failure when libc is not + available + +To use int64_t we included <stdint.h> in loongarch-def.h. +Unfortunately, loongarch-def.h is also used by libgcc etc., causing a +build failure when building a "stage1" cross compiler at which the +target libc is not built yet. + +As int64_t is used for a C-compatible replacement of HOST_WIDE_INT, it's +not directly or indirectly referred by the target libraries. So +guard everything requiring stdint.h with #if then they'll not block +target libraries. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h (stdint.h): Guard with #if to + exclude it for target libraries. + (loongarch_isa_base_features): Likewise. + (loongarch_isa): Likewise. + (loongarch_abi): Likewise. + (loongarch_target): Likewise. + (loongarch_cpu_default_isa): Likewise. +--- + gcc/config/loongarch/loongarch-def.h | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index ca0a324dd..ef848f606 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -46,7 +46,10 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_DEF_H + #define LOONGARCH_DEF_H + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include <stdint.h> ++#endif ++ + #include "loongarch-tune.h" + + #ifdef __cplusplus +@@ -62,9 +65,11 @@ extern const char* loongarch_isa_base_strings; + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is + we cannot use the C++ header options.h in loongarch-def.c. */ + extern int64_t loongarch_isa_base_features; ++#endif + + /* enum isa_ext_* */ + extern const char* loongarch_isa_ext_strings; +@@ -121,6 +126,7 @@ extern const char* loongarch_cmodel_strings; + #define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) + + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Internal representation of the target. */ + struct loongarch_isa + { +@@ -150,6 +156,9 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + ++extern struct loongarch_isa loongarch_cpu_default_isa; ++#endif ++ + /* CPU properties. */ + /* index */ + #define CPU_NATIVE 0 +@@ -162,7 +171,6 @@ struct loongarch_target + + /* parallel tables. */ + extern const char* loongarch_cpu_strings; +-extern struct loongarch_isa loongarch_cpu_default_isa; + extern int loongarch_cpu_issue_rate; + extern int loongarch_cpu_multipass_dfa_lookahead; + +-- +2.43.0 +
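A minimal sketch of the guard pattern the patch applies (the declaration name is hypothetical): anything needing <stdint.h> is compiled out whenever the header is pulled into libgcc or another target library, so a stage1 cross compiler can build them before libc exists.

#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
#include <stdint.h>
/* Hypothetical host-side declaration: visible to the compiler proper,
   invisible to target libraries.  */
extern int64_t host_only_table[];
#endif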
View file
_service:tar_scm:0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
Added
@@ -0,0 +1,148 @@ +From cdea7c114fa48012705d65134276619b5679fa35 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 19 Nov 2023 06:12:22 +0800 +Subject: PATCH 043/188 LoongArch: Optimize LSX vector shuffle on + floating-point vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The vec_perm expander was wrongly defined. GCC internal says: + +Operand 3 is the “selector”. It is an integral mode vector of the same +width and number of elements as mode M. + +But we made operand 3 in the same mode as the shuffled vectors, so it +would be a FP mode vector if the shuffled vectors are FP mode. + +With this mistake, the generic code manages to work around and it ends +up creating some very nasty code for a simple __builtin_shuffle (a, b, +c) where a and b are V4SF, c is V4SI: + + la.local $r12,.LANCHOR0 + la.local $r13,.LANCHOR1 + vld $vr1,$r12,48 + vslli.w $vr1,$vr1,2 + vld $vr2,$r12,16 + vld $vr0,$r13,0 + vld $vr3,$r13,16 + vshuf.b $vr0,$vr1,$vr1,$vr0 + vld $vr1,$r12,32 + vadd.b $vr0,$vr0,$vr3 + vandi.b $vr0,$vr0,31 + vshuf.b $vr0,$vr1,$vr2,$vr0 + vst $vr0,$r12,0 + jr $r1 + +This is obviously stupid. Fix the expander definition and adjust +loongarch_expand_vec_perm to handle it correctly. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the + selector VIMODE. + * config/loongarch/loongarch.cc (loongarch_expand_vec_perm): + Use the mode of the selector (instead of the shuffled vector) + for truncating it. Operate on subregs in the selector mode if + the shuffled vector has a different mode (i. e. it's a + floating-point vector). + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-shuf-fp.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 18 ++++++++++-------- + gcc/config/loongarch/lsx.md | 2 +- + .../gcc.target/loongarch/vect-shuf-fp.c | 16 ++++++++++++++++ + 3 files changed, 27 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 33d23a731..d95ac68e8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8603,8 +8603,9 @@ void + loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + { + machine_mode vmode = GET_MODE (target); ++ machine_mode vimode = GET_MODE (sel); + auto nelt = GET_MODE_NUNITS (vmode); +- auto round_reg = gen_reg_rtx (vmode); ++ auto round_reg = gen_reg_rtx (vimode); + rtx round_dataMAX_VECT_LEN; + + for (int i = 0; i < nelt; i += 1) +@@ -8612,9 +8613,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + round_datai = GEN_INT (0x1f); + } + +- rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data)); ++ rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data)); + emit_move_insn (round_reg, round_data_rtx); + ++ if (vmode != vimode) ++ { ++ target = lowpart_subreg (vimode, target, vmode); ++ op0 = lowpart_subreg (vimode, op0, vmode); ++ op1 = lowpart_subreg (vimode, op1, vmode); ++ } ++ + switch (vmode) + { + case E_V16QImode: +@@ -8622,17 +8630,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); + break; + case E_V2DFmode: +- emit_insn (gen_andv2di3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0)); +- break; + case E_V2DImode: + emit_insn (gen_andv2di3 (sel, sel, round_reg)); + emit_insn (gen_lsx_vshuf_d 
(target, sel, op1, op0));
+ break;
+ case E_V4SFmode:
+- emit_insn (gen_andv4si3 (sel, sel, round_reg));
+- emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
+- break;
+ case E_V4SImode:
+ emit_insn (gen_andv4si3 (sel, sel, round_reg));
+ emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 8ea41c85b..5e8d8d74b 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -837,7 +837,7 @@
+ (match_operand:LSX 0 "register_operand")
+ (match_operand:LSX 1 "register_operand")
+ (match_operand:LSX 2 "register_operand")
+- (match_operand:LSX 3 "register_operand")
++ (match_operand:<VIMODE> 3 "register_operand")
+ "ISA_HAS_LSX"
+ {
+ loongarch_expand_vec_perm (operands[0], operands[1],
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
+new file mode 100644
+index 000000000..7acc2113a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-mlasx -O3" } */
++/* { dg-final { scan-assembler "vshuf\.w" } } */
++
++#define V __attribute__ ((vector_size (16)))
++
++int a V;
++float b V;
++float c V;
++float d V;
++
++void
++test (void)
++{
++ d = __builtin_shuffle (b, c, a);
++}
+--
+2.43.0
+
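A short example, same shape as the new test, of the shuffle the fix improves: with the corrected expander GCC emits a single masked vshuf.w instead of the vshuf.b/vadd.b/vandi.b sequence quoted in the commit message.

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));

v4sf
shuffle (v4sf a, v4sf b, v4si sel)
{
  /* Each selector element picks one of the eight lanes of {a, b}.  */
  return __builtin_shuffle (a, b, sel);
}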
View file
_service:tar_scm:0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
Added
@@ -0,0 +1,112 @@
+From aaf58efe8414a4eaceb6721d9c242df710d1762c Mon Sep 17 00:00:00 2001
+From: Guo Jie <guojie@loongson.cn>
+Date: Thu, 23 Nov 2023 11:04:17 +0800
+Subject: [PATCH 044/188] LoongArch: Optimize the loading of immediate numbers
+ with the same high and low 32-bit values
+
+For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
+
+ long long r = 0x0101010101010101;
+
+Before this patch:
+
+ lu12i.w $r15,16842752>>12
+ ori $r15,$r15,257
+ lu32i.d $r15,0x1010100000000>>32
+ lu52i.d $r15,$r15,0x100000000000000>>52
+
+After this patch:
+
+ lu12i.w $r15,16842752>>12
+ ori $r15,$r15,257
+ bstrins.d $r15,$r15,63,32
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.cc
+ (enum loongarch_load_imm_method): Add new method.
+ (loongarch_build_integer): Add relevant implementations for
+ new method.
+ (loongarch_move_integer): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/loongarch/imm-load1.c: Change old check.
+---
+ gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++++++-
+ .../gcc.target/loongarch/imm-load1.c | 3 ++-
+ 2 files changed, 23 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index d95ac68e8..048d3802b 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -142,12 +142,16 @@ struct loongarch_address_info
+
+ METHOD_LU52I:
+ Load 52-63 bit of the immediate number.
++
++ METHOD_MIRROR:
++ Copy 0-31 bit of the immediate number to 32-63bit.
+ */
+ enum loongarch_load_imm_method
+ {
+ METHOD_NORMAL,
+ METHOD_LU32I,
+- METHOD_LU52I
++ METHOD_LU52I,
++ METHOD_MIRROR
+ };
+
+ struct loongarch_integer_op
+@@ -1553,11 +1557,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
+
+ int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
+ int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
++
++ uint32_t hival = (uint32_t) (value >> 32);
++ uint32_t loval = (uint32_t) value;
++
+ /* Determine whether the upper 32 bits are sign-extended from the lower
+ 32 bits. If it is, the instructions to load the high order can be
+ ommitted. */
+ if (lu32i[sign31] && lu52i[sign51])
+ return cost;
++ /* If the lower 32 bits are the same as the upper 32 bits, just copy
++ the lower 32 bits to the upper 32 bits. */
++ else if (loval == hival)
++ {
++ codes[cost].method = METHOD_MIRROR;
++ codes[cost].curr_value = value;
++ return cost + 1;
++ }
+ /* Determine whether bits 32-51 are sign-extended from the lower 32
+ bits. If so, directly load 52-63 bits. */
+ else if (lu32i[sign31])
+@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
+ gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
+ GEN_INT (codes[i].value));
+ break;
++ case METHOD_MIRROR:
++ gcc_assert (mode == DImode);
++ emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
++ break;
+ default:
+ gcc_unreachable ();
+ }
+diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
+index 2ff029712..f64cc2956 100644
+--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
++++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
+@@ -1,6 +1,7 @@
+ /* { dg-do compile } */
+ /* { dg-options "-mabi=lp64d -O2" } */
+-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
++/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
++/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
+
+
+ extern long long b[10];
+--
+2.43.0
+
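A worked example (ours, not from the patch) with a less regular constant: any 64-bit value whose two 32-bit halves match now needs three instructions instead of four.

long long
mirrored (void)
{
  /* 0xdeadbeef = (0xdeadb << 12) | 0xeef, so (illustratively):
       lu12i.w   $r,0xdeadb000>>12   # bits 12..31, sign-extended
       ori       $r,$r,0xeef         # bits 0..11 -> low word 0xdeadbeef
       bstrins.d $r,$r,63,32         # copy bits 0..31 into bits 32..63  */
  return 0xdeadbeefdeadbeefLL;
}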
View file
_service:tar_scm:0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch
Added
@@ -0,0 +1,30 @@ +From fa28ce4ac91691595e14838be49c9dd42b153b7f Mon Sep 17 00:00:00 2001 +From: Guo Jie <guojie@loongson.cn> +Date: Thu, 23 Nov 2023 11:05:56 +0800 +Subject: PATCH 045/188 LoongArch: Fix runtime error in a gcc build with + --with-build-config=bootstrap-ubsan + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_split_plus_constant): + avoid left shift of negative value -0x8000. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 048d3802b..ecceca22d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4265,7 +4265,7 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + else if (loongarch_addu16i_imm12_operand_p (v, mode)) + a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1); + else if (mode == DImode && DUAL_ADDU16I_OPERAND (v)) +- a = (v > 0 ? 0x7fff : -0x8000) << 16; ++ a = (v > 0 ? 0x7fff0000 : ~0x7fffffff); + else + gcc_unreachable (); + +-- +2.43.0 +
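A quick check (ours) that the rewritten expression is value-equivalent while avoiding the left shift of a negative number, which is undefined behaviour in C and is what bootstrap-ubsan flagged:

#include <assert.h>

int
main (void)
{
  assert ((0x7fffLL << 16) == 0x7fff0000LL); /* positive arm, well defined */
  assert (~0x7fffffffLL == -0x80000000LL);   /* negative arm, no shift needed */
  return 0;
}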
View file
_service:tar_scm:0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
Added
@@ -0,0 +1,1295 @@ +From d37308b7a62246e16ee61c40441548feb76761f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 18 Nov 2023 04:48:20 +0800 +Subject: PATCH 046/188 LoongArch: Fix usage of LSX and LASX frint/ftint + instructions PR112578 + +The usage LSX and LASX frint/ftint instructions had some problems: + +1. These instructions raises FE_INEXACT, which is not allowed with + -fno-fp-int-builtin-inexact for most C2x section F.10.6 functions + (the only exceptions are rint, lrint, and llrint). +2. The "frint" instruction without explicit rounding mode is used for + roundM2, this is incorrect because roundM2 is defined "rounding + operand 1 to the *nearest* integer, rounding away from zero in the + event of a tie". We actually don't have such an instruction. Our + frintrne instruction is roundevenM2 (unfortunately, this is not + documented). +3. These define_insn's are written in a way not so easy to hack. + +So I removed these instructions and created a "simd.md" file, then added +them and the corresponding expanders there. The advantage of the +simd.md file is we don't need to duplicate the RTL template twice (in +lsx.md and lasx.md). + +gcc/ChangeLog: + + PR target/112578 + * config/loongarch/lsx.md (UNSPEC_LSX_VFTINT_S, + UNSPEC_LSX_VFTINTRNE, UNSPEC_LSX_VFTINTRP, + UNSPEC_LSX_VFTINTRM, UNSPEC_LSX_VFRINTRNE_S, + UNSPEC_LSX_VFRINTRNE_D, UNSPEC_LSX_VFRINTRZ_S, + UNSPEC_LSX_VFRINTRZ_D, UNSPEC_LSX_VFRINTRP_S, + UNSPEC_LSX_VFRINTRP_D, UNSPEC_LSX_VFRINTRM_S, + UNSPEC_LSX_VFRINTRM_D): Remove. + (ILSX, FLSX): Move into ... + (VIMODE): Move into ... + (FRINT_S, FRINT_D): Remove. + (frint_pattern_s, frint_pattern_d, frint_suffix): Remove. + (lsx_vfrint_<flsxfmt>, lsx_vftint_s_<ilsxfmt>_<flsxfmt>, + lsx_vftintrne_w_s, lsx_vftintrne_l_d, lsx_vftintrp_w_s, + lsx_vftintrp_l_d, lsx_vftintrm_w_s, lsx_vftintrm_l_d, + lsx_vfrintrne_s, lsx_vfrintrne_d, lsx_vfrintrz_s, + lsx_vfrintrz_d, lsx_vfrintrp_s, lsx_vfrintrp_d, + lsx_vfrintrm_s, lsx_vfrintrm_d, + <FRINT_S:frint_pattern_s>v4sf2, + <FRINT_D:frint_pattern_d>v2df2, round<mode>2, + fix_trunc<mode>2): Remove. + * config/loongarch/lasx.md: Likewise. + * config/loongarch/simd.md: New file. + (ILSX, ILASX, FLSX, FLASX, VIMODE): ... here. + (IVEC, FVEC): New mode iterators. + (VIMODE): ... here. Extend it to work for all LSX/LASX vector + modes. + (x, wu, simd_isa, WVEC, vimode, simdfmt, simdifmt_for_f, + elebits): New mode attributes. + (UNSPEC_SIMD_FRINTRP, UNSPEC_SIMD_FRINTRZ, UNSPEC_SIMD_FRINT, + UNSPEC_SIMD_FRINTRM, UNSPEC_SIMD_FRINTRNE): New unspecs. + (SIMD_FRINT): New int iterator. + (simd_frint_rounding, simd_frint_pattern): New int attributes. + (<simd_isa>_<x>vfrint<simd_frint_rounding>_<simdfmt>): New + define_insn template for frint instructions. + (<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>): + Likewise, but for ftint instructions. + (<simd_frint_pattern><mode>2): New define_expand with + flag_fp_int_builtin_inexact checked. + (l<simd_frint_pattern><mode><vimode>2): Likewise. + (ftrunc<mode>2): New define_expand. It does not require + flag_fp_int_builtin_inexact. + (fix_trunc<mode><vimode>2): New define_insn_and_split. It does + not require flag_fp_int_builtin_inexact. + (include): Add lsx.md and lasx.md. + * config/loongarch/loongarch.md (include): Include simd.md, + instead of including lsx.md and lasx.md directly. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vftint_w_s, CODE_FOR_lsx_vftint_l_d, + CODE_FOR_lasx_xvftint_w_s, CODE_FOR_lasx_xvftint_l_d): + Remove. 
+ +gcc/testsuite/ChangeLog: + + PR target/112578 + * gcc.target/loongarch/vect-frint.c: New test. + * gcc.target/loongarch/vect-frint-no-inexact.c: New test. + * gcc.target/loongarch/vect-ftint.c: New test. + * gcc.target/loongarch/vect-ftint-no-inexact.c: New test. +--- + gcc/config/loongarch/lasx.md | 239 ----------------- + gcc/config/loongarch/loongarch-builtins.cc | 4 - + gcc/config/loongarch/loongarch.md | 7 +- + gcc/config/loongarch/lsx.md | 243 ------------------ + gcc/config/loongarch/simd.md | 213 +++++++++++++++ + .../loongarch/vect-frint-no-inexact.c | 48 ++++ + .../gcc.target/loongarch/vect-frint.c | 85 ++++++ + .../loongarch/vect-ftint-no-inexact.c | 44 ++++ + .../gcc.target/loongarch/vect-ftint.c | 83 ++++++ + 9 files changed, 475 insertions(+), 491 deletions(-) + create mode 100644 gcc/config/loongarch/simd.md + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 2e11f0612..d4a56c307 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -53,7 +53,6 @@ + UNSPEC_LASX_XVFCMP_SULT + UNSPEC_LASX_XVFCMP_SUN + UNSPEC_LASX_XVFCMP_SUNE +- UNSPEC_LASX_XVFTINT_S + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -92,12 +91,6 @@ + UNSPEC_LASX_XVEXTRINS + UNSPEC_LASX_XVMSKLTZ + UNSPEC_LASX_XVSIGNCOV +- UNSPEC_LASX_XVFTINTRNE_W_S +- UNSPEC_LASX_XVFTINTRNE_L_D +- UNSPEC_LASX_XVFTINTRP_W_S +- UNSPEC_LASX_XVFTINTRP_L_D +- UNSPEC_LASX_XVFTINTRM_W_S +- UNSPEC_LASX_XVFTINTRM_L_D + UNSPEC_LASX_XVFTINT_W_D + UNSPEC_LASX_XVFFINT_S_L + UNSPEC_LASX_XVFTINTRZ_W_D +@@ -116,14 +109,6 @@ + UNSPEC_LASX_XVFTINTRML_L_S + UNSPEC_LASX_XVFTINTRNEL_L_S + UNSPEC_LASX_XVFTINTRNEH_L_S +- UNSPEC_LASX_XVFRINTRNE_S +- UNSPEC_LASX_XVFRINTRNE_D +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRM_S +- UNSPEC_LASX_XVFRINTRM_D + UNSPEC_LASX_XVREPLVE0_Q + UNSPEC_LASX_XVPERM_W + UNSPEC_LASX_XVPERMI_Q +@@ -206,9 +191,6 @@ + ;; Only used for copy256_{u,s}.w. + (define_mode_iterator LASX_W V8SI V8SF) + +-;; Only integer modes in LASX. +-(define_mode_iterator ILASX V4DI V8SI V16HI V32QI) +- + ;; As ILASX but excludes V32QI. + (define_mode_iterator ILASX_DWH V4DI V8SI V16HI) + +@@ -224,9 +206,6 @@ + ;; Only integer modes smaller than a word. + (define_mode_iterator ILASX_HB V16HI V32QI) + +-;; Only floating-point modes in LASX. +-(define_mode_iterator FLASX V4DF V8SF) +- + ;; Only used for immediate set shuffle elements instruction. 
+ (define_mode_iterator LASX_WHB_W V8SI V16HI V32QI V8SF) + +@@ -500,37 +479,6 @@ + (V16HI "w") + (V32QI "w")) + +-(define_int_iterator FRINT256_S UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_S) +- +-(define_int_iterator FRINT256_D UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_D) +- +-(define_int_attr frint256_pattern_s +- (UNSPEC_LASX_XVFRINTRP_S "ceil") +- (UNSPEC_LASX_XVFRINTRZ_S "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_S "floor")) +- +-(define_int_attr frint256_pattern_d +- (UNSPEC_LASX_XVFRINTRP_D "ceil") +- (UNSPEC_LASX_XVFRINTRZ_D "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_D "floor")) +- +-(define_int_attr frint256_suffix +- (UNSPEC_LASX_XVFRINTRP_S "rp") +- (UNSPEC_LASX_XVFRINTRP_D "rp") +- (UNSPEC_LASX_XVFRINTRZ_S "rz") +- (UNSPEC_LASX_XVFRINTRZ_D "rz") +- (UNSPEC_LASX_XVFRINT "") +- (UNSPEC_LASX_XVFRINTRM_S "rm") +- (UNSPEC_LASX_XVFRINTRM_D "rm")) +- + (define_expand "vec_init<mode><unitmode>" + (match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "") +@@ -1688,15 +1636,6 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrint_<flasxfmt>" +- (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINT)) +- "ISA_HAS_LASX" +- "xvfrint.<flasxfmt>\t%u0,%u1" +- (set_attr "type" "simd_fcvt") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lasx_xvfrsqrt_<flasxfmt>" + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +@@ -1706,16 +1645,6 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvftint_s_<ilasxfmt>_<flasxfmt>" +- (set (match_operand:<VIMODE256> 0 "register_operand" "=f") +- (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFTINT_S)) +- "ISA_HAS_LASX" +- "xvftint.<ilasxfmt>.<flasxfmt>\t%u0,%u1" +- (set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "<FINTCNV256_2>") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>" + (set (match_operand:<VIMODE256> 0 "register_operand" "=f") + (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +@@ -1726,18 +1655,6 @@ + (set_attr "cnv_mode" "<FINTCNV256_2>") + (set_attr "mode" "<MODE>")) + +- +- +-(define_insn "fix_trunc<FLASX:mode><mode256_i>2" +- (set (match_operand:<VIMODE256> 0 "register_operand" "=f") +- (fix:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f"))) +- "ISA_HAS_LASX" +- "xvftintrz.<ilasxfmt>.<flasxfmt>\t%u0,%u1" +- (set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "<FINTCNV256_2>") +- (set_attr "mode" "<MODE>")) +- +- + (define_insn "fixuns_trunc<FLASX:mode><mode256_i>2" + (set (match_operand:<VIMODE256> 0 "register_operand" "=f") + (unsigned_fix:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f"))) +@@ -3245,60 +3162,6 @@ + (set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvftintrne_w_s" +- (set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFTINTRNE_W_S)) +- "ISA_HAS_LASX" +- "xvftintrne.w.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvftintrne_l_d" +- (set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI (match_operand:V4DF 1 "register_operand" "f") 
+- UNSPEC_LASX_XVFTINTRNE_L_D)) +- "ISA_HAS_LASX" +- "xvftintrne.l.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-(define_insn "lasx_xvftintrp_w_s" +- (set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFTINTRP_W_S)) +- "ISA_HAS_LASX" +- "xvftintrp.w.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvftintrp_l_d" +- (set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI (match_operand:V4DF 1 "register_operand" "f") +- UNSPEC_LASX_XVFTINTRP_L_D)) +- "ISA_HAS_LASX" +- "xvftintrp.l.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-(define_insn "lasx_xvftintrm_w_s" +- (set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFTINTRM_W_S)) +- "ISA_HAS_LASX" +- "xvftintrm.w.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvftintrm_l_d" +- (set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI (match_operand:V4DF 1 "register_operand" "f") +- UNSPEC_LASX_XVFTINTRM_L_D)) +- "ISA_HAS_LASX" +- "xvftintrm.l.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- + (define_insn "lasx_xvftint_w_d" + (set (match_operand:V8SI 0 "register_operand" "=f") + (unspec:V8SI (match_operand:V4DF 1 "register_operand" "f") +@@ -3467,108 +3330,6 @@ + (set_attr "type" "simd_shift") + (set_attr "mode" "V8SF")) + +-(define_insn "lasx_xvfrintrne_s" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRNE_S)) +- "ISA_HAS_LASX" +- "xvfrintrne.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvfrintrne_d" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF (match_operand:V4DF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRNE_D)) +- "ISA_HAS_LASX" +- "xvfrintrne.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-(define_insn "lasx_xvfrintrz_s" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRZ_S)) +- "ISA_HAS_LASX" +- "xvfrintrz.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvfrintrz_d" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF (match_operand:V4DF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRZ_D)) +- "ISA_HAS_LASX" +- "xvfrintrz.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-(define_insn "lasx_xvfrintrp_s" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRP_S)) +- "ISA_HAS_LASX" +- "xvfrintrp.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvfrintrp_d" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF (match_operand:V4DF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRP_D)) +- "ISA_HAS_LASX" +- "xvfrintrp.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-(define_insn "lasx_xvfrintrm_s" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF (match_operand:V8SF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRM_S)) +- "ISA_HAS_LASX" +- "xvfrintrm.s\t%u0,%u1" +- (set_attr 
"type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "lasx_xvfrintrm_d" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF (match_operand:V4DF 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINTRM_D)) +- "ISA_HAS_LASX" +- "xvfrintrm.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-;; Vector versions of the floating-point frint patterns. +-;; Expands to btrunc, ceil, floor, rint. +-(define_insn "<FRINT256_S:frint256_pattern_s>v8sf2" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF (match_operand:V8SF 1 "register_operand" "f") +- FRINT256_S)) +- "ISA_HAS_LASX" +- "xvfrint<FRINT256_S:frint256_suffix>.s\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")) +- +-(define_insn "<FRINT256_D:frint256_pattern_d>v4df2" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF (match_operand:V4DF 1 "register_operand" "f") +- FRINT256_D)) +- "ISA_HAS_LASX" +- "xvfrint<FRINT256_D:frint256_suffix>.d\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")) +- +-;; Expands to round. +-(define_insn "round<mode>2" +- (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRINT)) +- "ISA_HAS_LASX" +- "xvfrint.<flasxfmt>\t%u0,%u1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "<MODE>")) +- + ;; Offset load and broadcast + (define_expand "lasx_xvldrepl_<lasxfmt_f>" + (match_operand:LASX 0 "register_operand") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 2d9743d86..fb458feac 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -419,8 +419,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vabsd_hu CODE_FOR_lsx_vabsd_u_hu + #define CODE_FOR_lsx_vabsd_wu CODE_FOR_lsx_vabsd_u_wu + #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du +-#define CODE_FOR_lsx_vftint_w_s CODE_FOR_lsx_vftint_s_w_s +-#define CODE_FOR_lsx_vftint_l_d CODE_FOR_lsx_vftint_s_l_d + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d + #define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3 +@@ -725,8 +723,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvssrlrn_bu_h CODE_FOR_lasx_xvssrlrn_u_bu_h + #define CODE_FOR_lasx_xvssrlrn_hu_w CODE_FOR_lasx_xvssrlrn_u_hu_w + #define CODE_FOR_lasx_xvssrlrn_wu_d CODE_FOR_lasx_xvssrlrn_u_wu_d +-#define CODE_FOR_lasx_xvftint_w_s CODE_FOR_lasx_xvftint_s_w_s +-#define CODE_FOR_lasx_xvftint_l_d CODE_FOR_lasx_xvftint_s_l_d + #define CODE_FOR_lasx_xvftint_wu_s CODE_FOR_lasx_xvftint_u_wu_s + #define CODE_FOR_lasx_xvftint_lu_d CODE_FOR_lasx_xvftint_u_lu_d + #define CODE_FOR_lasx_xvsllwil_h_b CODE_FOR_lasx_xvsllwil_s_h_b +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c4e7af107..d1c766cbf 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4026,11 +4026,8 @@ + (include "generic.md") + (include "la464.md") + +-; The LoongArch SX Instructions. +-(include "lsx.md") +- +-; The LoongArch ASX Instructions. +-(include "lasx.md") ++; The LoongArch SIMD Instructions. 
++(include "simd.md") + + (define_c_enum "unspec" + UNSPEC_ADDRESS_FIRST +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 5e8d8d74b..c1c3719e3 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -55,7 +55,6 @@ + UNSPEC_LSX_VFCMP_SULT + UNSPEC_LSX_VFCMP_SUN + UNSPEC_LSX_VFCMP_SUNE +- UNSPEC_LSX_VFTINT_S + UNSPEC_LSX_VFTINT_U + UNSPEC_LSX_VSAT_S + UNSPEC_LSX_VSAT_U +@@ -89,9 +88,6 @@ + UNSPEC_LSX_VEXTRINS + UNSPEC_LSX_VMSKLTZ + UNSPEC_LSX_VSIGNCOV +- UNSPEC_LSX_VFTINTRNE +- UNSPEC_LSX_VFTINTRP +- UNSPEC_LSX_VFTINTRM + UNSPEC_LSX_VFTINT_W_D + UNSPEC_LSX_VFFINT_S_L + UNSPEC_LSX_VFTINTRZ_W_D +@@ -110,14 +106,6 @@ + UNSPEC_LSX_VFTINTRNEL_L_S + UNSPEC_LSX_VFTINTRNEH_L_S + UNSPEC_LSX_VFTINTH_L_H +- UNSPEC_LSX_VFRINTRNE_S +- UNSPEC_LSX_VFRINTRNE_D +- UNSPEC_LSX_VFRINTRZ_S +- UNSPEC_LSX_VFRINTRZ_D +- UNSPEC_LSX_VFRINTRP_S +- UNSPEC_LSX_VFRINTRP_D +- UNSPEC_LSX_VFRINTRM_S +- UNSPEC_LSX_VFRINTRM_D + UNSPEC_LSX_VSSRARN_S + UNSPEC_LSX_VSSRARN_U + UNSPEC_LSX_VSSRLN_U +@@ -221,9 +209,6 @@ + ;; Only used for copy_{u,s}.w and vilvh. + (define_mode_iterator LSX_W V4SI V4SF) + +-;; Only integer modes. +-(define_mode_iterator ILSX V2DI V4SI V8HI V16QI) +- + ;; As ILSX but excludes V16QI. + (define_mode_iterator ILSX_DWH V2DI V4SI V8HI) + +@@ -242,21 +227,9 @@ + ;;;; Only integer modes for fixed-point madd_q/maddr_q. + ;;(define_mode_iterator ILSX_WH V4SI V8HI) + +-;; Only floating-point modes. +-(define_mode_iterator FLSX V2DF V4SF) +- + ;; Only used for immediate set shuffle elements instruction. + (define_mode_iterator LSX_WHB_W V4SI V8HI V16QI V4SF) + +-;; The attribute gives the integer vector mode with same size. +-(define_mode_attr VIMODE +- (V2DF "V2DI") +- (V4SF "V4SI") +- (V2DI "V2DI") +- (V4SI "V4SI") +- (V8HI "V8HI") +- (V16QI "V16QI")) +- + ;; The attribute gives half modes for vector modes. 
+ (define_mode_attr VHMODE + (V8HI "V16QI") +@@ -400,38 +373,6 @@ + (V4SI "uimm5") + (V2DI "uimm6")) + +- +-(define_int_iterator FRINT_S UNSPEC_LSX_VFRINTRP_S +- UNSPEC_LSX_VFRINTRZ_S +- UNSPEC_LSX_VFRINT +- UNSPEC_LSX_VFRINTRM_S) +- +-(define_int_iterator FRINT_D UNSPEC_LSX_VFRINTRP_D +- UNSPEC_LSX_VFRINTRZ_D +- UNSPEC_LSX_VFRINT +- UNSPEC_LSX_VFRINTRM_D) +- +-(define_int_attr frint_pattern_s +- (UNSPEC_LSX_VFRINTRP_S "ceil") +- (UNSPEC_LSX_VFRINTRZ_S "btrunc") +- (UNSPEC_LSX_VFRINT "rint") +- (UNSPEC_LSX_VFRINTRM_S "floor")) +- +-(define_int_attr frint_pattern_d +- (UNSPEC_LSX_VFRINTRP_D "ceil") +- (UNSPEC_LSX_VFRINTRZ_D "btrunc") +- (UNSPEC_LSX_VFRINT "rint") +- (UNSPEC_LSX_VFRINTRM_D "floor")) +- +-(define_int_attr frint_suffix +- (UNSPEC_LSX_VFRINTRP_S "rp") +- (UNSPEC_LSX_VFRINTRP_D "rp") +- (UNSPEC_LSX_VFRINTRZ_S "rz") +- (UNSPEC_LSX_VFRINTRZ_D "rz") +- (UNSPEC_LSX_VFRINT "") +- (UNSPEC_LSX_VFRINTRM_S "rm") +- (UNSPEC_LSX_VFRINTRM_D "rm")) +- + (define_expand "vec_init<mode><unitmode>" + (match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "") +@@ -1616,15 +1557,6 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vfrint_<flsxfmt>" +- (set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFRINT)) +- "ISA_HAS_LSX" +- "vfrint.<flsxfmt>\t%w0,%w1" +- (set_attr "type" "simd_fcvt") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vfrsqrt_<flsxfmt>" + (set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +@@ -1634,16 +1566,6 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vftint_s_<ilsxfmt>_<flsxfmt>" +- (set (match_operand:<VIMODE> 0 "register_operand" "=f") +- (unspec:<VIMODE> (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFTINT_S)) +- "ISA_HAS_LSX" +- "vftint.<ilsxfmt>.<flsxfmt>\t%w0,%w1" +- (set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "<FINTCNV_2>") +- (set_attr "mode" "<MODE>")) +- + (define_insn "lsx_vftint_u_<ilsxfmt_u>_<flsxfmt>" + (set (match_operand:<VIMODE> 0 "register_operand" "=f") + (unspec:<VIMODE> (match_operand:FLSX 1 "register_operand" "f") +@@ -1654,15 +1576,6 @@ + (set_attr "cnv_mode" "<FINTCNV_2>") + (set_attr "mode" "<MODE>")) + +-(define_insn "fix_trunc<FLSX:mode><mode_i>2" +- (set (match_operand:<VIMODE> 0 "register_operand" "=f") +- (fix:<VIMODE> (match_operand:FLSX 1 "register_operand" "f"))) +- "ISA_HAS_LSX" +- "vftintrz.<ilsxfmt>.<flsxfmt>\t%w0,%w1" +- (set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "<FINTCNV_2>") +- (set_attr "mode" "<MODE>")) +- + (define_insn "fixuns_trunc<FLSX:mode><mode_i>2" + (set (match_operand:<VIMODE> 0 "register_operand" "=f") + (unsigned_fix:<VIMODE> (match_operand:FLSX 1 "register_operand" "f"))) +@@ -2965,60 +2878,6 @@ + (set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")) + +-(define_insn "lsx_vftintrne_w_s" +- (set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFTINTRNE)) +- "ISA_HAS_LSX" +- "vftintrne.w.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vftintrne_l_d" +- (set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFTINTRNE)) +- "ISA_HAS_LSX" +- "vftintrne.l.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-(define_insn "lsx_vftintrp_w_s" 
+- (set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFTINTRP)) +- "ISA_HAS_LSX" +- "vftintrp.w.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vftintrp_l_d" +- (set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFTINTRP)) +- "ISA_HAS_LSX" +- "vftintrp.l.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-(define_insn "lsx_vftintrm_w_s" +- (set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFTINTRM)) +- "ISA_HAS_LSX" +- "vftintrm.w.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vftintrm_l_d" +- (set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFTINTRM)) +- "ISA_HAS_LSX" +- "vftintrm.l.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- + (define_insn "lsx_vftint_w_d" + (set (match_operand:V4SI 0 "register_operand" "=f") + (unspec:V4SI (match_operand:V2DF 1 "register_operand" "f") +@@ -3187,108 +3046,6 @@ + (set_attr "type" "simd_shift") + (set_attr "mode" "V4SF")) + +-(define_insn "lsx_vfrintrne_s" +- (set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRNE_S)) +- "ISA_HAS_LSX" +- "vfrintrne.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vfrintrne_d" +- (set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRNE_D)) +- "ISA_HAS_LSX" +- "vfrintrne.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-(define_insn "lsx_vfrintrz_s" +- (set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRZ_S)) +- "ISA_HAS_LSX" +- "vfrintrz.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vfrintrz_d" +- (set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRZ_D)) +- "ISA_HAS_LSX" +- "vfrintrz.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-(define_insn "lsx_vfrintrp_s" +- (set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRP_S)) +- "ISA_HAS_LSX" +- "vfrintrp.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vfrintrp_d" +- (set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRP_D)) +- "ISA_HAS_LSX" +- "vfrintrp.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-(define_insn "lsx_vfrintrm_s" +- (set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF (match_operand:V4SF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRM_S)) +- "ISA_HAS_LSX" +- "vfrintrm.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "lsx_vfrintrm_d" +- (set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF (match_operand:V2DF 1 "register_operand" "f") +- UNSPEC_LSX_VFRINTRM_D)) +- "ISA_HAS_LSX" +- 
"vfrintrm.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-;; Vector versions of the floating-point frint patterns. +-;; Expands to btrunc, ceil, floor, rint. +-(define_insn "<FRINT_S:frint_pattern_s>v4sf2" +- (set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF (match_operand:V4SF 1 "register_operand" "f") +- FRINT_S)) +- "ISA_HAS_LSX" +- "vfrint<FRINT_S:frint_suffix>.s\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")) +- +-(define_insn "<FRINT_D:frint_pattern_d>v2df2" +- (set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF (match_operand:V2DF 1 "register_operand" "f") +- FRINT_D)) +- "ISA_HAS_LSX" +- "vfrint<FRINT_D:frint_suffix>.d\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")) +- +-;; Expands to round. +-(define_insn "round<mode>2" +- (set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +- UNSPEC_LSX_VFRINT)) +- "ISA_HAS_LSX" +- "vfrint.<flsxfrint>\t%w0,%w1" +- (set_attr "type" "simd_shift") +- (set_attr "mode" "<MODE>")) +- + ;; Offset load and broadcast + (define_expand "lsx_vldrepl_<lsxfmt_f>" + (match_operand:LSX 0 "register_operand") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +new file mode 100644 +index 000000000..27d1ffecd +--- /dev/null ++++ b/gcc/config/loongarch/simd.md +@@ -0,0 +1,213 @@ ++;; Machine Description for LoongArch SIMD instructions for GNU compiler. ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++ ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++;; Integer modes supported by LSX. ++(define_mode_iterator ILSX V2DI V4SI V8HI V16QI) ++ ++;; Integer modes supported by LASX. ++(define_mode_iterator ILASX V4DI V8SI V16HI V32QI) ++ ++;; FP modes supported by LSX ++(define_mode_iterator FLSX V2DF V4SF) ++ ++;; FP modes supported by LASX ++(define_mode_iterator FLASX V4DF V8SF) ++ ++;; All integer modes available ++(define_mode_iterator IVEC (ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")) ++ ++;; All FP modes available ++(define_mode_iterator FVEC (FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")) ++ ++;; Mnemonic prefix, "x" for LASX modes. ++(define_mode_attr x (V2DI "") (V4SI "") (V8HI "") (V16QI "") ++ (V2DF "") (V4SF "") ++ (V4DI "x") (V8SI "x") (V16HI "x") (V32QI "x") ++ (V4DF "x") (V8SF "x")) ++ ++;; Modifier for vector register, "w" for LSX modes, "u" for LASX modes. 
++(define_mode_attr wu (V2DI "w") (V4SI "w") (V8HI "w") (V16QI "w") ++ (V2DF "w") (V4SF "w") ++ (V4DI "u") (V8SI "u") (V16HI "u") (V32QI "u") ++ (V4DF "u") (V8SF "u")) ++ ++;; define_insn name prefix, "lsx" or "lasx" ++(define_mode_attr simd_isa ++ (V2DI "lsx") (V4SI "lsx") (V8HI "lsx") (V16QI "lsx") ++ (V2DF "lsx") (V4SF "lsx") ++ (V4DI "lasx") (V8SI "lasx") (V16HI "lasx") (V32QI "lasx") ++ (V4DF "lasx") (V8SF "lasx")) ++ ++;; Widen integer modes for intermediate values in RTX pattern. ++(define_mode_attr WVEC (V2DI "V2TI") (V4DI "V4TI") ++ (V4SI "V4DI") (V8SI "V8DI") ++ (V8HI "V8SI") (V16HI "V16SI") ++ (V16QI "V16HI") (V32QI "V32HI")) ++ ++;; Integer vector modes with the same length and unit size as a mode. ++(define_mode_attr VIMODE (V2DI "V2DI") (V4SI "V4SI") ++ (V8HI "V8HI") (V16QI "V16QI") ++ (V2DF "V2DI") (V4SF "V4SI") ++ (V4DI "V4DI") (V8SI "V8SI") ++ (V16HI "V16HI") (V32QI "V32QI") ++ (V4DF "V4DI") (V8SF "V8SI")) ++ ++;; Lower-case version. ++(define_mode_attr vimode (V2DF "v2di") (V4SF "v4si") ++ (V4DF "v4di") (V8SF "v8si")) ++ ++;; Suffix for LSX or LASX instructions. ++(define_mode_attr simdfmt (V2DF "d") (V4DF "d") ++ (V4SF "s") (V8SF "s") ++ (V2DI "d") (V4DI "d") ++ (V4SI "w") (V8SI "w") ++ (V8HI "h") (V16HI "h") ++ (V16QI "b") (V32QI "b")) ++ ++;; Suffix for integer mode in LSX or LASX instructions with FP input but ++;; integer output. ++(define_mode_attr simdifmt_for_f (V2DF "l") (V4DF "l") ++ (V4SF "w") (V8SF "w")) ++ ++;; Size of vector elements in bits. ++(define_mode_attr elmbits (V2DI "64") (V4DI "64") ++ (V4SI "32") (V8SI "32") ++ (V8HI "16") (V16HI "16") ++ (V16QI "8") (V32QI "8")) ++ ++;; ======================================================================= ++;; For many LASX instructions, the only difference of it from the LSX ++;; counterpart is the length of vector operands. Describe these LSX/LASX ++;; instruction here so we can avoid duplicating logics. ++;; ======================================================================= ++ ++;; ++;; FP vector rounding instructions ++;; ++ ++(define_c_enum "unspec" ++ UNSPEC_SIMD_FRINTRP ++ UNSPEC_SIMD_FRINTRZ ++ UNSPEC_SIMD_FRINT ++ UNSPEC_SIMD_FRINTRM ++ UNSPEC_SIMD_FRINTRNE) ++ ++(define_int_iterator SIMD_FRINT ++ UNSPEC_SIMD_FRINTRP ++ UNSPEC_SIMD_FRINTRZ ++ UNSPEC_SIMD_FRINT ++ UNSPEC_SIMD_FRINTRM ++ UNSPEC_SIMD_FRINTRNE) ++ ++(define_int_attr simd_frint_rounding ++ (UNSPEC_SIMD_FRINTRP "rp") ++ (UNSPEC_SIMD_FRINTRZ "rz") ++ (UNSPEC_SIMD_FRINT "") ++ (UNSPEC_SIMD_FRINTRM "rm") ++ (UNSPEC_SIMD_FRINTRNE "rne")) ++ ++;; All these, but rint, are controlled by -ffp-int-builtin-inexact. ++;; Note: nearbyint is NOT allowed to raise FE_INEXACT even if ++;; -ffp-int-builtin-inexact, but rint is ALLOWED to raise it even if ++;; -fno-fp-int-builtin-inexact. ++(define_int_attr simd_frint_pattern ++ (UNSPEC_SIMD_FRINTRP "ceil") ++ (UNSPEC_SIMD_FRINTRZ "btrunc") ++ (UNSPEC_SIMD_FRINT "rint") ++ (UNSPEC_SIMD_FRINTRNE "roundeven") ++ (UNSPEC_SIMD_FRINTRM "floor")) ++ ++;; <x>vfrint.{/rp/rz/rm} ++(define_insn "<simd_isa>_<x>vfrint<simd_frint_rounding>_<simdfmt>" ++ (set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") ++ SIMD_FRINT)) ++ "" ++ "<x>vfrint<simd_frint_rounding>.<simdfmt>\t%<wu>0,%<wu>1" ++ (set_attr "type" "simd_fcvt") ++ (set_attr "mode" "<MODE>")) ++ ++;; Expand the standard-named patterns to <x>vfrint instructions if ++;; raising inexact exception is allowed. 
++ ++(define_expand "<simd_frint_pattern><mode>2" ++ (set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") ++ SIMD_FRINT)) ++ "<SIMD_FRINT> == UNSPEC_SIMD_FRINT || ++ flag_fp_int_builtin_inexact || ++ !flag_trapping_math") ++ ++;; ftrunc is like btrunc, but it's allowed to raise inexact exception ++;; even if -fno-fp-int-builtin-inexact. ++(define_expand "ftrunc<mode>2" ++ (set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") ++ UNSPEC_SIMD_FRINTRZ)) ++ "") ++ ++;; <x>vftint.{/rp/rz/rm} ++(define_insn ++ "<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>" ++ (set (match_operand:<VIMODE> 0 "register_operand" "=f") ++ (fix:<VIMODE> ++ (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") ++ SIMD_FRINT))) ++ "" ++ "<x>vftint<simd_frint_rounding>.<simdifmt_for_f>.<simdfmt>\t%<wu>0,%<wu>1" ++ (set_attr "type" "simd_fcvt") ++ (set_attr "mode" "<MODE>")) ++ ++;; Expand the standard-named patterns to <x>vftint instructions if ++;; raising inexact exception. ++ ++(define_expand "l<simd_frint_pattern><mode><vimode>2" ++ (set (match_operand:<VIMODE> 0 "register_operand" "=f") ++ (fix:<VIMODE> ++ (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") ++ SIMD_FRINT))) ++ "<SIMD_FRINT> == UNSPEC_SIMD_FRINT || ++ flag_fp_int_builtin_inexact || ++ !flag_trapping_math") ++ ++;; fix_trunc is allowed to raise inexact exception even if ++;; -fno-fp-int-builtin-inexact. Because the middle end trys to match ++;; (FIX x) and it does not know (FIX (UNSPEC_SIMD_FRINTRZ x)), we need ++;; to use define_insn_and_split instead of define_expand (expanders are ++;; not considered during matching). ++(define_insn_and_split "fix_trunc<mode><vimode>2" ++ (set (match_operand:<VIMODE> 0 "register_operand" "=f") ++ (fix:<VIMODE> (match_operand:FVEC 1 "register_operand" "f"))) ++ "" ++ "#" ++ "" ++ (const_int 0) ++ { ++ emit_insn (gen_<simd_isa>_<x>vftintrz_<simdifmt_for_f>_<simdfmt> ( ++ operands0, operands1)); ++ DONE; ++ } ++ (set_attr "type" "simd_fcvt") ++ (set_attr "mode" "<MODE>")) ++ ++; The LoongArch SX Instructions. ++(include "lsx.md") ++ ++; The LoongArch ASX Instructions. 
++(include "lasx.md")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c
+new file mode 100644
+index 000000000..7bbaf1fba
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c
+@@ -0,0 +1,48 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */
++
++#include "vect-frint.c"
++
++/* ceil */
++/* { dg-final { scan-assembler "bl\t%plt\\(ceil\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(ceilf\\)" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrp\.s" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrp\.d" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrp\.s" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrp\.d" } } */
++
++/* floor */
++/* { dg-final { scan-assembler "bl\t%plt\\(floor\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(floorf\\)" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrm\.s" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrm\.d" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrm\.s" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrm\.d" } } */
++
++/* nearbyint + rint: Only rint is allowed */
++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyintf\\)" } } */
++/* { dg-final { scan-assembler-times "\tvfrint\.s" 1 } } */
++/* { dg-final { scan-assembler-times "\tvfrint\.d" 1 } } */
++/* { dg-final { scan-assembler-times "\txvfrint\.s" 1 } } */
++/* { dg-final { scan-assembler-times "\txvfrint\.d" 1 } } */
++
++/* round: we don't have a corresponding instruction */
++/* { dg-final { scan-assembler "bl\t%plt\\(round\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */
++
++/* roundeven */
++/* { dg-final { scan-assembler "bl\t%plt\\(roundeven\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(roundevenf\\)" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrne\.s" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrne\.d" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrne\.s" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrne\.d" } } */
++
++/* trunc */
++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrz\.s" } } */
++/* { dg-final { scan-assembler-not "\tvfrintrz\.d" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrz\.s" } } */
++/* { dg-final { scan-assembler-not "\txvfrintrz\.d" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint.c b/gcc/testsuite/gcc.target/loongarch/vect-frint.c
+new file mode 100644
+index 000000000..6bf211e7e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint.c
+@@ -0,0 +1,85 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */
++
++float out_x[8];
++double out_y[4];
++
++float x[8];
++double y[4];
++
++#define TEST(op, N, func) \
++void \
++test_##op##_##N##_##func () \
++{ \
++  for (int i = 0; i < N; i++) \
++    out_##op[i] = __builtin_##func (op[i]); \
++}
++
++TEST(x, 4, ceilf);
++TEST(x, 4, floorf);
++TEST(x, 4, nearbyintf);
++TEST(x, 4, rintf);
++TEST(x, 4, roundf);
++TEST(x, 4, roundevenf);
++TEST(x, 4, truncf);
++
++TEST(x, 8, ceilf);
++TEST(x, 8, floorf);
++TEST(x, 8, nearbyintf);
++TEST(x, 8, rintf);
++TEST(x, 8, roundf);
++TEST(x, 8, roundevenf);
++TEST(x, 8, 
truncf); ++ ++TEST(y, 2, ceil); ++TEST(y, 2, floor); ++TEST(y, 2, nearbyint); ++TEST(y, 2, rint); ++TEST(y, 2, round); ++TEST(y, 2, roundeven); ++TEST(y, 2, trunc); ++ ++TEST(y, 4, ceil); ++TEST(y, 4, floor); ++TEST(y, 4, nearbyint); ++TEST(y, 4, rint); ++TEST(y, 4, round); ++TEST(y, 4, roundeven); ++TEST(y, 4, trunc); ++ ++/* ceil */ ++/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrp\.d" } } */ ++ ++/* floor */ ++/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrm\.d" } } */ ++ ++/* rint and nearbyint ++ nearbyint has been disallowed to raise FE_INEXACT for decades. */ ++/* { dg-final { scan-assembler-times "\tvfrint\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvfrint\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvfrint\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvfrint\.d" 1 } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyintf\\)" } } */ ++ ++/* round: we don't have a corresponding instruction */ ++/* { dg-final { scan-assembler "bl\t%plt\\(round\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */ ++ ++/* roundeven */ ++/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrne\.d" } } */ ++ ++/* trunc */ ++/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrz\.d" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +new file mode 100644 +index 000000000..83d268099 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */ ++ ++#include "vect-ftint.c" ++ ++/* ceil */ ++/* { dg-final { scan-assembler "bl\t%plt\\(ceil\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(ceilf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrp\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrp\.l\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrp\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrp\.l\.d" } } */ ++ ++/* floor */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floor\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floorf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrm\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrm\.l\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrm\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrm\.l\.d" } } */ ++ ++/* nearbyint + rint */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floor\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floorf\\)" } } */ ++/* { dg-final { scan-assembler-times "\tvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvftint\.l\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.w\.s" 1 } } */ ++/* { 
dg-final { scan-assembler-times "\txvftint\.l\.d" 1 } } */
++
++/* round: we don't have a corresponding instruction */
++/* { dg-final { scan-assembler "bl\t%plt\\(lround\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */
++
++/* roundeven */
++/* { dg-final { scan-assembler "bl\t%plt\\(roundeven\\)" } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(roundevenf\\)" } } */
++/* { dg-final { scan-assembler-not "\tvftintrne\.w\.s" } } */
++/* { dg-final { scan-assembler-not "\tvftintrne\.l\.d" } } */
++/* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */
++/* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */
++
++/* trunc: XFAIL due to PR 107723 */
++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */
++/* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c
+new file mode 100644
+index 000000000..c4962ed17
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c
+@@ -0,0 +1,83 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */
++
++int out_x[8];
++long out_y[4];
++
++float x[8];
++double y[4];
++
++#define TEST(op, N, func) \
++void \
++test_##op##_##N##_##func () \
++{ \
++  for (int i = 0; i < N; i++) \
++    out_##op[i] = __builtin_##func (op[i]); \
++}
++
++TEST(x, 4, ceilf);
++TEST(x, 4, floorf);
++TEST(x, 4, nearbyintf);
++TEST(x, 4, rintf);
++TEST(x, 4, roundf);
++TEST(x, 4, roundevenf);
++TEST(x, 4, truncf);
++
++TEST(x, 8, ceilf);
++TEST(x, 8, floorf);
++TEST(x, 8, nearbyintf);
++TEST(x, 8, rintf);
++TEST(x, 8, roundf);
++TEST(x, 8, roundevenf);
++TEST(x, 8, truncf);
++
++TEST(y, 2, ceil);
++TEST(y, 2, floor);
++TEST(y, 2, nearbyint);
++TEST(y, 2, rint);
++TEST(y, 2, round);
++TEST(y, 2, roundeven);
++TEST(y, 2, trunc);
++
++TEST(y, 4, ceil);
++TEST(y, 4, floor);
++TEST(y, 4, nearbyint);
++TEST(y, 4, rint);
++TEST(y, 4, round);
++TEST(y, 4, roundeven);
++TEST(y, 4, trunc);
++
++/* ceil */
++/* { dg-final { scan-assembler "\tvftintrp\.w\.s" } } */
++/* { dg-final { scan-assembler "\tvftintrp\.l\.d" } } */
++/* { dg-final { scan-assembler "\txvftintrp\.w\.s" } } */
++/* { dg-final { scan-assembler "\txvftintrp\.l\.d" } } */
++
++/* floor */
++/* { dg-final { scan-assembler "\tvftintrm\.w\.s" } } */
++/* { dg-final { scan-assembler "\tvftintrm\.l\.d" } } */
++/* { dg-final { scan-assembler "\txvftintrm\.w\.s" } } */
++/* { dg-final { scan-assembler "\txvftintrm\.l\.d" } } */
++
++/* rint and nearbyint
++   nearbyint has been disallowed to raise FE_INEXACT for decades. 
*/ ++/* { dg-final { scan-assembler-times "\tvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvftint\.l\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.l\.d" 1 } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyintf\\)" } } */ ++ ++/* round: we don't have a corresponding instruction */ ++/* { dg-final { scan-assembler "bl\t%plt\\(lround\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */ ++ ++/* roundeven */ ++/* { dg-final { scan-assembler "\tvftintrne\.w\.s" } } */ ++/* { dg-final { scan-assembler "\tvftintrne\.l\.d" } } */ ++/* { dg-final { scan-assembler "\txvftintrne\.w\.s" } } */ ++/* { dg-final { scan-assembler "\txvftintrne\.l\.d" } } */ ++ ++/* trunc */ ++/* { dg-final { scan-assembler-not "bl\t%plt\\(trunc\\)" } } */ ++/* { dg-final { scan-assembler-not "bl\t%plt\\(truncf\\)" } } */ +-- +2.43.0 +
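The scan patterns above encode the C99 rule that drives the whole -ffp-int-builtin-inexact logic: nearbyint() must never raise FE_INEXACT, while rint() may. The distinction can be observed with a small standalone C program (a sketch for illustration, not part of the patch; it assumes a libm implementing C99 Annex F and should be built without optimization, since GCC only warns about and ignores #pragma STDC FENV_ACCESS):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  feclearexcept (FE_INEXACT);
  (void) nearbyint (1.5);   /* must NOT set FE_INEXACT */
  printf ("nearbyint: FE_INEXACT=%d\n", fetestexcept (FE_INEXACT) != 0);

  feclearexcept (FE_INEXACT);
  (void) rint (1.5);        /* may set FE_INEXACT: 1.5 -> 2.0 is inexact */
  printf ("rint:      FE_INEXACT=%d\n", fetestexcept (FE_INEXACT) != 0);
  return 0;
}

This is why the expanders map nearbyint to a library call unconditionally, map rint to <x>vfrint unconditionally, and gate the remaining rounding functions on -ffp-int-builtin-inexact or -fno-trapping-math.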
View file
_service:tar_scm:0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Added
@@ -0,0 +1,268 @@
+From 4c13256ea34b4169ceb3f9c7826843b754c6a6e0 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 19 Nov 2023 16:28:59 +0800
+Subject: [PATCH 047/188] LoongArch: Use standard pattern name and RTX code for
+ LSX/LASX muh instructions
+
+Remove unnecessary UNSPECs and make the muh instructions useful with
+GNU vectors and auto vectorization.
+
+gcc/ChangeLog:
+
+	* config/loongarch/simd.md (muh): New code attribute mapping
+	any_extend to smul_highpart or umul_highpart.
+	(<su>mul<mode>3_highpart): New define_insn.
+	* config/loongarch/lsx.md (UNSPEC_LSX_VMUH_S): Remove.
+	(UNSPEC_LSX_VMUH_U): Remove.
+	(lsx_vmuh_s_<lsxfmt>): Remove.
+	(lsx_vmuh_u_<lsxfmt>): Remove.
+	* config/loongarch/lasx.md (UNSPEC_LASX_XVMUH_S): Remove.
+	(UNSPEC_LASX_XVMUH_U): Remove.
+	(lasx_xvmuh_s_<lasxfmt>): Remove.
+	(lasx_xvmuh_u_<lasxfmt>): Remove.
+	* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vmuh_b):
+	Redefine to standard pattern name.
+	(CODE_FOR_lsx_vmuh_h): Likewise.
+	(CODE_FOR_lsx_vmuh_w): Likewise.
+	(CODE_FOR_lsx_vmuh_d): Likewise.
+	(CODE_FOR_lsx_vmuh_bu): Likewise.
+	(CODE_FOR_lsx_vmuh_hu): Likewise.
+	(CODE_FOR_lsx_vmuh_wu): Likewise.
+	(CODE_FOR_lsx_vmuh_du): Likewise.
+	(CODE_FOR_lasx_xvmuh_b): Likewise.
+	(CODE_FOR_lasx_xvmuh_h): Likewise.
+	(CODE_FOR_lasx_xvmuh_w): Likewise.
+	(CODE_FOR_lasx_xvmuh_d): Likewise.
+	(CODE_FOR_lasx_xvmuh_bu): Likewise.
+	(CODE_FOR_lasx_xvmuh_hu): Likewise.
+	(CODE_FOR_lasx_xvmuh_wu): Likewise.
+	(CODE_FOR_lasx_xvmuh_du): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-muh.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 22 ------------
+ gcc/config/loongarch/loongarch-builtins.cc    | 32 ++++++++---------
+ gcc/config/loongarch/lsx.md                   | 22 ------------
+ gcc/config/loongarch/simd.md                  | 16 +++++++++
+ gcc/testsuite/gcc.target/loongarch/vect-muh.c | 36 +++++++++++++++++++
+ 5 files changed, 68 insertions(+), 60 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-muh.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index d4a56c307..023a023b4 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -68,8 +68,6 @@
+   UNSPEC_LASX_BRANCH
+   UNSPEC_LASX_BRANCH_V
+ 
+-  UNSPEC_LASX_XVMUH_S
+-  UNSPEC_LASX_XVMUH_U
+   UNSPEC_LASX_MXVEXTW_U
+   UNSPEC_LASX_XVSLLWIL_S
+   UNSPEC_LASX_XVSLLWIL_U
+@@ -2823,26 +2821,6 @@
+   [(set_attr "type" "simd_logic")
+    (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "lasx_xvmuh_s_<lasxfmt>"
+-  [(set (match_operand:ILASX 0 "register_operand" "=f")
+-        (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+-                       (match_operand:ILASX 2 "register_operand" "f")]
+-                      UNSPEC_LASX_XVMUH_S))]
+-  "ISA_HAS_LASX"
+-  "xvmuh.<lasxfmt>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "lasx_xvmuh_u_<lasxfmt_u>"
+-  [(set (match_operand:ILASX 0 "register_operand" "=f")
+-        (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+-                       (match_operand:ILASX 2 "register_operand" "f")]
+-                      UNSPEC_LASX_XVMUH_U))]
+-  "ISA_HAS_LASX"
+-  "xvmuh.<lasxfmt_u>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "<MODE>")])
+-
+ (define_insn "lasx_xvsllwil_s_<dlasxfmt>_<lasxfmt>"
+   [(set (match_operand:<VDMODE256> 0 "register_operand" "=f")
+         (unspec:<VDMODE256> [(match_operand:ILASX_WHB 1 "register_operand" "f")
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
+index fb458feac..41ea357cf 100644
+--- 
a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -319,6 +319,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vmod_hu CODE_FOR_umodv8hi3 + #define CODE_FOR_lsx_vmod_wu CODE_FOR_umodv4si3 + #define CODE_FOR_lsx_vmod_du CODE_FOR_umodv2di3 ++#define CODE_FOR_lsx_vmuh_b CODE_FOR_smulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_h CODE_FOR_smulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_w CODE_FOR_smulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_d CODE_FOR_smulv2di3_highpart ++#define CODE_FOR_lsx_vmuh_bu CODE_FOR_umulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_hu CODE_FOR_umulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_wu CODE_FOR_umulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_du CODE_FOR_umulv2di3_highpart + #define CODE_FOR_lsx_vmul_b CODE_FOR_mulv16qi3 + #define CODE_FOR_lsx_vmul_h CODE_FOR_mulv8hi3 + #define CODE_FOR_lsx_vmul_w CODE_FOR_mulv4si3 +@@ -439,14 +447,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vfnmsub_s CODE_FOR_vfnmsubv4sf4_nmsub4 + #define CODE_FOR_lsx_vfnmsub_d CODE_FOR_vfnmsubv2df4_nmsub4 + +-#define CODE_FOR_lsx_vmuh_b CODE_FOR_lsx_vmuh_s_b +-#define CODE_FOR_lsx_vmuh_h CODE_FOR_lsx_vmuh_s_h +-#define CODE_FOR_lsx_vmuh_w CODE_FOR_lsx_vmuh_s_w +-#define CODE_FOR_lsx_vmuh_d CODE_FOR_lsx_vmuh_s_d +-#define CODE_FOR_lsx_vmuh_bu CODE_FOR_lsx_vmuh_u_bu +-#define CODE_FOR_lsx_vmuh_hu CODE_FOR_lsx_vmuh_u_hu +-#define CODE_FOR_lsx_vmuh_wu CODE_FOR_lsx_vmuh_u_wu +-#define CODE_FOR_lsx_vmuh_du CODE_FOR_lsx_vmuh_u_du + #define CODE_FOR_lsx_vsllwil_h_b CODE_FOR_lsx_vsllwil_s_h_b + #define CODE_FOR_lsx_vsllwil_w_h CODE_FOR_lsx_vsllwil_s_w_h + #define CODE_FOR_lsx_vsllwil_d_w CODE_FOR_lsx_vsllwil_s_d_w +@@ -588,6 +588,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvmul_h CODE_FOR_mulv16hi3 + #define CODE_FOR_lasx_xvmul_w CODE_FOR_mulv8si3 + #define CODE_FOR_lasx_xvmul_d CODE_FOR_mulv4di3 ++#define CODE_FOR_lasx_xvmuh_b CODE_FOR_smulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_h CODE_FOR_smulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_w CODE_FOR_smulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_d CODE_FOR_smulv4di3_highpart ++#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_umulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_umulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_umulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_du CODE_FOR_umulv4di3_highpart + #define CODE_FOR_lasx_xvclz_b CODE_FOR_clzv32qi2 + #define CODE_FOR_lasx_xvclz_h CODE_FOR_clzv16hi2 + #define CODE_FOR_lasx_xvclz_w CODE_FOR_clzv8si2 +@@ -697,14 +705,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvavgr_hu CODE_FOR_lasx_xvavgr_u_hu + #define CODE_FOR_lasx_xvavgr_wu CODE_FOR_lasx_xvavgr_u_wu + #define CODE_FOR_lasx_xvavgr_du CODE_FOR_lasx_xvavgr_u_du +-#define CODE_FOR_lasx_xvmuh_b CODE_FOR_lasx_xvmuh_s_b +-#define CODE_FOR_lasx_xvmuh_h CODE_FOR_lasx_xvmuh_s_h +-#define CODE_FOR_lasx_xvmuh_w CODE_FOR_lasx_xvmuh_s_w +-#define CODE_FOR_lasx_xvmuh_d CODE_FOR_lasx_xvmuh_s_d +-#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_lasx_xvmuh_u_bu +-#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_lasx_xvmuh_u_hu +-#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_lasx_xvmuh_u_wu +-#define CODE_FOR_lasx_xvmuh_du CODE_FOR_lasx_xvmuh_u_du + #define CODE_FOR_lasx_xvssran_b_h CODE_FOR_lasx_xvssran_s_b_h + #define CODE_FOR_lasx_xvssran_h_w CODE_FOR_lasx_xvssran_s_h_w + #define CODE_FOR_lasx_xvssran_w_d CODE_FOR_lasx_xvssran_s_w_d +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c1c3719e3..537afaf96 100644 +--- 
a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -64,8 +64,6 @@
+   UNSPEC_LSX_VSRLR
+   UNSPEC_LSX_VSRLRI
+   UNSPEC_LSX_VSHUF
+-  UNSPEC_LSX_VMUH_S
+-  UNSPEC_LSX_VMUH_U
+   UNSPEC_LSX_VEXTW_S
+   UNSPEC_LSX_VEXTW_U
+   UNSPEC_LSX_VSLLWIL_S
+@@ -2506,26 +2504,6 @@
+   [(set_attr "type" "simd_logic")
+    (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "lsx_vmuh_s_<lsxfmt>"
+-  [(set (match_operand:ILSX 0 "register_operand" "=f")
+-        (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
+-                      (match_operand:ILSX 2 "register_operand" "f")]
+-                     UNSPEC_LSX_VMUH_S))]
+-  "ISA_HAS_LSX"
+-  "vmuh.<lsxfmt>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "lsx_vmuh_u_<lsxfmt_u>"
+-  [(set (match_operand:ILSX 0 "register_operand" "=f")
+-        (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
+-                      (match_operand:ILSX 2 "register_operand" "f")]
+-                     UNSPEC_LSX_VMUH_U))]
+-  "ISA_HAS_LSX"
+-  "vmuh.<lsxfmt_u>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "<MODE>")])
+-
+ (define_insn "lsx_vextw_s_d"
+   [(set (match_operand:V2DI 0 "register_operand" "=f")
+         (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "f")]
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 27d1ffecd..a0e8db3c0 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -206,6 +206,22 @@
+   [(set_attr "type" "simd_fcvt")
+    (set_attr "mode" "<MODE>")])
+ 
++;; <x>vmuh.{b/h/w/d}
++
++(define_code_attr muh
++  [(sign_extend "smul_highpart")
++   (zero_extend "umul_highpart")])
++
++(define_insn "<su>mul<mode>3_highpart"
++  [(set (match_operand:IVEC 0 "register_operand" "=f")
++        (<muh>:IVEC (match_operand:IVEC 1 "register_operand" "f")
++                    (match_operand:IVEC 2 "register_operand" "f")))
++   (any_extend (const_int 0))]
++  ""
++  "<x>vmuh.<simdfmt><u>\t%<wu>0,%<wu>1,%<wu>2"
++  [(set_attr "type" "simd_int_arith")
++   (set_attr "mode" "<MODE>")])
++
+ ; The LoongArch SX Instructions.
+ (include "lsx.md")
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-muh.c b/gcc/testsuite/gcc.target/loongarch/vect-muh.c
+new file mode 100644
+index 000000000..a788840b2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-muh.c
+@@ -0,0 +1,36 @@
++/* { dg-do compile } */
++/* { dg-options "-mlasx -O3" } */
++/* { dg-final { scan-assembler "\tvmuh\.w\t" } } */
++/* { dg-final { scan-assembler "\tvmuh\.wu\t" } } */
++/* { dg-final { scan-assembler "\txvmuh\.w\t" } } */
++/* { dg-final { scan-assembler "\txvmuh\.wu\t" } } */
++
++int a[8], b[8], c[8];
++
++void
++test1 (void)
++{
++  for (int i = 0; i < 4; i++)
++    c[i] = ((long)a[i] * (long)b[i]) >> 32;
++}
++
++void
++test2 (void)
++{
++  for (int i = 0; i < 4; i++)
++    c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32;
++}
++
++void
++test3 (void)
++{
++  for (int i = 0; i < 8; i++)
++    c[i] = ((long)a[i] * (long)b[i]) >> 32;
++}
++
++void
++test4 (void)
++{
++  for (int i = 0; i < 8; i++)
++    c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32;
++}
+-- 
+2.43.0
+
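For reference, the idiom in vect-muh.c is exactly a high-part multiply. A scalar C sketch of what one lane of vmuh.w / vmuh.wu computes (the helper names here are invented for illustration, not taken from the patch):

#include <stdint.h>

/* High 32 bits of the widened 64-bit product -- one lane of [x]vmuh.w.  */
static inline int32_t
muh_w (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * (int64_t) b) >> 32);
}

/* Unsigned variant -- one lane of [x]vmuh.wu.  */
static inline uint32_t
muh_wu (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * (uint64_t) b) >> 32);
}

Because the pattern now uses the standard smul_highpart/umul_highpart RTX codes instead of UNSPECs, the vectorizer can match this idiom without the user writing any intrinsics.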
View file
_service:tar_scm:0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Added
@@ -0,0 +1,285 @@
+From 9dde2178e64893e4c46b1c375a658f8ab6d34fdd Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 19 Nov 2023 17:28:06 +0800
+Subject: [PATCH 048/188] LoongArch: Use standard pattern name and RTX code for
+ LSX/LASX rotate shift
+
+Remove unnecessary UNSPECs and make the [x]vrotr[i] instructions useful
+with GNU vectors and auto vectorization.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lsx.md (bitimm): Move to ...
+	(UNSPEC_LSX_VROTR): Remove.
+	(lsx_vrotr_<lsxfmt>): Remove.
+	(lsx_vrotri_<lsxfmt>): Remove.
+	* config/loongarch/lasx.md (UNSPEC_LASX_XVROTR): Remove.
+	(lasx_xvrotr_<lasxfmt>): Remove.
+	(lasx_xvrotri_<lasxfmt>): Remove.
+	* config/loongarch/simd.md (bitimm): ... here.  Expand it to
+	cover LASX modes.
+	(vrotr<mode>3): New define_insn.
+	(rotr<mode>3): New define_insn.
+	* config/loongarch/loongarch-builtins.cc:
+	(CODE_FOR_lsx_vrotr_b): Use standard pattern name.
+	(CODE_FOR_lsx_vrotr_h): Likewise.
+	(CODE_FOR_lsx_vrotr_w): Likewise.
+	(CODE_FOR_lsx_vrotr_d): Likewise.
+	(CODE_FOR_lasx_xvrotr_b): Likewise.
+	(CODE_FOR_lasx_xvrotr_h): Likewise.
+	(CODE_FOR_lasx_xvrotr_w): Likewise.
+	(CODE_FOR_lasx_xvrotr_d): Likewise.
+	(CODE_FOR_lsx_vrotri_b): Define to standard pattern name.
+	(CODE_FOR_lsx_vrotri_h): Likewise.
+	(CODE_FOR_lsx_vrotri_w): Likewise.
+	(CODE_FOR_lsx_vrotri_d): Likewise.
+	(CODE_FOR_lasx_xvrotri_b): Likewise.
+	(CODE_FOR_lasx_xvrotri_h): Likewise.
+	(CODE_FOR_lasx_xvrotri_w): Likewise.
+	(CODE_FOR_lasx_xvrotri_d): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-rotr.c: New test.
+---
+ gcc/config/loongarch/lasx.md               | 22 ------------
+ gcc/config/loongarch/loongarch-builtins.cc | 16 +++++++++
+ gcc/config/loongarch/lsx.md                | 28 ---------------
+ gcc/config/loongarch/simd.md               | 29 +++++++++++++++
+ .../gcc.target/loongarch/vect-rotr.c       | 36 +++++++++++++++++++
+ 5 files changed, 81 insertions(+), 50 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-rotr.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 023a023b4..116b30c07 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -138,7 +138,6 @@
+   UNSPEC_LASX_XVHSUBW_Q_D
+   UNSPEC_LASX_XVHADDW_QU_DU
+   UNSPEC_LASX_XVHSUBW_QU_DU
+-  UNSPEC_LASX_XVROTR
+   UNSPEC_LASX_XVADD_Q
+   UNSPEC_LASX_XVSUB_Q
+   UNSPEC_LASX_XVREPLVE
+@@ -4232,18 +4231,6 @@
+   [(set_attr "type" "simd_int_arith")
+    (set_attr "mode" "V4DI")])
+ 
+-;;XVROTR.B XVROTR.H XVROTR.W XVROTR.D
+-;;TODO-478
+-(define_insn "lasx_xvrotr_<lasxfmt>"
+-  [(set (match_operand:ILASX 0 "register_operand" "=f")
+-        (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
+-                       (match_operand:ILASX 2 "register_operand" "f")]
+-                      UNSPEC_LASX_XVROTR))]
+-  "ISA_HAS_LASX"
+-  "xvrotr.<lasxfmt>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "<MODE>")])
+-
+ ;;XVADD.Q
+ ;;TODO2
+ (define_insn "lasx_xvadd_q"
+@@ -4426,15 +4413,6 @@
+   [(set_attr "type" "simd_fcvt")
+    (set_attr "mode" "V4DI")])
+ 
+-(define_insn "lasx_xvrotri_<lasxfmt>"
+-  [(set (match_operand:ILASX 0 "register_operand" "=f")
+-        (rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f")
+-                        (match_operand 2 "const_<bitimm256>_operand" "")))]
+-  "ISA_HAS_LASX"
+-  "xvrotri.<lasxfmt>\t%u0,%u1,%2"
+-  [(set_attr "type" "simd_shf")
+-   (set_attr "mode" "<MODE>")])
+-
+ (define_insn "lasx_xvextl_q_d"
+   [(set (match_operand:V4DI 0 "register_operand" "=f")
+         (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc +index 41ea357cf..f4523c8bf 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -369,6 +369,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vsrli_h CODE_FOR_vlshrv8hi3 + #define CODE_FOR_lsx_vsrli_w CODE_FOR_vlshrv4si3 + #define CODE_FOR_lsx_vsrli_d CODE_FOR_vlshrv2di3 ++#define CODE_FOR_lsx_vrotr_b CODE_FOR_vrotrv16qi3 ++#define CODE_FOR_lsx_vrotr_h CODE_FOR_vrotrv8hi3 ++#define CODE_FOR_lsx_vrotr_w CODE_FOR_vrotrv4si3 ++#define CODE_FOR_lsx_vrotr_d CODE_FOR_vrotrv2di3 ++#define CODE_FOR_lsx_vrotri_b CODE_FOR_rotrv16qi3 ++#define CODE_FOR_lsx_vrotri_h CODE_FOR_rotrv8hi3 ++#define CODE_FOR_lsx_vrotri_w CODE_FOR_rotrv4si3 ++#define CODE_FOR_lsx_vrotri_d CODE_FOR_rotrv2di3 + #define CODE_FOR_lsx_vsub_b CODE_FOR_subv16qi3 + #define CODE_FOR_lsx_vsub_h CODE_FOR_subv8hi3 + #define CODE_FOR_lsx_vsub_w CODE_FOR_subv4si3 +@@ -634,6 +642,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvsrli_h CODE_FOR_vlshrv16hi3 + #define CODE_FOR_lasx_xvsrli_w CODE_FOR_vlshrv8si3 + #define CODE_FOR_lasx_xvsrli_d CODE_FOR_vlshrv4di3 ++#define CODE_FOR_lasx_xvrotr_b CODE_FOR_vrotrv32qi3 ++#define CODE_FOR_lasx_xvrotr_h CODE_FOR_vrotrv16hi3 ++#define CODE_FOR_lasx_xvrotr_w CODE_FOR_vrotrv8si3 ++#define CODE_FOR_lasx_xvrotr_d CODE_FOR_vrotrv4di3 ++#define CODE_FOR_lasx_xvrotri_b CODE_FOR_rotrv32qi3 ++#define CODE_FOR_lasx_xvrotri_h CODE_FOR_rotrv16hi3 ++#define CODE_FOR_lasx_xvrotri_w CODE_FOR_rotrv8si3 ++#define CODE_FOR_lasx_xvrotri_d CODE_FOR_rotrv4di3 + #define CODE_FOR_lasx_xvsub_b CODE_FOR_subv32qi3 + #define CODE_FOR_lasx_xvsub_h CODE_FOR_subv16hi3 + #define CODE_FOR_lasx_xvsub_w CODE_FOR_subv8si3 +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 537afaf96..232399934 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -141,7 +141,6 @@ + UNSPEC_LSX_VMADDWOD + UNSPEC_LSX_VMADDWOD2 + UNSPEC_LSX_VMADDWOD3 +- UNSPEC_LSX_VROTR + UNSPEC_LSX_VADD_Q + UNSPEC_LSX_VSUB_Q + UNSPEC_LSX_VEXTH_Q_D +@@ -363,14 +362,6 @@ + (V8HI "exp_8") + (V16QI "exp_16")) + +-;; This attribute is used to form an immediate operand constraint using +-;; "const_<bitimm>_operand". 
+-(define_mode_attr bitimm
+-  [(V16QI "uimm3")
+-   (V8HI "uimm4")
+-   (V4SI "uimm5")
+-   (V2DI "uimm6")])
+-
+ (define_expand "vec_init<mode><unitmode>"
+   [(match_operand:LSX 0 "register_operand")
+    (match_operand:LSX 1 "")]
+@@ -4152,16 +4143,6 @@
+   [(set_attr "type" "simd_int_arith")
+    (set_attr "mode" "V2DI")])
+ 
+-(define_insn "lsx_vrotr_<lsxfmt>"
+-  [(set (match_operand:ILSX 0 "register_operand" "=f")
+-        (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
+-                      (match_operand:ILSX 2 "register_operand" "f")]
+-                     UNSPEC_LSX_VROTR))]
+-  "ISA_HAS_LSX"
+-  "vrotr.<lsxfmt>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_int_arith")
+-   (set_attr "mode" "<MODE>")])
+-
+ (define_insn "lsx_vadd_q"
+   [(set (match_operand:V2DI 0 "register_operand" "=f")
+         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")
+@@ -4255,15 +4236,6 @@
+   [(set_attr "type" "simd_fcvt")
+    (set_attr "mode" "V2DI")])
+ 
+-(define_insn "lsx_vrotri_<lsxfmt>"
+-  [(set (match_operand:ILSX 0 "register_operand" "=f")
+-        (rotatert:ILSX (match_operand:ILSX 1 "register_operand" "f")
+-                       (match_operand 2 "const_<bitimm>_operand" "")))]
+-  "ISA_HAS_LSX"
+-  "vrotri.<lsxfmt>\t%w0,%w1,%2"
+-  [(set_attr "type" "simd_shf")
+-   (set_attr "mode" "<MODE>")])
+-
+ (define_insn "lsx_vextl_q_d"
+   [(set (match_operand:V2DI 0 "register_operand" "=f")
+         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")]
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index a0e8db3c0..4ecf7a55e 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -91,6 +91,13 @@
+                            (V8HI "16") (V16HI "16")
+                            (V16QI "8") (V32QI "8")])
+ 
++;; This attribute is used to form an immediate operand constraint using
++;; "const_<bitimm>_operand".
++(define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3")
++                          (V8HI "uimm4") (V16HI "uimm4")
++                          (V4SI "uimm5") (V8SI "uimm5")
++                          (V2DI "uimm6") (V4DI "uimm6")])
++
+ ;; =======================================================================
+ ;; For many LASX instructions, the only difference from the LSX
+ ;; counterpart is the length of the vector operands.  Describe these
+@@ -222,6 +229,28 @@
+   [(set_attr "type" "simd_int_arith")
+    (set_attr "mode" "<MODE>")])
+ 
++;; <x>vrotr.{b/h/w/d}
++
++(define_insn "vrotr<mode>3"
++  [(set (match_operand:IVEC 0 "register_operand" "=f")
++        (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
++                       (match_operand:IVEC 2 "register_operand" "f")))]
++  ""
++  "<x>vrotr.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
++  [(set_attr "type" "simd_int_arith")
++   (set_attr "mode" "<MODE>")])
++
++;; <x>vrotri.{b/h/w/d}
++
++(define_insn "rotr<mode>3"
++  [(set (match_operand:IVEC 0 "register_operand" "=f")
++        (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
++                       (match_operand:SI 2 "const_<bitimm>_operand")))]
++  ""
++  "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2";
++  [(set_attr "type" "simd_int_arith")
++   (set_attr "mode" "<MODE>")])
++
+ ; The LoongArch SX Instructions. 
+ (include "lsx.md")
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-rotr.c b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c
+new file mode 100644
+index 000000000..733c36334
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c
+@@ -0,0 +1,36 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx" } */
++/* { dg-final { scan-assembler "\tvrotr\.w\t" } } */
++/* { dg-final { scan-assembler "\txvrotr\.w\t" } } */
++/* { dg-final { scan-assembler "\tvrotri\.w\t\[^\n\]*7\n" } } */
++/* { dg-final { scan-assembler "\txvrotri\.w\t\[^\n\]*7\n" } } */
++
++unsigned int a[8], b[8];
++
++void
++test1 (void)
++{
++  for (int i = 0; i < 4; i++)
++    a[i] = a[i] >> b[i] | a[i] << (32 - b[i]);
++}
++
++void
++test2 (void)
++{
++  for (int i = 0; i < 8; i++)
++    a[i] = a[i] >> b[i] | a[i] << (32 - b[i]);
++}
++
++void
++test3 (void)
++{
++  for (int i = 0; i < 4; i++)
++    a[i] = a[i] >> 7 | a[i] << 25;
++}
++
++void
++test4 (void)
++{
++  for (int i = 0; i < 8; i++)
++    a[i] = a[i] >> 7 | a[i] << 25;
++}
+-- 
+2.43.0
+
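The shift-or expression in vect-rotr.c is the classic rotate idiom. Note that the test's variable-count form would be undefined behavior in C for a count of 0 (a shift by 32); the fully defined variant that GCC's rotate recognition also accepts masks both shift counts. A scalar sketch, for illustration only:

#include <stdint.h>

static inline uint32_t
rotr32 (uint32_t x, unsigned n)
{
  n &= 31;                                /* keep both shift counts < 32 */
  return (x >> n) | (x << ((32 - n) & 31));
}

Either way, once the idiom is recognized it maps onto the rotatert RTX code and from there directly onto [x]vrotr / [x]vrotri.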
View file
_service:tar_scm:0049-LoongArch-Remove-lrint_allow_inexact.patch
Added
@@ -0,0 +1,42 @@
+From c898e4a85c04a72f08db9ba2a454130f15f6f280 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 20 Nov 2023 01:34:26 +0800
+Subject: [PATCH 049/188] LoongArch: Remove lrint_allow_inexact
+
+No functional change, just a cleanup.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (lrint_allow_inexact): Remove.
+	(<lrint_pattern><ANYF:mode><ANYFI:mode>2): Check if <LRINT>
+	== UNSPEC_FTINT instead of <lrint_allow_inexact>.
+---
+ gcc/config/loongarch/loongarch.md | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index d1c766cbf..11577f407 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -585,9 +585,6 @@
+ (define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
+                                     (UNSPEC_FTINTRM "rm")
+                                     (UNSPEC_FTINTRP "rp")])
+-(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
+-                                      (UNSPEC_FTINTRM "0")
+-                                      (UNSPEC_FTINTRP "0")])
+ 
+ ;; Iterator and attributes for bytepick.d
+ (define_int_iterator bytepick_w_ashift_amount [8 16 24])
+@@ -2384,7 +2381,7 @@
+         (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
+                       LRINT))]
+   "TARGET_HARD_FLOAT &&
+-   (<lrint_allow_inexact>
++   (<LRINT> == UNSPEC_FTINT
+    || flag_fp_int_builtin_inexact
+    || !flag_trapping_math)"
+   "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
+-- 
+2.43.0
+
View file
_service:tar_scm:0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
Added
@@ -0,0 +1,150 @@
+From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Mon, 20 Nov 2023 03:51:56 +0800
+Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with
+ explicit rounding mode
+
+In the LoongArch base FP ISA there is only the frint.{s/d} instruction,
+which reads the global rounding mode.  Utilize LSX for explicit rounding
+modes even if the operand is scalar.  It seems to waste CPU power, but it
+is still much faster than calling the library function.
+
+gcc/ChangeLog:
+
+	* config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator.
+	(VLSX_FOR_FMODE): New mode attribute.
+	(<simd_frint_pattern><mode>2): New expander,
+	expanding to vreplvei.{w/d} + vfrint{rp/rz/rm/rne}.{s/d}.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-frint-scalar.c: New test.
+	* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test.
+---
+ gcc/config/loongarch/simd.md                  | 28 ++++++++++++
+ .../loongarch/vect-frint-scalar-no-inexact.c  | 23 ++++++++++
+ .../gcc.target/loongarch/vect-frint-scalar.c  | 43 +++++++++++++++++++
+ 3 files changed, 94 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 4ecf7a55e..843b1a41f 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -169,6 +169,34 @@
+                      UNSPEC_SIMD_FRINTRZ))]
+   "")
+ 
++;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int-
++;; builtin-inexact.  The base FP instruction set lacks these operations.
++;; Yes we are wasting 50% or even 75% of the CPU horsepower, but it's still
++;; much faster than calling a libc function: on LA464 and LA664 there is a
++;; 3x ~ 5x speed up.
++;;
++;; Note that a vreplvei instruction is needed or we'll also operate on the
++;; junk in high bits of the vector register and produce random FP exceptions.
++
++(define_int_iterator LSX_SCALAR_FRINT
++  [UNSPEC_SIMD_FRINTRP
++   UNSPEC_SIMD_FRINTRZ
++   UNSPEC_SIMD_FRINTRM
++   UNSPEC_SIMD_FRINTRNE])
++
++(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")])
++
++(define_expand "<simd_frint_pattern><mode>2"
++  [(set (match_dup 2)
++        (vec_duplicate:<VLSX_FOR_FMODE>
++          (match_operand:ANYF 1 "register_operand")))
++   (set (match_dup 2)
++        (unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
++   (set (match_operand:ANYF 0 "register_operand")
++        (vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
++  "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
++  "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
++
+ ;; <x>vftint.{/rp/rz/rm}
+ (define_insn
+   "<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+new file mode 100644
+index 000000000..002e3b92d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
++
++#include "vect-frint-scalar.c"
++
++/* cannot use LSX for these with -fno-fp-int-builtin-inexact,
++   call library function.  */
++/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */
++
++/* nearbyint is not allowed to raise FE_INEXACT for decades */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
++
++/* rint should just use basic FP operation */
++/* { dg-final { scan-assembler "\tfrint\.s" } } */
++/* { dg-final { scan-assembler "\tfrint\.d" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+new file mode 100644
+index 000000000..c7cb40be7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+@@ -0,0 +1,43 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx" } */
++
++#define test(func, suffix) \
++__typeof__ (1.##suffix) \
++_##func##suffix (__typeof__ (1.##suffix) x) \
++{ \
++  return __builtin_##func##suffix (x); \
++}
++
++test (ceil, f)
++test (ceil, )
++test (floor, f)
++test (floor, )
++test (trunc, f)
++test (trunc, )
++test (roundeven, f)
++test (roundeven, )
++test (nearbyint, f)
++test (nearbyint, )
++test (rint, f)
++test (rint, )
++
++/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */
++/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */
++/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */
++
++/* must do vreplvei first */
++/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */
++/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */
++
++/* nearbyint is not allowed to raise FE_INEXACT for decades */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
++
++/* rint should just use basic FP operation */
++/* { dg-final { scan-assembler "\tfrint\.s" } } */
++/* { dg-final { scan-assembler "\tfrint\.d" } } */
+-- 
+2.43.0
+
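Conceptually the new expander performs broadcast, vector round, extract lane 0. A rough C equivalent of the ceilf case (a sketch assuming the __builtin_lsx_vfrintrp_s builtin is available with -mlsx; the actual expansion stays in RTL and never goes through intrinsics):

typedef float v4sf __attribute__ ((vector_size (16)));

float
ceilf_via_lsx (float x)
{
  v4sf v = { x, x, x, x };             /* vreplvei.w: fill every lane  */
  v = __builtin_lsx_vfrintrp_s (v);    /* vfrintrp.s on all four lanes */
  return v[0];                         /* vec_select lane 0            */
}

The broadcast is what makes this safe: since every lane holds x, rounding the full vector register raises exactly the FP exceptions the scalar operation would, with no spurious ones from junk in the upper lanes.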
View file
_service:tar_scm:0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch
Added
@@ -0,0 +1,49 @@
+From 21bb4f07db53df717d02e9115dcdb7b5475ede2a Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:56:35 +0800
+Subject: [PATCH 051/188] LoongArch: Remove duplicate definition of
+ CLZ_DEFINED_VALUE_AT_ZERO.
+
+In the r14-5547 commit, CL[T]Z_DEFINED_VALUE_AT_ZERO were defined at
+the same time, but in fact CLZ_DEFINED_VALUE_AT_ZERO has already been
+defined, so remove the duplicate definition.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (CTZ_DEFINED_VALUE_AT_ZERO): Add
+	description.
+	(CLZ_DEFINED_VALUE_AT_ZERO): Remove duplicate definition.
+---
+ gcc/config/loongarch/loongarch.h | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 19cf6fd33..8b28be0e4 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -288,10 +288,12 @@ along with GCC; see the file COPYING3.  If not see
+ /* Define if loading short immediate values into registers sign extends.  */
+ #define SHORT_IMMEDIATES_SIGN_EXTEND 1
+ 
+-/* The clz.{w/d} instructions have the natural values at 0.  */
++/* The clz.{w/d}, ctz.{w/d} instructions have the natural values at 0.  */
+ 
+ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
++  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+ 
+ /* Standard register usage.  */
+ 
+@@ -1239,8 +1241,3 @@ struct GTY (()) machine_function
+ 
+ #define TARGET_EXPLICIT_RELOCS \
+   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+-
+-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+-- 
+2.43.0
+
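The retained macros state that clz.{w/d} and ctz.{w/d} return the operand width for a zero input, and the returned value 2 means this also holds for the RTL patterns. The practical effect is that the usual guard against zero can collapse into a single instruction; a sketch of the kind of code that benefits (illustrative, not from the patch):

#include <stdint.h>

int
count_leading_zeros (uint32_t x)
{
  /* __builtin_clz (0) is undefined at the C level, so guard it.  With
     CLZ_DEFINED_VALUE_AT_ZERO yielding the bit width, GCC can fold the
     whole expression into one clz.w on LoongArch.  */
  return x ? __builtin_clz (x) : 32;
}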
View file
_service:tar_scm:0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
Added
@@ -0,0 +1,4375 @@
+From 8d5c983efc35804f98823e203eada6263dd1604e Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Tue, 28 Nov 2023 16:23:53 +0800
+Subject: [PATCH 052/188] LoongArch: Added vectorized hardware inspection for
+ testsuite.
+
+When the GCC regression tests are executed on a CPU that does not support
+vectorization, the loongarch/vector directory shows FAIL entries for all
+vectorization test cases that are run.  To solve this kind of problem, a
+vector hardware check was added, so that on such hardware these test cases
+are only compiled, not run.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c:Remove
+	the default settings that make the test run.
+	* gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvadd.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvadda.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddi.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvand.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvandi.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvandn.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitset.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvclo.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvclz.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvextrins.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c:Dito.
+	* gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffinth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftintl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvh.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvld.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvldi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsub.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmul.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvneg.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvorn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvprem.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseq.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseqi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsll.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsra.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrai.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsran.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrani.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrar.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrari.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrl.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrli.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrln.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssran.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrani.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrln.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvst.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsub.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsubi.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvxor.c: Ditto.
+ * gcc.target/loongarch/vector/lasx/lasx-xvxori.c: Ditto.
+ * gcc.target/loongarch/vector/loongarch-vector.exp: Add hardware
+ detection so that whether each test is executed or only compiled
+ is decided by the capabilities of the machine running the
+ testsuite.
+ * gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c: Remove the
+ dg-do run directive so the default action chosen by
+ loongarch-vector.exp takes effect.
+ * gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vadd.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vadda.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vand.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vandi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vandn.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vavg-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vavg-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitclr.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitclri.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitrev.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitsel.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitseli.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitset.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbitseti.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbsll.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vbsrl.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vclo.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vclz.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vexth-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vexth-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vextl-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vextl-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vextrins.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vffint-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vffint-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vffint-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfrstp.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vftint-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vftint-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vftint-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vftint-4.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vilvh.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vilvl.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vld.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vldi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmadd.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmax-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmax-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmin-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmin-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmini-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmini-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmod-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmod-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmskgez.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmskltz.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmsknz.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmsub.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmul.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vneg.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vnor.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vnori.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vor.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vori.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vorn.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpackev.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpackod.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpcnt.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpickev.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpickod.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vpremi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vreplve.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vreplvei.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vrotr.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vrotri.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsat-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsat-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vseq.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vseqi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsigncov.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsle-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsle-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslei-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslei-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsll.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslli.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslt-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslt-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslti-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vslti-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsra.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrai.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsran.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrani.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrar.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrari.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrarn.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrarni.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrl.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrli.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrln.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrlni.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrlr.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrlri.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssran.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrani.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrarn.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrarni.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrln.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrlni.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssub-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vssub-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vst.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsub.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsubi.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vxor.c: Ditto.
+ * gcc.target/loongarch/vector/lsx/lsx-vxori.c: Ditto.
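[The loongarch-vector.exp entry above is the heart of this patch: instead of every test hard-coding { dg-do run }, the driver probes the host once and sets the default action for the whole directory. The patch body for that file is not reproduced in the hunks below (only the 23-line insertion count in the diffstat hints at it), so the following is a minimal sketch of the DejaGnu pattern such a probe usually follows elsewhere in the GCC testsuite. The proc name loongarch_sx_hw and the exact probe body are illustrative assumptions, not the verbatim change.]

    # Sketch only -- assumes the standard GCC target-supports.exp idiom,
    # not the verbatim loongarch-vector.exp addition.

    # Probe: passes only when the snippet compiles with -mlsx and the
    # LSX instruction actually executes (no SIGILL) on the machine
    # running the testsuite.  check_runtime caches the result.
    proc check_effective_target_loongarch_sx_hw { } {
        return [check_runtime loongarch_sx_hw {
            #include <lsxintrin.h>
            int
            main (void)
            {
              __m128i a, b, c;
              c = __lsx_vadd_w (a, b);
              return 0;
            }
        } "-mlsx"]
    }

    # In loongarch-vector.exp: execute the tests on LSX-capable
    # hardware, otherwise fall back to compile-only.
    if { [check_effective_target_loongarch_sx_hw] } {
        set dg-do-what-default run
    } else {
        set dg-do-what-default compile
    }

[With a default action set this way, deleting the per-file /* { dg-do run } */ directives, which is the bulk of the diff below, lets each test run on vector-capable machines and merely compile everywhere else.]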
+--- + .../loongarch/vector/lasx/lasx-xvabsd-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvabsd-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvadd.c | 1 - + .../loongarch/vector/lasx/lasx-xvadda.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddi.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwev-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwod-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvand.c | 1 - + .../loongarch/vector/lasx/lasx-xvandi.c | 1 - + .../loongarch/vector/lasx/lasx-xvandn.c | 1 - + .../loongarch/vector/lasx/lasx-xvavg-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvavg-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvavgr-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvavgr-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitclr.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitclri.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitrev.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitrevi.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitsel.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitseli.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitset.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitseti.c | 1 - + .../loongarch/vector/lasx/lasx-xvbsll_v.c | 1 - + .../loongarch/vector/lasx/lasx-xvbsrl_v.c | 1 - + .../loongarch/vector/lasx/lasx-xvclo.c | 1 - + .../loongarch/vector/lasx/lasx-xvclz.c | 1 - + .../loongarch/vector/lasx/lasx-xvdiv-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvdiv-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvext2xv-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvext2xv-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvexth-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvexth-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvextl-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvextl-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvextrins.c | 1 - + .../loongarch/vector/lasx/lasx-xvfadd_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfadd_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfclass_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfclass_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_caf_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cle_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_clt_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cne_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cor_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cun_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_saf_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_seq_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sle_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_slt_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sne_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sor_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sun_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcvt.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcvth.c | 1 - + .../loongarch/vector/lasx/lasx-xvffint-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvffint-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvffinth.c | 1 - + .../loongarch/vector/lasx/lasx-xvflogb_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvflogb_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmadd_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmadd_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmax_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmax_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmaxa_d.c | 1 - + 
.../loongarch/vector/lasx/lasx-xvfmaxa_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfnmadd_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfnmadd_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrint_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrint_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrstp.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrstpi.c | 1 - + .../loongarch/vector/lasx/lasx-xvfsqrt_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfsqrt_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvftint-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvftint-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvftint-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvftintl.c | 1 - + .../loongarch/vector/lasx/lasx-xvhaddw-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvhaddw-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvhsubw-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvhsubw-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvilvh.c | 1 - + .../loongarch/vector/lasx/lasx-xvilvl.c | 1 - + .../loongarch/vector/lasx/lasx-xvinsgr2vr.c | 1 - + .../loongarch/vector/lasx/lasx-xvinsve0.c | 1 - + .../loongarch/vector/lasx/lasx-xvld.c | 1 - + .../loongarch/vector/lasx/lasx-xvldi.c | 1 - + .../loongarch/vector/lasx/lasx-xvmadd.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwev-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwod-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvmax-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmax-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaxi-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaxi-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmin-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmin-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmini-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmini-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmskgez.c | 1 - + .../loongarch/vector/lasx/lasx-xvmskltz.c | 1 - + .../loongarch/vector/lasx/lasx-xvmsknz.c | 1 - + .../loongarch/vector/lasx/lasx-xvmsub.c | 1 - + .../loongarch/vector/lasx/lasx-xvmuh-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmuh-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmul.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwev-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwod-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvneg.c | 1 - + .../loongarch/vector/lasx/lasx-xvnor.c | 1 - + .../loongarch/vector/lasx/lasx-xvnori.c | 1 - + .../loongarch/vector/lasx/lasx-xvor.c | 1 - + .../loongarch/vector/lasx/lasx-xvori.c | 1 - + .../loongarch/vector/lasx/lasx-xvorn.c | 1 - + .../loongarch/vector/lasx/lasx-xvpackev.c | 1 - + .../loongarch/vector/lasx/lasx-xvpackod.c | 1 - + .../loongarch/vector/lasx/lasx-xvpcnt.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickev.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickod.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickve.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickve2gr.c | 1 - + .../loongarch/vector/lasx/lasx-xvprem.c | 1 - + .../loongarch/vector/lasx/lasx-xvpremi.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplgr2vr.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplve.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplve0.c | 1 - + 
.../loongarch/vector/lasx/lasx-xvreplvei.c | 1 - + .../loongarch/vector/lasx/lasx-xvrotr.c | 1 - + .../loongarch/vector/lasx/lasx-xvrotri.c | 1 - + .../loongarch/vector/lasx/lasx-xvsadd-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsadd-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsat-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsat-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvseq.c | 1 - + .../loongarch/vector/lasx/lasx-xvseqi.c | 1 - + .../loongarch/vector/lasx/lasx-xvshuf4i_b.c | 1 - + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 1 - + .../loongarch/vector/lasx/lasx-xvsigncov.c | 1 - + .../loongarch/vector/lasx/lasx-xvsle-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsle-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvslei-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvslei-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsll.c | 1 - + .../loongarch/vector/lasx/lasx-xvslli.c | 1 - + .../loongarch/vector/lasx/lasx-xvsllwil-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsllwil-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvslt-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvslt-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvslti-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvslti-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsra.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrai.c | 1 - + .../loongarch/vector/lasx/lasx-xvsran.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrani.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrar.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrari.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrarn.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrarni.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrl.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrli.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrln.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlni.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlr.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlri.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlrn.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlrni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssran.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrani.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrarn.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrarni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrln.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrlni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrlrn.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrlrni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssub-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvssub-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvst.c | 1 - + .../loongarch/vector/lasx/lasx-xvsub.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubi.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvxor.c | 1 - + .../loongarch/vector/lasx/lasx-xvxori.c | 1 - + .../loongarch/vector/loongarch-vector.exp | 23 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-vabsd-1.c | 1 - + .../loongarch/vector/lsx/lsx-vabsd-2.c | 1 - + .../loongarch/vector/lsx/lsx-vadd.c | 1 - + .../loongarch/vector/lsx/lsx-vadda.c | 1 - + .../loongarch/vector/lsx/lsx-vaddi.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwev-3.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwod-3.c | 1 - + .../loongarch/vector/lsx/lsx-vand.c | 1 - + 
.../loongarch/vector/lsx/lsx-vandi.c | 1 - + .../loongarch/vector/lsx/lsx-vandn.c | 1 - + .../loongarch/vector/lsx/lsx-vavg-1.c | 1 - + .../loongarch/vector/lsx/lsx-vavg-2.c | 1 - + .../loongarch/vector/lsx/lsx-vavgr-1.c | 1 - + .../loongarch/vector/lsx/lsx-vavgr-2.c | 1 - + .../loongarch/vector/lsx/lsx-vbitclr.c | 1 - + .../loongarch/vector/lsx/lsx-vbitclri.c | 1 - + .../loongarch/vector/lsx/lsx-vbitrev.c | 1 - + .../loongarch/vector/lsx/lsx-vbitrevi.c | 1 - + .../loongarch/vector/lsx/lsx-vbitsel.c | 1 - + .../loongarch/vector/lsx/lsx-vbitseli.c | 1 - + .../loongarch/vector/lsx/lsx-vbitset.c | 1 - + .../loongarch/vector/lsx/lsx-vbitseti.c | 1 - + .../loongarch/vector/lsx/lsx-vbsll.c | 1 - + .../loongarch/vector/lsx/lsx-vbsrl.c | 1 - + .../loongarch/vector/lsx/lsx-vclo.c | 1 - + .../loongarch/vector/lsx/lsx-vclz.c | 1 - + .../loongarch/vector/lsx/lsx-vdiv-1.c | 1 - + .../loongarch/vector/lsx/lsx-vdiv-2.c | 1 - + .../loongarch/vector/lsx/lsx-vexth-1.c | 1 - + .../loongarch/vector/lsx/lsx-vexth-2.c | 1 - + .../loongarch/vector/lsx/lsx-vextl-1.c | 1 - + .../loongarch/vector/lsx/lsx-vextl-2.c | 1 - + .../loongarch/vector/lsx/lsx-vextrins.c | 1 - + .../loongarch/vector/lsx/lsx-vfadd_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfadd_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfclass_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfclass_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_caf.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_ceq.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cle.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_clt.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cne.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cor.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cun.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_saf.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_seq.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sle.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_slt.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sne.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sor.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sun.c | 1 - + .../loongarch/vector/lsx/lsx-vfcvt-1.c | 1 - + .../loongarch/vector/lsx/lsx-vfcvt-2.c | 1 - + .../loongarch/vector/lsx/lsx-vffint-1.c | 1 - + .../loongarch/vector/lsx/lsx-vffint-2.c | 1 - + .../loongarch/vector/lsx/lsx-vffint-3.c | 1 - + .../loongarch/vector/lsx/lsx-vflogb_d.c | 1 - + .../loongarch/vector/lsx/lsx-vflogb_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfmadd_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfmadd_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfmax_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfmax_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfmaxa_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfmaxa_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfnmadd_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfnmadd_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfrint_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfrint_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfrstp.c | 1 - + .../loongarch/vector/lsx/lsx-vfrstpi.c | 1 - + .../loongarch/vector/lsx/lsx-vfsqrt_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfsqrt_s.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-1.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-2.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-3.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-4.c | 1 - + .../loongarch/vector/lsx/lsx-vhaddw-1.c | 1 - + .../loongarch/vector/lsx/lsx-vhaddw-2.c | 1 - + .../loongarch/vector/lsx/lsx-vhsubw-1.c | 1 - + .../loongarch/vector/lsx/lsx-vhsubw-2.c | 1 - + .../loongarch/vector/lsx/lsx-vilvh.c | 1 - + .../loongarch/vector/lsx/lsx-vilvl.c | 1 - + 
.../loongarch/vector/lsx/lsx-vinsgr2vr.c | 1 - + .../gcc.target/loongarch/vector/lsx/lsx-vld.c | 1 - + .../loongarch/vector/lsx/lsx-vldi.c | 1 - + .../loongarch/vector/lsx/lsx-vmadd.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwev-3.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwod-3.c | 1 - + .../loongarch/vector/lsx/lsx-vmax-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmax-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmaxi-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmaxi-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmin-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmin-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmini-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmini-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmskgez.c | 1 - + .../loongarch/vector/lsx/lsx-vmskltz.c | 1 - + .../loongarch/vector/lsx/lsx-vmsknz.c | 1 - + .../loongarch/vector/lsx/lsx-vmsub.c | 1 - + .../loongarch/vector/lsx/lsx-vmuh-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmuh-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmul.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwev-3.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwod-3.c | 1 - + .../loongarch/vector/lsx/lsx-vneg.c | 1 - + .../loongarch/vector/lsx/lsx-vnor.c | 1 - + .../loongarch/vector/lsx/lsx-vnori.c | 1 - + .../gcc.target/loongarch/vector/lsx/lsx-vor.c | 1 - + .../loongarch/vector/lsx/lsx-vori.c | 1 - + .../loongarch/vector/lsx/lsx-vorn.c | 1 - + .../loongarch/vector/lsx/lsx-vpackev.c | 1 - + .../loongarch/vector/lsx/lsx-vpackod.c | 1 - + .../loongarch/vector/lsx/lsx-vpcnt.c | 1 - + .../loongarch/vector/lsx/lsx-vpickev.c | 1 - + .../loongarch/vector/lsx/lsx-vpickod.c | 1 - + .../loongarch/vector/lsx/lsx-vpickve2gr.c | 1 - + .../loongarch/vector/lsx/lsx-vpremi.c | 1 - + .../loongarch/vector/lsx/lsx-vreplgr2vr.c | 1 - + .../loongarch/vector/lsx/lsx-vreplve.c | 1 - + .../loongarch/vector/lsx/lsx-vreplvei.c | 1 - + .../loongarch/vector/lsx/lsx-vrotr.c | 1 - + .../loongarch/vector/lsx/lsx-vrotri.c | 1 - + .../loongarch/vector/lsx/lsx-vsadd-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsadd-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsat-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsat-2.c | 1 - + .../loongarch/vector/lsx/lsx-vseq.c | 1 - + .../loongarch/vector/lsx/lsx-vseqi.c | 1 - + .../loongarch/vector/lsx/lsx-vshuf.c | 1 - + .../loongarch/vector/lsx/lsx-vshuf4i.c | 1 - + .../loongarch/vector/lsx/lsx-vsigncov.c | 1 - + .../loongarch/vector/lsx/lsx-vsle-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsle-2.c | 1 - + .../loongarch/vector/lsx/lsx-vslei-1.c | 1 - + .../loongarch/vector/lsx/lsx-vslei-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsll.c | 1 - + .../loongarch/vector/lsx/lsx-vslli.c | 1 - + .../loongarch/vector/lsx/lsx-vsllwil-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsllwil-2.c | 1 - + .../loongarch/vector/lsx/lsx-vslt-1.c | 1 - + .../loongarch/vector/lsx/lsx-vslt-2.c | 1 - + .../loongarch/vector/lsx/lsx-vslti-1.c | 1 - + .../loongarch/vector/lsx/lsx-vslti-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsra.c | 1 - + .../loongarch/vector/lsx/lsx-vsrai.c | 1 - + .../loongarch/vector/lsx/lsx-vsran.c | 1 - + 
.../loongarch/vector/lsx/lsx-vsrani.c | 1 - + .../loongarch/vector/lsx/lsx-vsrar.c | 1 - + .../loongarch/vector/lsx/lsx-vsrari.c | 1 - + .../loongarch/vector/lsx/lsx-vsrarn.c | 1 - + .../loongarch/vector/lsx/lsx-vsrarni.c | 1 - + .../loongarch/vector/lsx/lsx-vsrl.c | 1 - + .../loongarch/vector/lsx/lsx-vsrli.c | 1 - + .../loongarch/vector/lsx/lsx-vsrln.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlni.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlr.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlri.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlrn.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlrni.c | 1 - + .../loongarch/vector/lsx/lsx-vssran.c | 1 - + .../loongarch/vector/lsx/lsx-vssrani.c | 1 - + .../loongarch/vector/lsx/lsx-vssrarn.c | 1 - + .../loongarch/vector/lsx/lsx-vssrarni.c | 1 - + .../loongarch/vector/lsx/lsx-vssrln.c | 1 - + .../loongarch/vector/lsx/lsx-vssrlni.c | 1 - + .../loongarch/vector/lsx/lsx-vssrlrn.c | 1 - + .../loongarch/vector/lsx/lsx-vssrlrni.c | 1 - + .../loongarch/vector/lsx/lsx-vssub-1.c | 1 - + .../loongarch/vector/lsx/lsx-vssub-2.c | 1 - + .../gcc.target/loongarch/vector/lsx/lsx-vst.c | 1 - + .../loongarch/vector/lsx/lsx-vsub.c | 1 - + .../loongarch/vector/lsx/lsx-vsubi.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vxor.c | 1 - + .../loongarch/vector/lsx/lsx-vxori.c | 1 - + 393 files changed, 23 insertions(+), 392 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c +index 41fae32df..5e15a12cb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c +index bd7a9069d..fa0f9f6b5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c +index 293295723..82da73440 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c +index d6b57d1cd..2c2701dc2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c +index 054bf6e55..064b26fb6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c +index 70f3bf783..160073927 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c +index 22528a14f..c45840ea2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c +index 38a0a53d7..567bc1faf 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c +index a4dc565e9..775b90547 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c +index a2fbe9ed0..34721ad56 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c +index 8c98fc4be..30d52b01c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c +index e485786dd..96ad473a3 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c +index 26cddc53a..59d6a14ab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c +index bc3590c21..b2809d369 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c +index 5ce31ebbd..18d186280 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c +index d04e42753..4a79277b4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c +index 37b78aa1b..7e6a244e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c +index 3944a6ac0..f020cbeea 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c +index def7b588e..70c928886 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + 
#include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c +index 713eb19d5..7eee98f40 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c +index 2b0e7f8d1..a4f104e8e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c +index 2b8327d91..967a01f6d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c +index c9847a615..414080540 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c +index 1edb4fca2..b2532f5eb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c +index c195cd91c..ff9d030f0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c +index 47f37e4b3..9081443bc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c +index 3c1a8b8e6..7110423fc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c +index 340f7691b..236b5b28e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c +index dbc52f92b..927fa16fe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c +index 89191c467..3e39c212a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c +index 0d7c67703..e3cfe283e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c +index fd8b6d38c..71543290a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c +index 94f31019c..2e9e4b03d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c +index d93201bc4..f6a098d96 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c +index 9fb4e3ff0..c64e6cadf 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c +index fe6ff15d8..33ede4dab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c +index c0d3e8e75..7f59c765d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c +index 8c7ab4ed3..d9eee597c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c +index 8e61f1c6d..e4dc8bf10 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c +index 657a19e58..7cd7ad8a3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c +index 4002c4074..62ca8c9c3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c +index 5d5b4c43c..5a2733075 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c +index 888e85b6e..cae82f6cb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c +index fa3372358..1fe7c8bc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c +index 6d6649f6f..d4c4aa150 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c +index a64dd7598..1ca2fbd91 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c +index 733cc00ee..0dffd68e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c +index 190741070..77ba5fca4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c +index 8dd58f228..954c7575c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c +index 3230c101d..98eb38573 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c +index 23cbc4bf0..1427165fd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c +index 6641d2c58..e61e0e655 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c +index d25fc25da..24f4f2054 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c +index 8210f749b..f468d93c6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c +index 9d015a5c8..29c128e79 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c +index a61681073..29c080c50 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c +index 41f274920..eee56168b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c +index 116399a7c..8b6225d06 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c +index 001ce1c69..7933ec580 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c +index dd04fd788..e0240cb5c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c +index 3e2b15507..c6f4aeaa6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c +index e310ff5ee..4d8e71bd2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c +index bba1a06f3..57a4cd2b9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c +index b641c733f..798c75280 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c +index c85c94bf6..f5c49f982 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c +index bde41dd5c..d25bbe6dd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c +index 207ba167f..eefa1e5ac 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c +index 9b7703231..a9271e60d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c +index 96bbb942d..63605b85c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c +index c73a8a74a..4b59e3403 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c +index d161c850c..0f6c5e4cc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c +index c5e9576ea..3f4540425 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c +index 4babf1638..e65ded196 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +index 9f2fa6747..fbfe300ea 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c +index 557f9f8b5..72b3fe08d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c +index cdb7b11aa..cbb23e0a8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c +index 18d5c51de..21f617231 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c +index 27df4a27d..0a28716bc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c +index c75468d42..24b21ef8a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c +index ad72f7596..5a72994d5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c +index 19db4e192..c02e00bdd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c +index b0fdf7e0b..f20ec5b83 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c +index 1cf0ec698..03a885648 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c +index 14ec081a4..9ee92aa85 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c +index fa4d5fd6f..e5101a857 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c +index 87c3e25b1..685b76e7e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c +index 5a047a508..cbadbd3d6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c +index 4393045c3..c78eb7fce 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c +index ce28c4857..9e3cd7087 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c +index 644d2ce4b..b356dd1bf 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c +index c1eda6c6c..f39a94ab0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c +index 84b3c6599..51e4661d5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff 
--git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c +index f9634b128..6a04e7268 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c +index 6238685bc..5e5b35de5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c +index 5fa080375..bfa095dc8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c +index 40549448e..6a4704583 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c +index 683876933..d456cbfff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c +index f9f88b654..7f1c40c00 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c +index 5210e4cf9..abe92a605 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c 
+index 96c6671f2..4b8932ab0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c +index 38f2c0afe..561d964b1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c +index e804a0a45..cc52343ec 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c +index b6b34063c..2373c96ef 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c +index 7dbf335c1..9df0af7ed 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c +index 9eaa0e9e7..0eb03acbe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c +index 01aabada8..6579978b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c +index 8eb7d9355..7402ff6f0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + 
/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c +index 6f34f6ffc..fd052cd81 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c +index d0a9e9d2f..cb39dbbad 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c +index 15e66ae38..952725afc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c +index 53b21f98b..22aa6ab0a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c +index 81865fd32..6b48f8ab8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c +index 8c8d4996b..4e13f34dd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c +index 58ad8bfcd..2e42c1d64 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c +index 85d24fe44..2d420c280 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c +index be3c8e718..f14aa47ca 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c +index 01ff71649..e09174d08 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c +index 32088f4ae..2a4c09c52 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c +index 19157f682..7afa6ad94 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c +index 80fdcda63..ad69c1e47 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c +index 1a4b221fe..27a7fdd67 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c +index 9fcd3ce0c..c55d20d45 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c 
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c +index 3cd1626d4..fe17ef13f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c +index 3a491ecab..2b8e6228b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c +index 995a34c18..8a8062a99 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c +index 27eef710d..11643896c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c +index ee91af95f..0341bde95 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c +index fa6cdff31..de7a208c3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c +index 33b96d657..e83957070 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c +index cdd20e881..ee335779f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c +index d2e742e81..7d6be3664 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c +index 66faa74d0..831247beb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c +index a9778809f..65188ad41 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c +index a2edbb80a..d23406674 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c +index 8bd3a8273..2e18db108 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c +index 9346f9bfb..e9fc1d7d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +index 9346f9bfb..e9fc1d7d3 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c +index 81456bc1b..1685747c0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c +index 7aa76c2ba..beeee765f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c +index a2bc2da52..5643b913f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c +index 9346f9bfb..e9fc1d7d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c +index 21446e55e..49439865c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c +index c1b8e1752..24d508f81 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c +index 2a4f29b50..cecac6173 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options 
"-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c +index a3afc9811..6cd4e0503 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c +index b4ac50271..29a4f5ae2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c +index e5ee89deb..571145b84 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c +index 2a42386ce..41b9470c1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c +index 5478d19c1..6c9b96460 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c +index c8a00ca89..600168127 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index 03c479a08..b8ab38711 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c +index 2a6eee0fd..5137f5de6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c +index ed752df00..13f8c8c4f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c +index bc98b41af..ef1784f67 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c +index 06717802c..21f68132b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c +index 093d5640e..0adadaa39 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c +index 7179e715c..4a2927624 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c +index 003e29b67..50e9a9f53 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c +index ef3a47da5..22a7a31a9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c +index 76651af63..4b68aeb18 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c +index ca1f5e94f..f44f083b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c +index 6864f5eb8..60278e22b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c +index 7dd2778a5..87d069d1f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c +index d93e4314e..9eefa782b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c +index 2bf9ae9c3..b4bda4dab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c +index a51be899b..871d0241b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include 
<lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c +index e08934b12..eba7c1164 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c +index 44c20a954..96382483e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c +index fb47385c0..542b6fd3a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c +index 63ba92ead..cfd61ba40 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c +index c145f7ff3..c847e2812 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c +index b5c0fca74..c0ce0dd88 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c +index 1d591c35c..8ac09a026 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c +index e8696701f..dd0a09c4e 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c +index d54991051..42a695875 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c +index 0fb6483cf..a7acf351d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c +index 22e62a3e7..c4e1e14e0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c +index 71f770aff..4a2e14712 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c +index cbc1de371..b17c7c4b3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c +index 8fc7a0029..bfca007d7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c +index fdb0c25f1..4648f751a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + 
#include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c +index dd3c2c6f6..25482aebc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c +index 7848ddd41..c284254ab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c +index b1c16baf4..c39002ed5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c +index 356eb2182..09313d03c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c +index 116bebbb6..6d53719a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c +index 977061097..c812a1b0c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c +index b55e388b1..2683355fe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c +index ada72a16a..dc187aa2c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c +index f42523850..b13ea88a7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c +index 3c5e775ff..68a2cac21 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c +index c1de1e8d3..e940491a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c +index a3c0de6d3..8a1272685 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c +index caa72ca61..ba535d1e0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c +index 57d883c04..0a1d0277c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lasxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c +index 1687729d3..660c20da8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c
+@@ -1,4 +1,3 @@
+-/* { dg-do run } */
+ /* { dg-options "-mlasx -w -fno-strict-aliasing" } */
+ #include "../simd_correctness_check.h"
+ #include <lasxintrin.h>
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c
+index 8d6ed92a1..9710d128c 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c
+@@ -1,4 +1,3 @@
+-/* { dg-do run } */
+ /* { dg-options "-mlasx -w -fno-strict-aliasing" } */
+ #include "../simd_correctness_check.h"
+ #include <lasxintrin.h>
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c
+index 18b36c873..506e983da 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c
+@@ -1,4 +1,3 @@
+-/* { dg-do run } */
+ /* { dg-options "-mlasx -w -fno-strict-aliasing" } */
+ #include "../simd_correctness_check.h"
+ #include <lasxintrin.h>
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c
+index 8fd6298f7..da7203af7 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c
+@@ -1,4 +1,3 @@
+-/* { dg-do run } */
+ /* { dg-options "-mlasx -w -fno-strict-aliasing" } */
+ #include "../simd_correctness_check.h"
+ #include <lasxintrin.h>
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
+index 2c37aa91d..d53bee52a 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
++++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
+@@ -33,9 +33,32 @@ if ![info exists DEFAULT_CFLAGS] then {
+ #Initialize `dg'.
+ dg-init
+
++# If the target hardware supports LSX, the default action is "run", otherwise
++# just "compile".
++global dg-do-what-default
++if {[check_effective_target_loongarch_sx_hw]} then {
++    set dg-do-what-default run
++} else {
++    set dg-do-what-default compile
++}
++
+ #Main loop.
+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
+     " -mlsx" $DEFAULT_CFLAGS
++
++dg-finish
++
++dg-init
++# If the target hardware supports LASX, the default action is "run", otherwise
++# just "compile".
++
++global dg-do-what-default
++if {[check_effective_target_loongarch_asx_hw]} then {
++    set dg-do-what-default run
++} else {
++    set dg-do-what-default compile
++}
++
+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
+     " -mlasx" $DEFAULT_CFLAGS
+ # All done.
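Note on the mechanism: with the per-file "{ dg-do run }" directives removed, DejaGnu falls back to the dg-do-what-default value that loongarch-vector.exp now sets, so the same test sources are executed on LSX/LASX-capable hardware and merely compiled everywhere else. A minimal sketch of a test file under this convention (the body is a hypothetical placeholder; the real tests exercise intrinsics and compare results via ../simd_correctness_check.h):

/* No "dg-do" directive: the action comes from dg-do-what-default,
   which loongarch-vector.exp sets to "run" on LSX hardware and to
   "compile" otherwise.  */
/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
#include <lsxintrin.h>

int
main ()
{
  /* A real test would compute LSX results here and check them against
     precomputed reference values.  */
  return 0;
}
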
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c +index e336581f3..8790470a4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c +index c1af80e14..77e027bdb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c +index 7cfb989e4..e2c4f3ad3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c +index 4bb699eab..c7ce0a75b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c +index 77afabe92..23f28bc34 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c +index b7b16a325..54503e22b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c +index a407cadfb..0b1e90959 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c +index 4d5c60998..eefd0be2a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c +index 0ebe8c8a9..1016afe21 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c +index 379517f39..befbf7049 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c +index 30dc83518..9365d242d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c +index 1597749b5..374b8b035 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c +index 906da69ca..ad4b5d307 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c +index 3ae2d7694..e645b9475 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c +index 2177ca3f6..0d7463eda 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c +index 1b0d879e4..bc16057ff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c +index 4b7262537..e494870bc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c +index 22908b1ea..ff9907dd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c +index 411dcaa40..d663653a0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c +index 5d7d66e06..9017d1541 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c +index ba4f4b6dc..5d6d1ef4b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c +index 9739182cd..1f730a688 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c +index 52ac9939f..2239b3740 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx 
-w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c +index f2d6fb042..d5818879f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c +index e05af675e..a1737c51b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c +index 540a724a7..577fbeb4a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c +index 34246c551..d60d8434f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c +index 986b7d566..a8d0e0fe2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c +index 2c1099a04..c386ed74c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c +index 12df2c670..aa3e54a8d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c +index cb4be0475..36ee4b83b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c 
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c +index f2bc7df27..7cf31e21c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c +index f6390800d..32db7a9c7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c +index 6ab217e97..78afaa8bb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c +index 99854dbd8..998596169 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c +index 73bb530c9..31a3b5e42 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c +index 8d4158b57..e9187db90 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c +index 7ffbd385e..b4d65d678 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c +index 388430278..83b013b95 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c +index 9706d7adc..d570dcd24 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c +index 7166f954b..a3a5f44d8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c +index b448c2076..d38b6ab9d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c +index 98941b47d..74ff46f89 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c +index 409bce0ec..a40019e39 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c +index 39c9cf7a7..934169c6e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c +index c3da43bb4..c351daac0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c +@@ -1,4 
+1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c +index 5228dbede..8ca078c9e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c +index a2beff53f..b57cf604c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c +index bfa4914be..6d35a4a30 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c +index bc573936d..07101104f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c +index 87cb8da7c..dd418110c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c +index 3845e8ec3..5b2e8d6a4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c +index 964eff79f..98a798c5f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c +index ea47baf40..413a81cb7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c +index 68cb5a52f..78c8f19a5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c +index d4a86e262..4d71b07ec 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c +index e8f4f12b9..476782ce4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c +index 85db95762..4a54fe133 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c +index f8839cfcd..bb4ac9dfc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c +index 9150e27ca..e12e95367 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c +index cc36bf136..de5c46167 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ 
+ /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c +index 624589620..3556daa72 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c +index c5de1ac7a..fa6ee6fd2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c +index 6b85e87bd..22a8f6b91 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c +index 442473fb4..bd942da1c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c +index 876588827..a5e513c73 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c +index c2766d5c6..ab8265bc2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c +index 5fcdedd3f..8a09f61fe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c +index 
96b14aad6..0d0475a44 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c +index bf8414b49..58470aef1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c +index c60ff2b46..0b1074016 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +index 12cb02303..61f28325a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c +index ac0ade8b1..30d6ed51c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c +index a2b110f21..e74dfb0d5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c +index 8a35dfe24..5bae5a67f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c +index ffd80540b..4a76ee69f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c +index 8d0d56632..5bf753662 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c +index 5dba807f6..ffbdb0069 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c +index 7f6d2f4d1..d13f7d0d9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c +index 9c5bb9131..2d6b92375 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c +index af75f8e4e..ab3abf2a3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c +index 37c769a2d..078d229da 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c +index 0b51cb8cf..1999543f4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c +index 26b51ee14..3d9b1a817 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c +index aa802b295..aefcdb960 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c +index 88c66f220..4226f8683 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c +index 2b9dcc0b5..c45d72667 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c +index 7cd9abb7c..815ca0cdb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c +index 089500ea9..6ba93f73c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c +index 3fade5157..33369303f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c +index d3fd83da7..2f55309ce 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c +index 839285685..0a48f655a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c +index bab2c6cf3..091343e82 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c +index 5875aa597..42d873b4c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c +index 4be7fce82..9f6aa3d12 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c +index 8a4c39502..6b06e204e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c +index b0e22f955..c96462994 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c +index 51a9a92e8..96db676e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c +index 7cff1d848..64c61f0a1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c +index b79af2228..27c50bdbb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c +index b2a7a35bd..d076ae8f2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c +index c90cae75e..c6e183fd4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c +index 772d040c3..e1e10cb60 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c +index 6eaae2134..c0e9a1a96 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c +index 5470d40dd..cade92d25 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c +index 8deb04427..4ecfff10c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c +index 64a950f81..717305270 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c +index 8f743ec2e..cfccbb7e6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c +index d547af0d3..1cd2e7cdc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c +index 47cf33cfd..b4f171d20 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c +index ab650a024..8f630371e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c +index 60b6e3503..78b745a38 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c +index 8ba666275..5f3c049a1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c +index 8357f4e80..9a949ef18 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } 
*/ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c +index e4afc8247..a16b518af 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c +index 346f0316a..5fbb48e81 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c +index 6eea49a61..570bd1d13 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c +index f3e4e0390..522f07950 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c +index 9f5702e2c..62d1e3420 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c +index 9441ba50e..e077ce7d0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c +index a7a3acce9..80b2da43d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c +index a07a02ab2..fb43da265 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c +index 537a1bb3b..7686bcb5f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c +index 8a6e035c9..d40b093e6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c +index bb59bc312..6eb69cbf5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c +index 030e87fd8..17a43bbc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c +index 783eedae1..85ae43e63 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c +index 66982d89f..0b0200ed6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c +index 58591f1bb..5fd4af833 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c +index 74269e319..e41c2f8f2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c +index acca2bee9..5ec0a4d2a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c +index ef0ad676e..36c9bf336 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c +index a5f02b1b1..2f16a3483 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c +index 463adb48e..6634b3a9f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c +index a81be76f1..157132c28 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c +index c42440cea..286fe935a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c +index 4ae4dbf8b..81b16542f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c +index 1bc27c983..3eda1f166 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c +index 67d189991..d08f84481 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c +index cd8eefb47..0cf4c664b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c +index 31e3919bf..d709dbdb7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c +index 4362941ab..a031aaeb3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c +index c16a291de..f33c4a8b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index 646935c92..f3b800f88 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c +index cd441b841..ee4a7e5b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c +index 0fb1bc18f..933cb3b0b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c +index a26eb0a3d..febb6345a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c +index 15c6cedc2..80b2db335 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c +index 0e72a33dd..e78a8b07c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c +index 685a1bb36..361d41a04 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c +index 7b8ad7d5a..169627dd3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c +index 7a77e80c0..6a3978317 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } 
*/ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c +index 796e88cad..985e32a24 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c +index 5f46293dc..b20f92ef8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c +index 15c96ccfe..8ce161e92 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c +index e8d69f0e9..6f8ddd219 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c +index 5bf3ce6e8..442abf65b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c +index 768df528f..8dbba943a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c +index fd7c22a82..1285aa86a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c +index 2ca4f0b7a..efccd1822 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c 
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c +index 4e7c7ab7e..ad6dd0908 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c +index 92988035d..6cfec397d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c +index 6a842d9ce..b4ff77206 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c +index 2a353d65a..8ddc6157d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c +index 60d474203..a0ecbc7dd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c +index 3aa23bdc8..6abc66b89 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c +index f9c789855..9f59d5ea4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c +index 7b5e9a7bf..29e51a34d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c +index 5a8f4f70a..28e8a3ff2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c +index ca462c834..94b58e65c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c +index 211339bb8..ae9d88518 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c +index 2c3a53416..d18448ea7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c +index c630b4261..639361d7b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c +index 468a17c15..11f19c249 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c +index e45ca36f0..5ab683fd5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c +index 7ffcecde7..526fb15dc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c +index a23ad7cd2..b3c0c37c3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c +index 76fac97be..7785e9f59 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c +index ed600c72d..a07d5c541 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c +index 613668143..2189b8167 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c +index ec688bb12..e1a633096 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c +index 02f7ca08b..7035d256e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c +index fc4cbb4e5..d7a5d7f30 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c 
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c +index 0d5987567..028664bd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c +index 8afdffa50..ad0eef8d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c +index f5c82bc74..01907dcee 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c +index 37e0ccf4d..35cd761ee 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c +index f0d391a09..358775ed3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c +index 3b18bc13c..986ead074 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c +index 39ebff154..9c8688432 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c +index 62837f1ac..c762b88f8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c +index 72fa97174..0b9ba4709 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c +index cc823d4ba..08ceab6ee 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include <lsxintrin.h> +-- +2.43.0 +
_service:tar_scm:0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
Added
@@ -0,0 +1,148 @@
+From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:38:37 +0800
+Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
+ signed/unsigned popcount.
+
+In LoongArch, the vector popcount has corresponding instructions, while
+the scalar does not. Currently, the scalar popcount is calculated
+through a loop, and the value of a non-power of two needs to be iterated
+several times, so the vector popcount instruction is considered for
+optimization.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (v2di): Used to simplify the
+	following templates.
+	(popcount<mode>2): New.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/popcnt.c: New test.
+	* gcc.target/loongarch/popcount.c: New test.
+---
+ gcc/config/loongarch/loongarch.md             | 27 +++++++++++-
+ gcc/testsuite/gcc.target/loongarch/popcnt.c   | 41 +++++++++++++++++++
+ gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
+ 3 files changed, 83 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 11577f407..cfd7a8ec6 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1512,7 +1512,30 @@
+   (set_attr "cnv_mode" "D2S")
+   (set_attr "mode" "SF"))
+
+-
++;; In vector registers, popcount can be implemented directly through
++;; the vector instruction XVPCNT. For GP registers, we can implement
++;; it through the following method. Compared with loop implementation
++;; of popcount, the following method has better performance.
++
++;; This attribute used for get connection of scalar mode and corresponding
++;; vector mode.
++(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
++
++(define_expand "popcount<mode>2"
++  [(set (match_operand:GPR 0 "register_operand")
++	(popcount:GPR (match_operand:GPR 1 "register_operand")))]
++  "ISA_HAS_LSX"
++{
++  rtx in = operands[1];
++  rtx out = operands[0];
++  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
++				    gen_reg_rtx (V2DImode);
++  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
++  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
++  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
++  DONE;
++})
++
+ ;;
+ ;; ....................
+ ;;
+@@ -3879,7 +3902,7 @@
+	(any_extend:SI (match_dup 3))))
+   "")
+
+-
++
+
+ (define_mode_iterator QHSD [QI HI SI DI])
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c
+new file mode 100644
+index 000000000..a10fca420
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c
+@@ -0,0 +1,41 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx" } */
++/* { dg-final { scan-assembler-not {popcount} } } */
++/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */
++/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */
++
++int
++foo (int x)
++{
++  return __builtin_popcount (x);
++}
++
++long
++foo1 (long x)
++{
++  return __builtin_popcountl (x);
++}
++
++long long
++foo2 (long long x)
++{
++  return __builtin_popcountll (x);
++}
++
++int
++foo3 (int *p)
++{
++  return __builtin_popcount (*p);
++}
++
++unsigned
++foo4 (int x)
++{
++  return __builtin_popcount (x);
++}
++
++unsigned long
++foo5 (int x)
++{
++  return __builtin_popcount (x);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c
+new file mode 100644
+index 000000000..390ff0676
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/popcount.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */
++/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */
++
++int
++PopCount (long b)
++{
++  int c = 0;
++
++  while (b)
++    {
++      b &= b - 1;
++      c++;
++    }
++
++  return c;
++}
+--
+2.43.0
+
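As a reading aid (not part of the patch above): a minimal C-level sketch of the three-step sequence the popcount<mode>2 expander emits, written with LSX intrinsics. The function name and the explicit zero vector from __lsx_vldi are illustrative assumptions; the expander itself uses a fresh vector register whose remaining elements are never read, and ordinary code reaches it through __builtin_popcount when built with -mlsx.

#include <lsxintrin.h>

/* Sketch only: insert the scalar into element 0, count bits per element,
   then extract element 0 back to a general-purpose register.  */
int
popcount_w_sketch (int x)
{
  __m128i v = __lsx_vldi (0);        /* zero vector (an assumption; the
                                        expander leaves the other elements
                                        undefined and ignores them)  */
  v = __lsx_vinsgr2vr_w (v, x, 0);   /* vinsgr2vr.w: scalar -> element 0  */
  v = __lsx_vpcnt_w (v);             /* vpcnt.w: per-element popcount  */
  return __lsx_vpickve2gr_w (v, 0);  /* vpickve2gr.w: element 0 -> scalar  */
}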
_service:tar_scm:0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
Added
@@ -0,0 +1,163 @@
+From 19282fbb0dab42c3553326a1ed01ad9a599622dd Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Tue, 28 Nov 2023 15:39:00 +0800
+Subject: [PATCH 054/188] LoongArch: Optimize vector constant
+ extract-even/odd permutation.
+
+For vector constant extract-even/odd permutation replace the default
+xvshuf instruction combination with xvilvl/h instruction, which
+can reduce instructions and improves performance.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_is_odd_extraction):
+	Supplementary function prototype.
+	(loongarch_is_even_extraction): Adjust.
+	(loongarch_try_expand_lsx_vshuf_const): Adjust.
+	(loongarch_is_extraction_permutation): Adjust.
+	(loongarch_expand_vec_perm_const_2): Adjust.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/lasx-extract-even_odd-opt.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 33 +++++++++++-
+ .../loongarch/lasx-extract-even_odd-opt.c     | 54 +++++++++++++++++++
+ 2 files changed, 85 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index ecceca22d..3ef7e3605 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -8668,6 +8668,12 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
+     }
+ }
+
++static bool
++loongarch_is_odd_extraction (struct expand_vec_perm_d *);
++
++static bool
++loongarch_is_even_extraction (struct expand_vec_perm_d *);
++
+ static bool
+ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+ {
+@@ -8690,6 +8696,24 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
+   if (d->testing_p)
+     return true;
+
++  /* If match extract-even and extract-odd permutations pattern, use
++   * vselect much better than vshuf.  */
++  if (loongarch_is_odd_extraction (d)
++      || loongarch_is_even_extraction (d))
++    {
++      if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
++					    d->perm, d->nelt))
++	return true;
++
++      unsigned char perm2[MAX_VECT_LEN];
++      for (i = 0; i < d->nelt; ++i)
++	perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
++
++      if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
++					    perm2, d->nelt))
++	return true;
++    }
++
+   for (i = 0; i < d->nelt; i += 1)
+     {
+       rperm[i] = GEN_INT (d->perm[i]);
+@@ -8874,7 +8898,7 @@ loongarch_is_even_extraction (struct expand_vec_perm_d *d)
+	  result = false;
+	  break;
+	}
+-      buf += 1;
++      buf += 2;
+     }
+
+   return result;
+@@ -8896,7 +8920,7 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
+	  result = false;
+	  break;
+	}
+-      buf += 2;
++      buf += 1;
+     }
+
+   return result;
+@@ -9373,6 +9397,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+      Selector after: { 1, 3, 1, 3 }.
+      Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
+      Selector after: { 0, 2, 0, 2 }.  */
++
++  /* Better implement of extract-even and extract-odd permutations.  */
++  if (loongarch_expand_vec_perm_even_odd (d))
++    return true;
++
+   for (i = 0; i < d->nelt / 2; i += 1)
+     {
+       idx = d->perm[i];
+diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+new file mode 100644
+index 000000000..515f0c862
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++/* { dg-final { scan-assembler "xvilvl.d" } } */
++/* { dg-final { scan-assembler "xvilvh.d" } } */
++
++#define CMUL(a, b, c) \
++  { \
++    (c).ai = (a).ai * (b).ai - (a).bi * (b).bi; \
++    (c).bi = (a).ai * (b).bi + (a).bi * (b).ai; \
++    (c).ci = (a).ci * (b).ci - (a).di * (b).di; \
++    (c).di = (a).ci * (b).di + (a).di * (b).ci; \
++  }
++#define CSUM(a, b) \
++  { \
++    (a).ai += (b).ai; \
++    (a).bi += (b).bi; \
++    (a).ci += (b).ci; \
++    (a).di += (b).di; \
++  }
++
++typedef struct
++{
++  double ai;
++  double bi;
++  double ci;
++  double di;
++} complex;
++
++typedef struct
++{
++  complex e[6][6];
++} matrix;
++
++typedef struct
++{
++  complex c[6];
++} vector;
++
++void
++mult_adj_mat_vec (matrix *a, vector *b, vector *c)
++{
++  register int i, j;
++  register complex x, y;
++  for (i = 0; i < 6; i++)
++    {
++      x.ai = x.bi = x.ci = x.di = 0.0;
++      for (j = 0; j < 6; j++)
++	{
++	  CMUL (a->e[j][i], b->c[j], y);
++	  CSUM (x, y);
++	}
++      c->c[i] = x;
++    }
++}
+--
+2.43.0
+
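For context (an illustration, not part of the patch above): a constant even-extraction permutation at the source level, matching the E_V4DImode { 0, 2, 4, 6 } selector sample quoted in the comment in loongarch_expand_vec_perm_const_2. The type and function names are assumptions; GCC 12's generic __builtin_shufflevector yields such a constant permutation, which with this patch may be expanded through xvilvl/xvilvh-style interleaves instead of the generic xvshuf sequence when compiled with -O3 -mlasx.

/* Hypothetical example: even-indexed double-word elements of the
   concatenation of a and b, i.e. the selector { 0, 2, 4, 6 }.  */
typedef long long v4i64 __attribute__ ((vector_size (32)));

v4i64
extract_even (v4i64 a, v4i64 b)
{
  return __builtin_shufflevector (a, b, 0, 2, 4, 6);
}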
_service:tar_scm:0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
Added
@@ -0,0 +1,1697 @@
+From 548322a75cdeb96960fb9d324a2abf8735c4d254 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Tue, 7 Nov 2023 11:53:39 +0800
+Subject: [PATCH 055/188] LoongArch: Add intrinsic function descriptions for
+ LSX and LASX instructions to doc.
+
+gcc/ChangeLog:
+
+	* doc/extend.texi: Add information about the intrinsic function of the vector
+	instruction.
+---
+ gcc/doc/extend.texi | 1662 +++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 1662 insertions(+)
+
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 497c6de5f..7edd3974d 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -14679,6 +14679,8 @@ instructions, but allow the compiler to schedule those calls.
+ * BPF Built-in Functions::
+ * FR-V Built-in Functions::
+ * LoongArch Base Built-in Functions::
++* LoongArch SX Vector Intrinsics::
++* LoongArch ASX Vector Intrinsics::
+ * MIPS DSP Built-in Functions::
+ * MIPS Paired-Single Support::
+ * MIPS Loongson Built-in Functions::
+@@ -16262,6 +16264,1666 @@ Returns the value that is currently set in the @samp{tp} register.
+ void * __builtin_thread_pointer (void)
+ @end smallexample
+
++@node LoongArch SX Vector Intrinsics
++@subsection LoongArch SX Vector Intrinsics
++
++GCC provides intrinsics to access the LSX (Loongson SIMD Extension) instructions.
++The interface is made available by including @code{<lsxintrin.h>} and using
++@option{-mlsx}.
++
++The following vector typedefs are included in @code{lsxintrin.h}:
++
++@itemize
++@item @code{__m128i}, a 128-bit vector of fixed point;
++@item @code{__m128}, a 128-bit vector of single precision floating point;
++@item @code{__m128d}, a 128-bit vector of double precision floating point.
++@end itemize
++
++Instructions and corresponding built-ins may have additional restrictions and/or
++input/output values manipulated:
++@itemize
++@item @code{imm0_1}, an integer literal in range 0 to 1;
++@item @code{imm0_3}, an integer literal in range 0 to 3;
++@item @code{imm0_7}, an integer literal in range 0 to 7;
++@item @code{imm0_15}, an integer literal in range 0 to 15;
++@item @code{imm0_31}, an integer literal in range 0 to 31;
++@item @code{imm0_63}, an integer literal in range 0 to 63;
++@item @code{imm0_127}, an integer literal in range 0 to 127;
++@item @code{imm0_255}, an integer literal in range 0 to 255;
++@item @code{imm_n16_15}, an integer literal in range -16 to 15;
++@item @code{imm_n128_127}, an integer literal in range -128 to 127;
++@item @code{imm_n256_255}, an integer literal in range -256 to 255;
++@item @code{imm_n512_511}, an integer literal in range -512 to 511;
++@item @code{imm_n1024_1023}, an integer literal in range -1024 to 1023;
++@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047.
++@end itemize
++
++For convenience, GCC defines functions @code{__lsx_vrepli_@{b/h/w/d@}} and
++@code{__lsx_bnz_@{v/b/h/w/d@}}, which are implemented as follows:
++
++@smallexample
++a. @code{__lsx_vrepli_@{b/h/w/d@}}: Implemented the case where the highest
++   bit of @code{vldi} instruction @code{i13} is 1.
++
++   i13[12] == 1'b0
++   case i13[11:10] of :
++     2'b00: __lsx_vrepli_b (imm_n512_511)
++     2'b01: __lsx_vrepli_h (imm_n512_511)
++     2'b10: __lsx_vrepli_w (imm_n512_511)
++     2'b11: __lsx_vrepli_d (imm_n512_511)
++
++b. @code{__lsx_bnz_@{v/b/h/w/d@}}: Since the @code{vseteqz} class directive
++   cannot be used on its own, this function is defined.
++ ++ _lsx_bz_v => vseteqz.v + bcnez ++ _lsx_bnz_v => vsetnez.v + bcnez ++ _lsx_bz_b => vsetanyeqz.b + bcnez ++ _lsx_bz_h => vsetanyeqz.h + bcnez ++ _lsx_bz_w => vsetanyeqz.w + bcnez ++ _lsx_bz_d => vsetanyeqz.d + bcnez ++ _lsx_bnz_b => vsetallnez.b + bcnez ++ _lsx_bnz_h => vsetallnez.h + bcnez ++ _lsx_bnz_w => vsetallnez.w + bcnez ++ _lsx_bnz_d => vsetallnez.d + bcnez ++@end smallexample ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{a}; ++ ++ void ++ test (void) ++ @{ ++ if (__lsx_bz_v (@var{a})) ++ printf ("1\n"); ++ else ++ printf ("2\n"); ++ @} ++@end smallexample ++ ++@emph{Note:} For directives where the intent operand is also the source operand ++(modifying only part of the bitfield of the intent register), the first parameter ++in the builtin call function is used as the intent operand. ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{dst}; ++ extern int @var{src}; ++ ++ void ++ test (void) ++ @{ ++ @var{dst} = __lsx_vinsgr2vr_b (@var{dst}, @var{src}, 3); ++ @} ++@end smallexample ++ ++The intrinsics provided are listed below: ++@smallexample ++int __lsx_bnz_b (__m128i); ++int __lsx_bnz_d (__m128i); ++int __lsx_bnz_h (__m128i); ++int __lsx_bnz_v (__m128i); ++int __lsx_bnz_w (__m128i); ++int __lsx_bz_b (__m128i); ++int __lsx_bz_d (__m128i); ++int __lsx_bz_h (__m128i); ++int __lsx_bz_v (__m128i); ++int __lsx_bz_w (__m128i); ++__m128i __lsx_vabsd_b (__m128i, __m128i); ++__m128i __lsx_vabsd_bu (__m128i, __m128i); ++__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_du (__m128i, __m128i); ++__m128i __lsx_vabsd_h (__m128i, __m128i); ++__m128i __lsx_vabsd_hu (__m128i, __m128i); ++__m128i __lsx_vabsd_w (__m128i, __m128i); ++__m128i __lsx_vabsd_wu (__m128i, __m128i); ++__m128i __lsx_vadda_b (__m128i, __m128i); ++__m128i __lsx_vadda_d (__m128i, __m128i); ++__m128i __lsx_vadda_h (__m128i, __m128i); ++__m128i __lsx_vadda_w (__m128i, __m128i); ++__m128i __lsx_vadd_b (__m128i, __m128i); ++__m128i __lsx_vadd_d (__m128i, __m128i); ++__m128i __lsx_vadd_h (__m128i, __m128i); ++__m128i __lsx_vaddi_bu (__m128i, imm0_31); ++__m128i __lsx_vaddi_du (__m128i, imm0_31); ++__m128i __lsx_vaddi_hu (__m128i, imm0_31); ++__m128i __lsx_vaddi_wu (__m128i, imm0_31); ++__m128i __lsx_vadd_q (__m128i, __m128i); ++__m128i __lsx_vadd_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vandi_b 
(__m128i, imm0_255); ++__m128i __lsx_vandn_v (__m128i, __m128i); ++__m128i __lsx_vand_v (__m128i, __m128i); ++__m128i __lsx_vavg_b (__m128i, __m128i); ++__m128i __lsx_vavg_bu (__m128i, __m128i); ++__m128i __lsx_vavg_d (__m128i, __m128i); ++__m128i __lsx_vavg_du (__m128i, __m128i); ++__m128i __lsx_vavg_h (__m128i, __m128i); ++__m128i __lsx_vavg_hu (__m128i, __m128i); ++__m128i __lsx_vavgr_b (__m128i, __m128i); ++__m128i __lsx_vavgr_bu (__m128i, __m128i); ++__m128i __lsx_vavgr_d (__m128i, __m128i); ++__m128i __lsx_vavgr_du (__m128i, __m128i); ++__m128i __lsx_vavgr_h (__m128i, __m128i); ++__m128i __lsx_vavgr_hu (__m128i, __m128i); ++__m128i __lsx_vavgr_w (__m128i, __m128i); ++__m128i __lsx_vavgr_wu (__m128i, __m128i); ++__m128i __lsx_vavg_w (__m128i, __m128i); ++__m128i __lsx_vavg_wu (__m128i, __m128i); ++__m128i __lsx_vbitclr_b (__m128i, __m128i); ++__m128i __lsx_vbitclr_d (__m128i, __m128i); ++__m128i __lsx_vbitclr_h (__m128i, __m128i); ++__m128i __lsx_vbitclri_b (__m128i, imm0_7); ++__m128i __lsx_vbitclri_d (__m128i, imm0_63); ++__m128i __lsx_vbitclri_h (__m128i, imm0_15); ++__m128i __lsx_vbitclri_w (__m128i, imm0_31); ++__m128i __lsx_vbitclr_w (__m128i, __m128i); ++__m128i __lsx_vbitrev_b (__m128i, __m128i); ++__m128i __lsx_vbitrev_d (__m128i, __m128i); ++__m128i __lsx_vbitrev_h (__m128i, __m128i); ++__m128i __lsx_vbitrevi_b (__m128i, imm0_7); ++__m128i __lsx_vbitrevi_d (__m128i, imm0_63); ++__m128i __lsx_vbitrevi_h (__m128i, imm0_15); ++__m128i __lsx_vbitrevi_w (__m128i, imm0_31); ++__m128i __lsx_vbitrev_w (__m128i, __m128i); ++__m128i __lsx_vbitseli_b (__m128i, __m128i, imm0_255); ++__m128i __lsx_vbitsel_v (__m128i, __m128i, __m128i); ++__m128i __lsx_vbitset_b (__m128i, __m128i); ++__m128i __lsx_vbitset_d (__m128i, __m128i); ++__m128i __lsx_vbitset_h (__m128i, __m128i); ++__m128i __lsx_vbitseti_b (__m128i, imm0_7); ++__m128i __lsx_vbitseti_d (__m128i, imm0_63); ++__m128i __lsx_vbitseti_h (__m128i, imm0_15); ++__m128i __lsx_vbitseti_w (__m128i, imm0_31); ++__m128i __lsx_vbitset_w (__m128i, __m128i); ++__m128i __lsx_vbsll_v (__m128i, imm0_31); ++__m128i __lsx_vbsrl_v (__m128i, imm0_31); ++__m128i __lsx_vclo_b (__m128i); ++__m128i __lsx_vclo_d (__m128i); ++__m128i __lsx_vclo_h (__m128i); ++__m128i __lsx_vclo_w (__m128i); ++__m128i __lsx_vclz_b (__m128i); ++__m128i __lsx_vclz_d (__m128i); ++__m128i __lsx_vclz_h (__m128i); ++__m128i __lsx_vclz_w (__m128i); ++__m128i __lsx_vdiv_b (__m128i, __m128i); ++__m128i __lsx_vdiv_bu (__m128i, __m128i); ++__m128i __lsx_vdiv_d (__m128i, __m128i); ++__m128i __lsx_vdiv_du (__m128i, __m128i); ++__m128i __lsx_vdiv_h (__m128i, __m128i); ++__m128i __lsx_vdiv_hu (__m128i, __m128i); ++__m128i __lsx_vdiv_w (__m128i, __m128i); ++__m128i __lsx_vdiv_wu (__m128i, __m128i); ++__m128i __lsx_vexth_du_wu (__m128i); ++__m128i __lsx_vexth_d_w (__m128i); ++__m128i __lsx_vexth_h_b (__m128i); ++__m128i __lsx_vexth_hu_bu (__m128i); ++__m128i __lsx_vexth_q_d (__m128i); ++__m128i __lsx_vexth_qu_du (__m128i); ++__m128i __lsx_vexth_w_h (__m128i); ++__m128i __lsx_vexth_wu_hu (__m128i); ++__m128i __lsx_vextl_q_d (__m128i); ++__m128i __lsx_vextl_qu_du (__m128i); ++__m128i __lsx_vextrins_b (__m128i, __m128i, imm0_255); ++__m128i __lsx_vextrins_d (__m128i, __m128i, imm0_255); ++__m128i __lsx_vextrins_h (__m128i, __m128i, imm0_255); ++__m128i __lsx_vextrins_w (__m128i, __m128i, imm0_255); ++__m128d __lsx_vfadd_d (__m128d, __m128d); ++__m128 __lsx_vfadd_s (__m128, __m128); ++__m128i __lsx_vfclass_d (__m128d); ++__m128i __lsx_vfclass_s (__m128); ++__m128i __lsx_vfcmp_caf_d (__m128d, 
__m128d); ++__m128i __lsx_vfcmp_caf_s (__m128, __m128); ++__m128i __lsx_vfcmp_ceq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_ceq_s (__m128, __m128); ++__m128i __lsx_vfcmp_cle_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cle_s (__m128, __m128); ++__m128i __lsx_vfcmp_clt_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_clt_s (__m128, __m128); ++__m128i __lsx_vfcmp_cne_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cne_s (__m128, __m128); ++__m128i __lsx_vfcmp_cor_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cor_s (__m128, __m128); ++__m128i __lsx_vfcmp_cueq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cueq_s (__m128, __m128); ++__m128i __lsx_vfcmp_cule_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cule_s (__m128, __m128); ++__m128i __lsx_vfcmp_cult_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cult_s (__m128, __m128); ++__m128i __lsx_vfcmp_cun_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cune_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cune_s (__m128, __m128); ++__m128i __lsx_vfcmp_cun_s (__m128, __m128); ++__m128i __lsx_vfcmp_saf_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_saf_s (__m128, __m128); ++__m128i __lsx_vfcmp_seq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_seq_s (__m128, __m128); ++__m128i __lsx_vfcmp_sle_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sle_s (__m128, __m128); ++__m128i __lsx_vfcmp_slt_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_slt_s (__m128, __m128); ++__m128i __lsx_vfcmp_sne_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sne_s (__m128, __m128); ++__m128i __lsx_vfcmp_sor_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sor_s (__m128, __m128); ++__m128i __lsx_vfcmp_sueq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sueq_s (__m128, __m128); ++__m128i __lsx_vfcmp_sule_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sule_s (__m128, __m128); ++__m128i __lsx_vfcmp_sult_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sult_s (__m128, __m128); ++__m128i __lsx_vfcmp_sun_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sune_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sune_s (__m128, __m128); ++__m128i __lsx_vfcmp_sun_s (__m128, __m128); ++__m128d __lsx_vfcvth_d_s (__m128); ++__m128i __lsx_vfcvt_h_s (__m128, __m128); ++__m128 __lsx_vfcvth_s_h (__m128i); ++__m128d __lsx_vfcvtl_d_s (__m128); ++__m128 __lsx_vfcvtl_s_h (__m128i); ++__m128 __lsx_vfcvt_s_d (__m128d, __m128d); ++__m128d __lsx_vfdiv_d (__m128d, __m128d); ++__m128 __lsx_vfdiv_s (__m128, __m128); ++__m128d __lsx_vffint_d_l (__m128i); ++__m128d __lsx_vffint_d_lu (__m128i); ++__m128d __lsx_vffinth_d_w (__m128i); ++__m128d __lsx_vffintl_d_w (__m128i); ++__m128 __lsx_vffint_s_l (__m128i, __m128i); ++__m128 __lsx_vffint_s_w (__m128i); ++__m128 __lsx_vffint_s_wu (__m128i); ++__m128d __lsx_vflogb_d (__m128d); ++__m128 __lsx_vflogb_s (__m128); ++__m128d __lsx_vfmadd_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfmadd_s (__m128, __m128, __m128); ++__m128d __lsx_vfmaxa_d (__m128d, __m128d); ++__m128 __lsx_vfmaxa_s (__m128, __m128); ++__m128d __lsx_vfmax_d (__m128d, __m128d); ++__m128 __lsx_vfmax_s (__m128, __m128); ++__m128d __lsx_vfmina_d (__m128d, __m128d); ++__m128 __lsx_vfmina_s (__m128, __m128); ++__m128d __lsx_vfmin_d (__m128d, __m128d); ++__m128 __lsx_vfmin_s (__m128, __m128); ++__m128d __lsx_vfmsub_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfmsub_s (__m128, __m128, __m128); ++__m128d __lsx_vfmul_d (__m128d, __m128d); ++__m128 __lsx_vfmul_s (__m128, __m128); ++__m128d __lsx_vfnmadd_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfnmadd_s (__m128, __m128, __m128); ++__m128d __lsx_vfnmsub_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfnmsub_s (__m128, __m128, 
__m128); ++__m128d __lsx_vfrecip_d (__m128d); ++__m128 __lsx_vfrecip_s (__m128); ++__m128d __lsx_vfrint_d (__m128d); ++__m128i __lsx_vfrintrm_d (__m128d); ++__m128i __lsx_vfrintrm_s (__m128); ++__m128i __lsx_vfrintrne_d (__m128d); ++__m128i __lsx_vfrintrne_s (__m128); ++__m128i __lsx_vfrintrp_d (__m128d); ++__m128i __lsx_vfrintrp_s (__m128); ++__m128i __lsx_vfrintrz_d (__m128d); ++__m128i __lsx_vfrintrz_s (__m128); ++__m128 __lsx_vfrint_s (__m128); ++__m128d __lsx_vfrsqrt_d (__m128d); ++__m128 __lsx_vfrsqrt_s (__m128); ++__m128i __lsx_vfrstp_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vfrstp_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vfrstpi_b (__m128i, __m128i, imm0_31); ++__m128i __lsx_vfrstpi_h (__m128i, __m128i, imm0_31); ++__m128d __lsx_vfsqrt_d (__m128d); ++__m128 __lsx_vfsqrt_s (__m128); ++__m128d __lsx_vfsub_d (__m128d, __m128d); ++__m128 __lsx_vfsub_s (__m128, __m128); ++__m128i __lsx_vftinth_l_s (__m128); ++__m128i __lsx_vftint_l_d (__m128d); ++__m128i __lsx_vftintl_l_s (__m128); ++__m128i __lsx_vftint_lu_d (__m128d); ++__m128i __lsx_vftintrmh_l_s (__m128); ++__m128i __lsx_vftintrm_l_d (__m128d); ++__m128i __lsx_vftintrml_l_s (__m128); ++__m128i __lsx_vftintrm_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrm_w_s (__m128); ++__m128i __lsx_vftintrneh_l_s (__m128); ++__m128i __lsx_vftintrne_l_d (__m128d); ++__m128i __lsx_vftintrnel_l_s (__m128); ++__m128i __lsx_vftintrne_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrne_w_s (__m128); ++__m128i __lsx_vftintrph_l_s (__m128); ++__m128i __lsx_vftintrp_l_d (__m128d); ++__m128i __lsx_vftintrpl_l_s (__m128); ++__m128i __lsx_vftintrp_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrp_w_s (__m128); ++__m128i __lsx_vftintrzh_l_s (__m128); ++__m128i __lsx_vftintrz_l_d (__m128d); ++__m128i __lsx_vftintrzl_l_s (__m128); ++__m128i __lsx_vftintrz_lu_d (__m128d); ++__m128i __lsx_vftintrz_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrz_w_s (__m128); ++__m128i __lsx_vftintrz_wu_s (__m128); ++__m128i __lsx_vftint_w_d (__m128d, __m128d); ++__m128i __lsx_vftint_w_s (__m128); ++__m128i __lsx_vftint_wu_s (__m128); ++__m128i __lsx_vhaddw_du_wu (__m128i, __m128i); ++__m128i __lsx_vhaddw_d_w (__m128i, __m128i); ++__m128i __lsx_vhaddw_h_b (__m128i, __m128i); ++__m128i __lsx_vhaddw_hu_bu (__m128i, __m128i); ++__m128i __lsx_vhaddw_q_d (__m128i, __m128i); ++__m128i __lsx_vhaddw_qu_du (__m128i, __m128i); ++__m128i __lsx_vhaddw_w_h (__m128i, __m128i); ++__m128i __lsx_vhaddw_wu_hu (__m128i, __m128i); ++__m128i __lsx_vhsubw_du_wu (__m128i, __m128i); ++__m128i __lsx_vhsubw_d_w (__m128i, __m128i); ++__m128i __lsx_vhsubw_h_b (__m128i, __m128i); ++__m128i __lsx_vhsubw_hu_bu (__m128i, __m128i); ++__m128i __lsx_vhsubw_q_d (__m128i, __m128i); ++__m128i __lsx_vhsubw_qu_du (__m128i, __m128i); ++__m128i __lsx_vhsubw_w_h (__m128i, __m128i); ++__m128i __lsx_vhsubw_wu_hu (__m128i, __m128i); ++__m128i __lsx_vilvh_b (__m128i, __m128i); ++__m128i __lsx_vilvh_d (__m128i, __m128i); ++__m128i __lsx_vilvh_h (__m128i, __m128i); ++__m128i __lsx_vilvh_w (__m128i, __m128i); ++__m128i __lsx_vilvl_b (__m128i, __m128i); ++__m128i __lsx_vilvl_d (__m128i, __m128i); ++__m128i __lsx_vilvl_h (__m128i, __m128i); ++__m128i __lsx_vilvl_w (__m128i, __m128i); ++__m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15); ++__m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1); ++__m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7); ++__m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3); ++__m128i __lsx_vld (void *, imm_n2048_2047) ++__m128i __lsx_vldi (imm_n1024_1023) ++__m128i __lsx_vldrepl_b (void *, 
imm_n2048_2047) ++__m128i __lsx_vldrepl_d (void *, imm_n256_255) ++__m128i __lsx_vldrepl_h (void *, imm_n1024_1023) ++__m128i __lsx_vldrepl_w (void *, imm_n512_511) ++__m128i __lsx_vldx (void *, long int); ++__m128i __lsx_vmadd_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmadd_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmadd_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmadd_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_d_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_d_wu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_d_wu_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_h_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_h_bu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_h_bu_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_q_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_q_du (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_q_du_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_w_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_w_hu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_w_hu_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_d_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_d_wu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_d_wu_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_h_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_h_bu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_h_bu_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_q_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_q_du (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_q_du_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_w_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_w_hu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_w_hu_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmax_b (__m128i, __m128i); ++__m128i __lsx_vmax_bu (__m128i, __m128i); ++__m128i __lsx_vmax_d (__m128i, __m128i); ++__m128i __lsx_vmax_du (__m128i, __m128i); ++__m128i __lsx_vmax_h (__m128i, __m128i); ++__m128i __lsx_vmax_hu (__m128i, __m128i); ++__m128i __lsx_vmaxi_b (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_bu (__m128i, imm0_31); ++__m128i __lsx_vmaxi_d (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_du (__m128i, imm0_31); ++__m128i __lsx_vmaxi_h (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_hu (__m128i, imm0_31); ++__m128i __lsx_vmaxi_w (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_wu (__m128i, imm0_31); ++__m128i __lsx_vmax_w (__m128i, __m128i); ++__m128i __lsx_vmax_wu (__m128i, __m128i); ++__m128i __lsx_vmin_b (__m128i, __m128i); ++__m128i __lsx_vmin_bu (__m128i, __m128i); ++__m128i __lsx_vmin_d (__m128i, __m128i); ++__m128i __lsx_vmin_du (__m128i, __m128i); ++__m128i __lsx_vmin_h (__m128i, __m128i); ++__m128i __lsx_vmin_hu (__m128i, __m128i); ++__m128i __lsx_vmini_b (__m128i, imm_n16_15) ++__m128i __lsx_vmini_bu (__m128i, imm0_31); ++__m128i __lsx_vmini_d (__m128i, imm_n16_15) ++__m128i __lsx_vmini_du (__m128i, imm0_31); ++__m128i __lsx_vmini_h (__m128i, imm_n16_15) ++__m128i __lsx_vmini_hu (__m128i, imm0_31); ++__m128i __lsx_vmini_w (__m128i, imm_n16_15) ++__m128i __lsx_vmini_wu (__m128i, imm0_31); ++__m128i __lsx_vmin_w (__m128i, __m128i); ++__m128i __lsx_vmin_wu (__m128i, __m128i); ++__m128i __lsx_vmod_b (__m128i, __m128i); ++__m128i __lsx_vmod_bu (__m128i, __m128i); ++__m128i __lsx_vmod_d (__m128i, __m128i); ++__m128i __lsx_vmod_du (__m128i, __m128i); ++__m128i __lsx_vmod_h (__m128i, __m128i); ++__m128i __lsx_vmod_hu (__m128i, __m128i); ++__m128i __lsx_vmod_w 
(__m128i, __m128i); ++__m128i __lsx_vmod_wu (__m128i, __m128i); ++__m128i __lsx_vmskgez_b (__m128i); ++__m128i __lsx_vmskltz_b (__m128i); ++__m128i __lsx_vmskltz_d (__m128i); ++__m128i __lsx_vmskltz_h (__m128i); ++__m128i __lsx_vmskltz_w (__m128i); ++__m128i __lsx_vmsknz_b (__m128i); ++__m128i __lsx_vmsub_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmsub_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmsub_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmsub_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmuh_b (__m128i, __m128i); ++__m128i __lsx_vmuh_bu (__m128i, __m128i); ++__m128i __lsx_vmuh_d (__m128i, __m128i); ++__m128i __lsx_vmuh_du (__m128i, __m128i); ++__m128i __lsx_vmuh_h (__m128i, __m128i); ++__m128i __lsx_vmuh_hu (__m128i, __m128i); ++__m128i __lsx_vmuh_w (__m128i, __m128i); ++__m128i __lsx_vmuh_wu (__m128i, __m128i); ++__m128i __lsx_vmul_b (__m128i, __m128i); ++__m128i __lsx_vmul_d (__m128i, __m128i); ++__m128i __lsx_vmul_h (__m128i, __m128i); ++__m128i __lsx_vmul_w (__m128i, __m128i); ++__m128i __lsx_vmulwev_d_w (__m128i, __m128i); ++__m128i __lsx_vmulwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vmulwev_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vmulwev_h_b (__m128i, __m128i); ++__m128i __lsx_vmulwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vmulwev_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vmulwev_q_d (__m128i, __m128i); ++__m128i __lsx_vmulwev_q_du (__m128i, __m128i); ++__m128i __lsx_vmulwev_q_du_d (__m128i, __m128i); ++__m128i __lsx_vmulwev_w_h (__m128i, __m128i); ++__m128i __lsx_vmulwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vmulwev_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vmulwod_d_w (__m128i, __m128i); ++__m128i __lsx_vmulwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vmulwod_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vmulwod_h_b (__m128i, __m128i); ++__m128i __lsx_vmulwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vmulwod_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vmulwod_q_d (__m128i, __m128i); ++__m128i __lsx_vmulwod_q_du (__m128i, __m128i); ++__m128i __lsx_vmulwod_q_du_d (__m128i, __m128i); ++__m128i __lsx_vmulwod_w_h (__m128i, __m128i); ++__m128i __lsx_vmulwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vmulwod_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vneg_b (__m128i); ++__m128i __lsx_vneg_d (__m128i); ++__m128i __lsx_vneg_h (__m128i); ++__m128i __lsx_vneg_w (__m128i); ++__m128i __lsx_vnori_b (__m128i, imm0_255); ++__m128i __lsx_vnor_v (__m128i, __m128i); ++__m128i __lsx_vori_b (__m128i, imm0_255); ++__m128i __lsx_vorn_v (__m128i, __m128i); ++__m128i __lsx_vor_v (__m128i, __m128i); ++__m128i __lsx_vpackev_b (__m128i, __m128i); ++__m128i __lsx_vpackev_d (__m128i, __m128i); ++__m128i __lsx_vpackev_h (__m128i, __m128i); ++__m128i __lsx_vpackev_w (__m128i, __m128i); ++__m128i __lsx_vpackod_b (__m128i, __m128i); ++__m128i __lsx_vpackod_d (__m128i, __m128i); ++__m128i __lsx_vpackod_h (__m128i, __m128i); ++__m128i __lsx_vpackod_w (__m128i, __m128i); ++__m128i __lsx_vpcnt_b (__m128i); ++__m128i __lsx_vpcnt_d (__m128i); ++__m128i __lsx_vpcnt_h (__m128i); ++__m128i __lsx_vpcnt_w (__m128i); ++__m128i __lsx_vpermi_w (__m128i, __m128i, imm0_255); ++__m128i __lsx_vpickev_b (__m128i, __m128i); ++__m128i __lsx_vpickev_d (__m128i, __m128i); ++__m128i __lsx_vpickev_h (__m128i, __m128i); ++__m128i __lsx_vpickev_w (__m128i, __m128i); ++__m128i __lsx_vpickod_b (__m128i, __m128i); ++__m128i __lsx_vpickod_d (__m128i, __m128i); ++__m128i __lsx_vpickod_h (__m128i, __m128i); ++__m128i __lsx_vpickod_w (__m128i, __m128i); ++int __lsx_vpickve2gr_b (__m128i, imm0_15); ++unsigned int
__lsx_vpickve2gr_bu (__m128i, imm0_15); ++long int __lsx_vpickve2gr_d (__m128i, imm0_1); ++unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1); ++int __lsx_vpickve2gr_h (__m128i, imm0_7); ++unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7); ++int __lsx_vpickve2gr_w (__m128i, imm0_3); ++unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3); ++__m128i __lsx_vreplgr2vr_b (int); ++__m128i __lsx_vreplgr2vr_d (long int); ++__m128i __lsx_vreplgr2vr_h (int); ++__m128i __lsx_vreplgr2vr_w (int); ++__m128i __lsx_vrepli_b (imm_n512_511); ++__m128i __lsx_vrepli_d (imm_n512_511); ++__m128i __lsx_vrepli_h (imm_n512_511); ++__m128i __lsx_vrepli_w (imm_n512_511); ++__m128i __lsx_vreplve_b (__m128i, int); ++__m128i __lsx_vreplve_d (__m128i, int); ++__m128i __lsx_vreplve_h (__m128i, int); ++__m128i __lsx_vreplvei_b (__m128i, imm0_15); ++__m128i __lsx_vreplvei_d (__m128i, imm0_1); ++__m128i __lsx_vreplvei_h (__m128i, imm0_7); ++__m128i __lsx_vreplvei_w (__m128i, imm0_3); ++__m128i __lsx_vreplve_w (__m128i, int); ++__m128i __lsx_vrotr_b (__m128i, __m128i); ++__m128i __lsx_vrotr_d (__m128i, __m128i); ++__m128i __lsx_vrotr_h (__m128i, __m128i); ++__m128i __lsx_vrotri_b (__m128i, imm0_7); ++__m128i __lsx_vrotri_d (__m128i, imm0_63); ++__m128i __lsx_vrotri_h (__m128i, imm0_15); ++__m128i __lsx_vrotri_w (__m128i, imm0_31); ++__m128i __lsx_vrotr_w (__m128i, __m128i); ++__m128i __lsx_vsadd_b (__m128i, __m128i); ++__m128i __lsx_vsadd_bu (__m128i, __m128i); ++__m128i __lsx_vsadd_d (__m128i, __m128i); ++__m128i __lsx_vsadd_du (__m128i, __m128i); ++__m128i __lsx_vsadd_h (__m128i, __m128i); ++__m128i __lsx_vsadd_hu (__m128i, __m128i); ++__m128i __lsx_vsadd_w (__m128i, __m128i); ++__m128i __lsx_vsadd_wu (__m128i, __m128i); ++__m128i __lsx_vsat_b (__m128i, imm0_7); ++__m128i __lsx_vsat_bu (__m128i, imm0_7); ++__m128i __lsx_vsat_d (__m128i, imm0_63); ++__m128i __lsx_vsat_du (__m128i, imm0_63); ++__m128i __lsx_vsat_h (__m128i, imm0_15); ++__m128i __lsx_vsat_hu (__m128i, imm0_15); ++__m128i __lsx_vsat_w (__m128i, imm0_31); ++__m128i __lsx_vsat_wu (__m128i, imm0_31); ++__m128i __lsx_vseq_b (__m128i, __m128i); ++__m128i __lsx_vseq_d (__m128i, __m128i); ++__m128i __lsx_vseq_h (__m128i, __m128i); ++__m128i __lsx_vseqi_b (__m128i, imm_n16_15); ++__m128i __lsx_vseqi_d (__m128i, imm_n16_15); ++__m128i __lsx_vseqi_h (__m128i, imm_n16_15); ++__m128i __lsx_vseqi_w (__m128i, imm_n16_15); ++__m128i __lsx_vseq_w (__m128i, __m128i); ++__m128i __lsx_vshuf4i_b (__m128i, imm0_255); ++__m128i __lsx_vshuf4i_d (__m128i, __m128i, imm0_255); ++__m128i __lsx_vshuf4i_h (__m128i, imm0_255); ++__m128i __lsx_vshuf4i_w (__m128i, imm0_255); ++__m128i __lsx_vshuf_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vshuf_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vshuf_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vshuf_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vsigncov_b (__m128i, __m128i); ++__m128i __lsx_vsigncov_d (__m128i, __m128i); ++__m128i __lsx_vsigncov_h (__m128i, __m128i); ++__m128i __lsx_vsigncov_w (__m128i, __m128i); ++__m128i __lsx_vsle_b (__m128i, __m128i); ++__m128i __lsx_vsle_bu (__m128i, __m128i); ++__m128i __lsx_vsle_d (__m128i, __m128i); ++__m128i __lsx_vsle_du (__m128i, __m128i); ++__m128i __lsx_vsle_h (__m128i, __m128i); ++__m128i __lsx_vsle_hu (__m128i, __m128i); ++__m128i __lsx_vslei_b (__m128i, imm_n16_15); ++__m128i __lsx_vslei_bu (__m128i,
imm0_31); ++__m128i __lsx_vslei_d (__m128i, imm_n16_15); ++__m128i __lsx_vslei_du (__m128i, imm0_31); ++__m128i __lsx_vslei_h (__m128i, imm_n16_15); ++__m128i __lsx_vslei_hu (__m128i, imm0_31); ++__m128i __lsx_vslei_w (__m128i, imm_n16_15); ++__m128i __lsx_vslei_wu (__m128i, imm0_31); ++__m128i __lsx_vsle_w (__m128i, __m128i); ++__m128i __lsx_vsle_wu (__m128i, __m128i); ++__m128i __lsx_vsll_b (__m128i, __m128i); ++__m128i __lsx_vsll_d (__m128i, __m128i); ++__m128i __lsx_vsll_h (__m128i, __m128i); ++__m128i __lsx_vslli_b (__m128i, imm0_7); ++__m128i __lsx_vslli_d (__m128i, imm0_63); ++__m128i __lsx_vslli_h (__m128i, imm0_15); ++__m128i __lsx_vslli_w (__m128i, imm0_31); ++__m128i __lsx_vsll_w (__m128i, __m128i); ++__m128i __lsx_vsllwil_du_wu (__m128i, imm0_31); ++__m128i __lsx_vsllwil_d_w (__m128i, imm0_31); ++__m128i __lsx_vsllwil_h_b (__m128i, imm0_7); ++__m128i __lsx_vsllwil_hu_bu (__m128i, imm0_7); ++__m128i __lsx_vsllwil_w_h (__m128i, imm0_15); ++__m128i __lsx_vsllwil_wu_hu (__m128i, imm0_15); ++__m128i __lsx_vslt_b (__m128i, __m128i); ++__m128i __lsx_vslt_bu (__m128i, __m128i); ++__m128i __lsx_vslt_d (__m128i, __m128i); ++__m128i __lsx_vslt_du (__m128i, __m128i); ++__m128i __lsx_vslt_h (__m128i, __m128i); ++__m128i __lsx_vslt_hu (__m128i, __m128i); ++__m128i __lsx_vslti_b (__m128i, imm_n16_15); ++__m128i __lsx_vslti_bu (__m128i, imm0_31); ++__m128i __lsx_vslti_d (__m128i, imm_n16_15); ++__m128i __lsx_vslti_du (__m128i, imm0_31); ++__m128i __lsx_vslti_h (__m128i, imm_n16_15); ++__m128i __lsx_vslti_hu (__m128i, imm0_31); ++__m128i __lsx_vslti_w (__m128i, imm_n16_15); ++__m128i __lsx_vslti_wu (__m128i, imm0_31); ++__m128i __lsx_vslt_w (__m128i, __m128i); ++__m128i __lsx_vslt_wu (__m128i, __m128i); ++__m128i __lsx_vsra_b (__m128i, __m128i); ++__m128i __lsx_vsra_d (__m128i, __m128i); ++__m128i __lsx_vsra_h (__m128i, __m128i); ++__m128i __lsx_vsrai_b (__m128i, imm0_7); ++__m128i __lsx_vsrai_d (__m128i, imm0_63); ++__m128i __lsx_vsrai_h (__m128i, imm0_15); ++__m128i __lsx_vsrai_w (__m128i, imm0_31); ++__m128i __lsx_vsran_b_h (__m128i, __m128i); ++__m128i __lsx_vsran_h_w (__m128i, __m128i); ++__m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsran_w_d (__m128i, __m128i); ++__m128i __lsx_vsrar_b (__m128i, __m128i); ++__m128i __lsx_vsrar_d (__m128i, __m128i); ++__m128i __lsx_vsrar_h (__m128i, __m128i); ++__m128i __lsx_vsrari_b (__m128i, imm0_7); ++__m128i __lsx_vsrari_d (__m128i, imm0_63); ++__m128i __lsx_vsrari_h (__m128i, imm0_15); ++__m128i __lsx_vsrari_w (__m128i, imm0_31); ++__m128i __lsx_vsrarn_b_h (__m128i, __m128i); ++__m128i __lsx_vsrarn_h_w (__m128i, __m128i); ++__m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsrarn_w_d (__m128i, __m128i); ++__m128i __lsx_vsrar_w (__m128i, __m128i); ++__m128i __lsx_vsra_w (__m128i, __m128i); ++__m128i __lsx_vsrl_b (__m128i, __m128i); ++__m128i __lsx_vsrl_d (__m128i, __m128i); ++__m128i __lsx_vsrl_h (__m128i, __m128i); ++__m128i __lsx_vsrli_b (__m128i, imm0_7); ++__m128i __lsx_vsrli_d (__m128i, imm0_63); ++__m128i __lsx_vsrli_h (__m128i, imm0_15); ++__m128i __lsx_vsrli_w (__m128i, imm0_31); ++__m128i __lsx_vsrln_b_h (__m128i, __m128i); ++__m128i __lsx_vsrln_h_w (__m128i, 
__m128i); ++__m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsrln_w_d (__m128i, __m128i); ++__m128i __lsx_vsrlr_b (__m128i, __m128i); ++__m128i __lsx_vsrlr_d (__m128i, __m128i); ++__m128i __lsx_vsrlr_h (__m128i, __m128i); ++__m128i __lsx_vsrlri_b (__m128i, imm0_7); ++__m128i __lsx_vsrlri_d (__m128i, imm0_63); ++__m128i __lsx_vsrlri_h (__m128i, imm0_15); ++__m128i __lsx_vsrlri_w (__m128i, imm0_31); ++__m128i __lsx_vsrlrn_b_h (__m128i, __m128i); ++__m128i __lsx_vsrlrn_h_w (__m128i, __m128i); ++__m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsrlrn_w_d (__m128i, __m128i); ++__m128i __lsx_vsrlr_w (__m128i, __m128i); ++__m128i __lsx_vsrl_w (__m128i, __m128i); ++__m128i __lsx_vssran_b_h (__m128i, __m128i); ++__m128i __lsx_vssran_bu_h (__m128i, __m128i); ++__m128i __lsx_vssran_hu_w (__m128i, __m128i); ++__m128i __lsx_vssran_h_w (__m128i, __m128i); ++__m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrani_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssran_w_d (__m128i, __m128i); ++__m128i __lsx_vssran_wu_d (__m128i, __m128i); ++__m128i __lsx_vssrarn_b_h (__m128i, __m128i); ++__m128i __lsx_vssrarn_bu_h (__m128i, __m128i); ++__m128i __lsx_vssrarn_hu_w (__m128i, __m128i); ++__m128i __lsx_vssrarn_h_w (__m128i, __m128i); ++__m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrarni_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrarn_w_d (__m128i, __m128i); ++__m128i __lsx_vssrarn_wu_d (__m128i, __m128i); ++__m128i __lsx_vssrln_b_h (__m128i, __m128i); ++__m128i __lsx_vssrln_bu_h (__m128i, __m128i); ++__m128i __lsx_vssrln_hu_w (__m128i, __m128i); ++__m128i __lsx_vssrln_h_w (__m128i, __m128i); ++__m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrlni_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrln_w_d (__m128i, __m128i); ++__m128i __lsx_vssrln_wu_d (__m128i, __m128i); ++__m128i __lsx_vssrlrn_b_h (__m128i, __m128i); ++__m128i __lsx_vssrlrn_bu_h (__m128i, __m128i); ++__m128i __lsx_vssrlrn_hu_w (__m128i, __m128i); ++__m128i __lsx_vssrlrn_h_w (__m128i, __m128i); ++__m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15); 
++__m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrlrni_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrlrn_w_d (__m128i, __m128i); ++__m128i __lsx_vssrlrn_wu_d (__m128i, __m128i); ++__m128i __lsx_vssub_b (__m128i, __m128i); ++__m128i __lsx_vssub_bu (__m128i, __m128i); ++__m128i __lsx_vssub_d (__m128i, __m128i); ++__m128i __lsx_vssub_du (__m128i, __m128i); ++__m128i __lsx_vssub_h (__m128i, __m128i); ++__m128i __lsx_vssub_hu (__m128i, __m128i); ++__m128i __lsx_vssub_w (__m128i, __m128i); ++__m128i __lsx_vssub_wu (__m128i, __m128i); ++void __lsx_vst (__m128i, void *, imm_n2048_2047) ++void __lsx_vstelm_b (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstelm_d (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstelm_h (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstelm_w (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstx (__m128i, void *, long int) ++__m128i __lsx_vsub_b (__m128i, __m128i); ++__m128i __lsx_vsub_d (__m128i, __m128i); ++__m128i __lsx_vsub_h (__m128i, __m128i); ++__m128i __lsx_vsubi_bu (__m128i, imm0_31); ++__m128i __lsx_vsubi_du (__m128i, imm0_31); ++__m128i __lsx_vsubi_hu (__m128i, imm0_31); ++__m128i __lsx_vsubi_wu (__m128i, imm0_31); ++__m128i __lsx_vsub_q (__m128i, __m128i); ++__m128i __lsx_vsub_w (__m128i, __m128i); ++__m128i __lsx_vsubwev_d_w (__m128i, __m128i); ++__m128i __lsx_vsubwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vsubwev_h_b (__m128i, __m128i); ++__m128i __lsx_vsubwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vsubwev_q_d (__m128i, __m128i); ++__m128i __lsx_vsubwev_q_du (__m128i, __m128i); ++__m128i __lsx_vsubwev_w_h (__m128i, __m128i); ++__m128i __lsx_vsubwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vsubwod_d_w (__m128i, __m128i); ++__m128i __lsx_vsubwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vsubwod_h_b (__m128i, __m128i); ++__m128i __lsx_vsubwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vsubwod_q_d (__m128i, __m128i); ++__m128i __lsx_vsubwod_q_du (__m128i, __m128i); ++__m128i __lsx_vsubwod_w_h (__m128i, __m128i); ++__m128i __lsx_vsubwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vxori_b (__m128i, imm0_255); ++__m128i __lsx_vxor_v (__m128i, __m128i); ++@end smallexample ++ ++@node LoongArch ASX Vector Intrinsics ++@subsection LoongArch ASX Vector Intrinsics ++ ++GCC provides intrinsics to access the LASX (Loongson Advanced SIMD Extension) ++instructions. The interface is made available by including @code{<lasxintrin.h>} ++and using @option{-mlasx}. ++ ++The following vector typedefs are included in @code{lasxintrin.h}: ++ ++@itemize ++@item @code{__m256i}, a 256-bit vector of fixed-point values; ++@item @code{__m256}, a 256-bit vector of single-precision floating-point values; ++@item @code{__m256d}, a 256-bit vector of double-precision floating-point values. ++@end itemize ++ ++Instructions and the corresponding built-ins may place additional restrictions ++on their operands; the immediate-operand types used in the prototypes below are: ++ ++@itemize ++@item @code{imm0_1}, an integer literal in range 0 to 1. ++@item @code{imm0_3}, an integer literal in range 0 to 3. ++@item @code{imm0_7}, an integer literal in range 0 to 7. ++@item @code{imm0_15}, an integer literal in range 0 to 15. ++@item @code{imm0_31}, an integer literal in range 0 to 31.
++@item @code{imm0_63}, an integer literal in range 0 to 63. ++@item @code{imm0_127}, an integer literal in range 0 to 127. ++@item @code{imm0_255}, an integer literal in range 0 to 255. ++@item @code{imm_n16_15}, an integer literal in range -16 to 15. ++@item @code{imm_n128_127}, an integer literal in range -128 to 127. ++@item @code{imm_n256_255}, an integer literal in range -256 to 255. ++@item @code{imm_n512_511}, an integer literal in range -512 to 511. ++@item @code{imm_n1024_1023}, an integer literal in range -1024 to 1023. ++@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047. ++@end itemize ++ ++For convenience, GCC defines functions @code{__lasx_xvrepli_@{b/h/w/d@}} and ++@code{__lasx_xb[n]z_@{v/b/h/w/d@}}, which are implemented as follows: ++ ++@smallexample ++a. @code{__lasx_xvrepli_@{b/h/w/d@}}: Implements the case where the highest ++ bit of the @code{xvldi} instruction operand @code{i13} is 0. ++ ++ i13[12] == 1'b0 ++ case i13[11:10] of : ++ 2'b00: __lasx_xvrepli_b (imm_n512_511) ++ 2'b01: __lasx_xvrepli_h (imm_n512_511) ++ 2'b10: __lasx_xvrepli_w (imm_n512_511) ++ 2'b11: __lasx_xvrepli_d (imm_n512_511) ++ ++b. @code{__lasx_xb[n]z_@{v/b/h/w/d@}}: Since instructions of the @code{xvseteqz} ++ class cannot be used on their own, these functions are defined. ++ ++ __lasx_xbz_v => xvseteqz.v + bcnez ++ __lasx_xbnz_v => xvsetnez.v + bcnez ++ __lasx_xbz_b => xvsetanyeqz.b + bcnez ++ __lasx_xbz_h => xvsetanyeqz.h + bcnez ++ __lasx_xbz_w => xvsetanyeqz.w + bcnez ++ __lasx_xbz_d => xvsetanyeqz.d + bcnez ++ __lasx_xbnz_b => xvsetallnez.b + bcnez ++ __lasx_xbnz_h => xvsetallnez.h + bcnez ++ __lasx_xbnz_w => xvsetallnez.w + bcnez ++ __lasx_xbnz_d => xvsetallnez.d + bcnez ++@end smallexample ++ ++@smallexample ++eg: ++ #include <lasxintrin.h> ++ ++ extern __m256i @var{a}; ++ ++ void ++ test (void) ++ @{ ++ if (__lasx_xbz_v (@var{a})) ++ printf ("1\n"); ++ else ++ printf ("2\n"); ++ @} ++@end smallexample ++ ++@emph{Note:} For instructions whose destination operand is also a source operand ++(only part of the destination register's bit-field is modified), the first ++argument of the built-in function call is used as the destination operand.
++ ++@smallexample ++eg: ++ #include <lasxintrin.h> ++ extern __m256i @var{dst}; ++ int @var{src}; ++ ++ void ++ test (void) ++ @{ ++ @var{dst} = __lasx_xvinsgr2vr_w (@var{dst}, @var{src}, 3); ++ @} ++@end smallexample ++ ++ ++The intrinsics provided are listed below: ++ ++@smallexample ++__m256i __lasx_vext2xv_d_b (__m256i); ++__m256i __lasx_vext2xv_d_h (__m256i); ++__m256i __lasx_vext2xv_du_bu (__m256i); ++__m256i __lasx_vext2xv_du_hu (__m256i); ++__m256i __lasx_vext2xv_du_wu (__m256i); ++__m256i __lasx_vext2xv_d_w (__m256i); ++__m256i __lasx_vext2xv_h_b (__m256i); ++__m256i __lasx_vext2xv_hu_bu (__m256i); ++__m256i __lasx_vext2xv_w_b (__m256i); ++__m256i __lasx_vext2xv_w_h (__m256i); ++__m256i __lasx_vext2xv_wu_bu (__m256i); ++__m256i __lasx_vext2xv_wu_hu (__m256i); ++int __lasx_xbnz_b (__m256i); ++int __lasx_xbnz_d (__m256i); ++int __lasx_xbnz_h (__m256i); ++int __lasx_xbnz_v (__m256i); ++int __lasx_xbnz_w (__m256i); ++int __lasx_xbz_b (__m256i); ++int __lasx_xbz_d (__m256i); ++int __lasx_xbz_h (__m256i); ++int __lasx_xbz_v (__m256i); ++int __lasx_xbz_w (__m256i); ++__m256i __lasx_xvabsd_b (__m256i, __m256i); ++__m256i __lasx_xvabsd_bu (__m256i, __m256i); ++__m256i __lasx_xvabsd_d (__m256i, __m256i); ++__m256i __lasx_xvabsd_du (__m256i, __m256i); ++__m256i __lasx_xvabsd_h (__m256i, __m256i); ++__m256i __lasx_xvabsd_hu (__m256i, __m256i); ++__m256i __lasx_xvabsd_w (__m256i, __m256i); ++__m256i __lasx_xvabsd_wu (__m256i, __m256i); ++__m256i __lasx_xvadda_b (__m256i, __m256i); ++__m256i __lasx_xvadda_d (__m256i, __m256i); ++__m256i __lasx_xvadda_h (__m256i, __m256i); ++__m256i __lasx_xvadda_w (__m256i, __m256i); ++__m256i __lasx_xvadd_b (__m256i, __m256i); ++__m256i __lasx_xvadd_d (__m256i, __m256i); ++__m256i __lasx_xvadd_h (__m256i, __m256i); ++__m256i __lasx_xvaddi_bu (__m256i, imm0_31); ++__m256i __lasx_xvaddi_du (__m256i, imm0_31); ++__m256i __lasx_xvaddi_hu (__m256i, imm0_31); ++__m256i __lasx_xvaddi_wu (__m256i, imm0_31); ++__m256i __lasx_xvadd_q (__m256i, __m256i); ++__m256i __lasx_xvadd_w (__m256i, __m256i); ++__m256i __lasx_xvaddwev_d_w (__m256i, __m256i); ++__m256i __lasx_xvaddwev_d_wu (__m256i, __m256i); ++__m256i __lasx_xvaddwev_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvaddwev_h_b (__m256i, __m256i); ++__m256i __lasx_xvaddwev_h_bu (__m256i, __m256i); ++__m256i __lasx_xvaddwev_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvaddwev_q_d (__m256i, __m256i); ++__m256i __lasx_xvaddwev_q_du (__m256i, __m256i); ++__m256i __lasx_xvaddwev_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvaddwev_w_h (__m256i, __m256i); ++__m256i __lasx_xvaddwev_w_hu (__m256i, __m256i); ++__m256i __lasx_xvaddwev_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvaddwod_d_w (__m256i, __m256i); ++__m256i __lasx_xvaddwod_d_wu (__m256i, __m256i); ++__m256i __lasx_xvaddwod_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvaddwod_h_b (__m256i, __m256i); ++__m256i __lasx_xvaddwod_h_bu (__m256i, __m256i); ++__m256i __lasx_xvaddwod_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvaddwod_q_d (__m256i, __m256i); ++__m256i __lasx_xvaddwod_q_du (__m256i, __m256i); ++__m256i __lasx_xvaddwod_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvaddwod_w_h (__m256i, __m256i); ++__m256i __lasx_xvaddwod_w_hu (__m256i, __m256i); ++__m256i __lasx_xvaddwod_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvandi_b (__m256i, imm0_255); ++__m256i __lasx_xvandn_v (__m256i, __m256i); ++__m256i __lasx_xvand_v (__m256i, __m256i); ++__m256i __lasx_xvavg_b (__m256i, __m256i); ++__m256i __lasx_xvavg_bu (__m256i, __m256i); ++__m256i __lasx_xvavg_d (__m256i, 
__m256i); ++__m256i __lasx_xvavg_du (__m256i, __m256i); ++__m256i __lasx_xvavg_h (__m256i, __m256i); ++__m256i __lasx_xvavg_hu (__m256i, __m256i); ++__m256i __lasx_xvavgr_b (__m256i, __m256i); ++__m256i __lasx_xvavgr_bu (__m256i, __m256i); ++__m256i __lasx_xvavgr_d (__m256i, __m256i); ++__m256i __lasx_xvavgr_du (__m256i, __m256i); ++__m256i __lasx_xvavgr_h (__m256i, __m256i); ++__m256i __lasx_xvavgr_hu (__m256i, __m256i); ++__m256i __lasx_xvavgr_w (__m256i, __m256i); ++__m256i __lasx_xvavgr_wu (__m256i, __m256i); ++__m256i __lasx_xvavg_w (__m256i, __m256i); ++__m256i __lasx_xvavg_wu (__m256i, __m256i); ++__m256i __lasx_xvbitclr_b (__m256i, __m256i); ++__m256i __lasx_xvbitclr_d (__m256i, __m256i); ++__m256i __lasx_xvbitclr_h (__m256i, __m256i); ++__m256i __lasx_xvbitclri_b (__m256i, imm0_7); ++__m256i __lasx_xvbitclri_d (__m256i, imm0_63); ++__m256i __lasx_xvbitclri_h (__m256i, imm0_15); ++__m256i __lasx_xvbitclri_w (__m256i, imm0_31); ++__m256i __lasx_xvbitclr_w (__m256i, __m256i); ++__m256i __lasx_xvbitrev_b (__m256i, __m256i); ++__m256i __lasx_xvbitrev_d (__m256i, __m256i); ++__m256i __lasx_xvbitrev_h (__m256i, __m256i); ++__m256i __lasx_xvbitrevi_b (__m256i, imm0_7); ++__m256i __lasx_xvbitrevi_d (__m256i, imm0_63); ++__m256i __lasx_xvbitrevi_h (__m256i, imm0_15); ++__m256i __lasx_xvbitrevi_w (__m256i, imm0_31); ++__m256i __lasx_xvbitrev_w (__m256i, __m256i); ++__m256i __lasx_xvbitseli_b (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvbitsel_v (__m256i, __m256i, __m256i); ++__m256i __lasx_xvbitset_b (__m256i, __m256i); ++__m256i __lasx_xvbitset_d (__m256i, __m256i); ++__m256i __lasx_xvbitset_h (__m256i, __m256i); ++__m256i __lasx_xvbitseti_b (__m256i, imm0_7); ++__m256i __lasx_xvbitseti_d (__m256i, imm0_63); ++__m256i __lasx_xvbitseti_h (__m256i, imm0_15); ++__m256i __lasx_xvbitseti_w (__m256i, imm0_31); ++__m256i __lasx_xvbitset_w (__m256i, __m256i); ++__m256i __lasx_xvbsll_v (__m256i, imm0_31); ++__m256i __lasx_xvbsrl_v (__m256i, imm0_31); ++__m256i __lasx_xvclo_b (__m256i); ++__m256i __lasx_xvclo_d (__m256i); ++__m256i __lasx_xvclo_h (__m256i); ++__m256i __lasx_xvclo_w (__m256i); ++__m256i __lasx_xvclz_b (__m256i); ++__m256i __lasx_xvclz_d (__m256i); ++__m256i __lasx_xvclz_h (__m256i); ++__m256i __lasx_xvclz_w (__m256i); ++__m256i __lasx_xvdiv_b (__m256i, __m256i); ++__m256i __lasx_xvdiv_bu (__m256i, __m256i); ++__m256i __lasx_xvdiv_d (__m256i, __m256i); ++__m256i __lasx_xvdiv_du (__m256i, __m256i); ++__m256i __lasx_xvdiv_h (__m256i, __m256i); ++__m256i __lasx_xvdiv_hu (__m256i, __m256i); ++__m256i __lasx_xvdiv_w (__m256i, __m256i); ++__m256i __lasx_xvdiv_wu (__m256i, __m256i); ++__m256i __lasx_xvexth_du_wu (__m256i); ++__m256i __lasx_xvexth_d_w (__m256i); ++__m256i __lasx_xvexth_h_b (__m256i); ++__m256i __lasx_xvexth_hu_bu (__m256i); ++__m256i __lasx_xvexth_q_d (__m256i); ++__m256i __lasx_xvexth_qu_du (__m256i); ++__m256i __lasx_xvexth_w_h (__m256i); ++__m256i __lasx_xvexth_wu_hu (__m256i); ++__m256i __lasx_xvextl_q_d (__m256i); ++__m256i __lasx_xvextl_qu_du (__m256i); ++__m256i __lasx_xvextrins_b (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvextrins_d (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvextrins_h (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvextrins_w (__m256i, __m256i, imm0_255); ++__m256d __lasx_xvfadd_d (__m256d, __m256d); ++__m256 __lasx_xvfadd_s (__m256, __m256); ++__m256i __lasx_xvfclass_d (__m256d); ++__m256i __lasx_xvfclass_s (__m256); ++__m256i __lasx_xvfcmp_caf_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_caf_s (__m256, __m256); ++__m256i 
__lasx_xvfcmp_ceq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_ceq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cle_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cle_s (__m256, __m256); ++__m256i __lasx_xvfcmp_clt_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_clt_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cne_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cne_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cor_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cor_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cueq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cueq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cule_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cule_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cult_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cult_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cun_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cune_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cune_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cun_s (__m256, __m256); ++__m256i __lasx_xvfcmp_saf_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_saf_s (__m256, __m256); ++__m256i __lasx_xvfcmp_seq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_seq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sle_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sle_s (__m256, __m256); ++__m256i __lasx_xvfcmp_slt_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_slt_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sne_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sne_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sor_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sor_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sueq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sueq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sule_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sule_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sult_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sult_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sun_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sune_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sune_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sun_s (__m256, __m256); ++__m256d __lasx_xvfcvth_d_s (__m256); ++__m256i __lasx_xvfcvt_h_s (__m256, __m256); ++__m256 __lasx_xvfcvth_s_h (__m256i); ++__m256d __lasx_xvfcvtl_d_s (__m256); ++__m256 __lasx_xvfcvtl_s_h (__m256i); ++__m256 __lasx_xvfcvt_s_d (__m256d, __m256d); ++__m256d __lasx_xvfdiv_d (__m256d, __m256d); ++__m256 __lasx_xvfdiv_s (__m256, __m256); ++__m256d __lasx_xvffint_d_l (__m256i); ++__m256d __lasx_xvffint_d_lu (__m256i); ++__m256d __lasx_xvffinth_d_w (__m256i); ++__m256d __lasx_xvffintl_d_w (__m256i); ++__m256 __lasx_xvffint_s_l (__m256i, __m256i); ++__m256 __lasx_xvffint_s_w (__m256i); ++__m256 __lasx_xvffint_s_wu (__m256i); ++__m256d __lasx_xvflogb_d (__m256d); ++__m256 __lasx_xvflogb_s (__m256); ++__m256d __lasx_xvfmadd_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfmadd_s (__m256, __m256, __m256); ++__m256d __lasx_xvfmaxa_d (__m256d, __m256d); ++__m256 __lasx_xvfmaxa_s (__m256, __m256); ++__m256d __lasx_xvfmax_d (__m256d, __m256d); ++__m256 __lasx_xvfmax_s (__m256, __m256); ++__m256d __lasx_xvfmina_d (__m256d, __m256d); ++__m256 __lasx_xvfmina_s (__m256, __m256); ++__m256d __lasx_xvfmin_d (__m256d, __m256d); ++__m256 __lasx_xvfmin_s (__m256, __m256); ++__m256d __lasx_xvfmsub_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfmsub_s (__m256, __m256, __m256); ++__m256d __lasx_xvfmul_d (__m256d, __m256d); ++__m256 __lasx_xvfmul_s (__m256, __m256); ++__m256d __lasx_xvfnmadd_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfnmadd_s (__m256, __m256, __m256); ++__m256d 
__lasx_xvfnmsub_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfnmsub_s (__m256, __m256, __m256); ++__m256d __lasx_xvfrecip_d (__m256d); ++__m256 __lasx_xvfrecip_s (__m256); ++__m256d __lasx_xvfrint_d (__m256d); ++__m256i __lasx_xvfrintrm_d (__m256d); ++__m256i __lasx_xvfrintrm_s (__m256); ++__m256i __lasx_xvfrintrne_d (__m256d); ++__m256i __lasx_xvfrintrne_s (__m256); ++__m256i __lasx_xvfrintrp_d (__m256d); ++__m256i __lasx_xvfrintrp_s (__m256); ++__m256i __lasx_xvfrintrz_d (__m256d); ++__m256i __lasx_xvfrintrz_s (__m256); ++__m256 __lasx_xvfrint_s (__m256); ++__m256d __lasx_xvfrsqrt_d (__m256d); ++__m256 __lasx_xvfrsqrt_s (__m256); ++__m256i __lasx_xvfrstp_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvfrstp_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvfrstpi_b (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvfrstpi_h (__m256i, __m256i, imm0_31); ++__m256d __lasx_xvfsqrt_d (__m256d); ++__m256 __lasx_xvfsqrt_s (__m256); ++__m256d __lasx_xvfsub_d (__m256d, __m256d); ++__m256 __lasx_xvfsub_s (__m256, __m256); ++__m256i __lasx_xvftinth_l_s (__m256); ++__m256i __lasx_xvftint_l_d (__m256d); ++__m256i __lasx_xvftintl_l_s (__m256); ++__m256i __lasx_xvftint_lu_d (__m256d); ++__m256i __lasx_xvftintrmh_l_s (__m256); ++__m256i __lasx_xvftintrm_l_d (__m256d); ++__m256i __lasx_xvftintrml_l_s (__m256); ++__m256i __lasx_xvftintrm_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrm_w_s (__m256); ++__m256i __lasx_xvftintrneh_l_s (__m256); ++__m256i __lasx_xvftintrne_l_d (__m256d); ++__m256i __lasx_xvftintrnel_l_s (__m256); ++__m256i __lasx_xvftintrne_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrne_w_s (__m256); ++__m256i __lasx_xvftintrph_l_s (__m256); ++__m256i __lasx_xvftintrp_l_d (__m256d); ++__m256i __lasx_xvftintrpl_l_s (__m256); ++__m256i __lasx_xvftintrp_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrp_w_s (__m256); ++__m256i __lasx_xvftintrzh_l_s (__m256); ++__m256i __lasx_xvftintrz_l_d (__m256d); ++__m256i __lasx_xvftintrzl_l_s (__m256); ++__m256i __lasx_xvftintrz_lu_d (__m256d); ++__m256i __lasx_xvftintrz_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrz_w_s (__m256); ++__m256i __lasx_xvftintrz_wu_s (__m256); ++__m256i __lasx_xvftint_w_d (__m256d, __m256d); ++__m256i __lasx_xvftint_w_s (__m256); ++__m256i __lasx_xvftint_wu_s (__m256); ++__m256i __lasx_xvhaddw_du_wu (__m256i, __m256i); ++__m256i __lasx_xvhaddw_d_w (__m256i, __m256i); ++__m256i __lasx_xvhaddw_h_b (__m256i, __m256i); ++__m256i __lasx_xvhaddw_hu_bu (__m256i, __m256i); ++__m256i __lasx_xvhaddw_q_d (__m256i, __m256i); ++__m256i __lasx_xvhaddw_qu_du (__m256i, __m256i); ++__m256i __lasx_xvhaddw_w_h (__m256i, __m256i); ++__m256i __lasx_xvhaddw_wu_hu (__m256i, __m256i); ++__m256i __lasx_xvhsubw_du_wu (__m256i, __m256i); ++__m256i __lasx_xvhsubw_d_w (__m256i, __m256i); ++__m256i __lasx_xvhsubw_h_b (__m256i, __m256i); ++__m256i __lasx_xvhsubw_hu_bu (__m256i, __m256i); ++__m256i __lasx_xvhsubw_q_d (__m256i, __m256i); ++__m256i __lasx_xvhsubw_qu_du (__m256i, __m256i); ++__m256i __lasx_xvhsubw_w_h (__m256i, __m256i); ++__m256i __lasx_xvhsubw_wu_hu (__m256i, __m256i); ++__m256i __lasx_xvilvh_b (__m256i, __m256i); ++__m256i __lasx_xvilvh_d (__m256i, __m256i); ++__m256i __lasx_xvilvh_h (__m256i, __m256i); ++__m256i __lasx_xvilvh_w (__m256i, __m256i); ++__m256i __lasx_xvilvl_b (__m256i, __m256i); ++__m256i __lasx_xvilvl_d (__m256i, __m256i); ++__m256i __lasx_xvilvl_h (__m256i, __m256i); ++__m256i __lasx_xvilvl_w (__m256i, __m256i); ++__m256i __lasx_xvinsgr2vr_d (__m256i, long int, imm0_3); ++__m256i __lasx_xvinsgr2vr_w (__m256i, 
int, imm0_7); ++__m256i __lasx_xvinsve0_d (__m256i, __m256i, imm0_3); ++__m256i __lasx_xvinsve0_w (__m256i, __m256i, imm0_7); ++__m256i __lasx_xvld (void *, imm_n2048_2047); ++__m256i __lasx_xvldi (imm_n1024_1023); ++__m256i __lasx_xvldrepl_b (void *, imm_n2048_2047); ++__m256i __lasx_xvldrepl_d (void *, imm_n256_255); ++__m256i __lasx_xvldrepl_h (void *, imm_n1024_1023); ++__m256i __lasx_xvldrepl_w (void *, imm_n512_511); ++__m256i __lasx_xvldx (void *, long int); ++__m256i __lasx_xvmadd_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmadd_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmadd_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmadd_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_d_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_d_wu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_d_wu_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_h_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_h_bu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_h_bu_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_q_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_q_du (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_q_du_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_w_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_w_hu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_w_hu_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_d_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_d_wu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_d_wu_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_h_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_h_bu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_h_bu_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_q_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_q_du (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_q_du_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_w_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_w_hu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_w_hu_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmax_b (__m256i, __m256i); ++__m256i __lasx_xvmax_bu (__m256i, __m256i); ++__m256i __lasx_xvmax_d (__m256i, __m256i); ++__m256i __lasx_xvmax_du (__m256i, __m256i); ++__m256i __lasx_xvmax_h (__m256i, __m256i); ++__m256i __lasx_xvmax_hu (__m256i, __m256i); ++__m256i __lasx_xvmaxi_b (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_bu (__m256i, imm0_31); ++__m256i __lasx_xvmaxi_d (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_du (__m256i, imm0_31); ++__m256i __lasx_xvmaxi_h (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_hu (__m256i, imm0_31); ++__m256i __lasx_xvmaxi_w (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_wu (__m256i, imm0_31); ++__m256i __lasx_xvmax_w (__m256i, __m256i); ++__m256i __lasx_xvmax_wu (__m256i, __m256i); ++__m256i __lasx_xvmin_b (__m256i, __m256i); ++__m256i __lasx_xvmin_bu (__m256i, __m256i); ++__m256i __lasx_xvmin_d (__m256i, __m256i); ++__m256i __lasx_xvmin_du (__m256i, __m256i); ++__m256i __lasx_xvmin_h (__m256i, __m256i); ++__m256i __lasx_xvmin_hu (__m256i, __m256i); ++__m256i __lasx_xvmini_b (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_bu (__m256i, imm0_31); ++__m256i __lasx_xvmini_d (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_du (__m256i, imm0_31); ++__m256i __lasx_xvmini_h (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_hu (__m256i, imm0_31); ++__m256i __lasx_xvmini_w (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_wu 
(__m256i, imm0_31); ++__m256i __lasx_xvmin_w (__m256i, __m256i); ++__m256i __lasx_xvmin_wu (__m256i, __m256i); ++__m256i __lasx_xvmod_b (__m256i, __m256i); ++__m256i __lasx_xvmod_bu (__m256i, __m256i); ++__m256i __lasx_xvmod_d (__m256i, __m256i); ++__m256i __lasx_xvmod_du (__m256i, __m256i); ++__m256i __lasx_xvmod_h (__m256i, __m256i); ++__m256i __lasx_xvmod_hu (__m256i, __m256i); ++__m256i __lasx_xvmod_w (__m256i, __m256i); ++__m256i __lasx_xvmod_wu (__m256i, __m256i); ++__m256i __lasx_xvmskgez_b (__m256i); ++__m256i __lasx_xvmskltz_b (__m256i); ++__m256i __lasx_xvmskltz_d (__m256i); ++__m256i __lasx_xvmskltz_h (__m256i); ++__m256i __lasx_xvmskltz_w (__m256i); ++__m256i __lasx_xvmsknz_b (__m256i); ++__m256i __lasx_xvmsub_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmsub_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmsub_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmsub_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmuh_b (__m256i, __m256i); ++__m256i __lasx_xvmuh_bu (__m256i, __m256i); ++__m256i __lasx_xvmuh_d (__m256i, __m256i); ++__m256i __lasx_xvmuh_du (__m256i, __m256i); ++__m256i __lasx_xvmuh_h (__m256i, __m256i); ++__m256i __lasx_xvmuh_hu (__m256i, __m256i); ++__m256i __lasx_xvmuh_w (__m256i, __m256i); ++__m256i __lasx_xvmuh_wu (__m256i, __m256i); ++__m256i __lasx_xvmul_b (__m256i, __m256i); ++__m256i __lasx_xvmul_d (__m256i, __m256i); ++__m256i __lasx_xvmul_h (__m256i, __m256i); ++__m256i __lasx_xvmul_w (__m256i, __m256i); ++__m256i __lasx_xvmulwev_d_w (__m256i, __m256i); ++__m256i __lasx_xvmulwev_d_wu (__m256i, __m256i); ++__m256i __lasx_xvmulwev_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvmulwev_h_b (__m256i, __m256i); ++__m256i __lasx_xvmulwev_h_bu (__m256i, __m256i); ++__m256i __lasx_xvmulwev_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvmulwev_q_d (__m256i, __m256i); ++__m256i __lasx_xvmulwev_q_du (__m256i, __m256i); ++__m256i __lasx_xvmulwev_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvmulwev_w_h (__m256i, __m256i); ++__m256i __lasx_xvmulwev_w_hu (__m256i, __m256i); ++__m256i __lasx_xvmulwev_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvmulwod_d_w (__m256i, __m256i); ++__m256i __lasx_xvmulwod_d_wu (__m256i, __m256i); ++__m256i __lasx_xvmulwod_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvmulwod_h_b (__m256i, __m256i); ++__m256i __lasx_xvmulwod_h_bu (__m256i, __m256i); ++__m256i __lasx_xvmulwod_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvmulwod_q_d (__m256i, __m256i); ++__m256i __lasx_xvmulwod_q_du (__m256i, __m256i); ++__m256i __lasx_xvmulwod_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvmulwod_w_h (__m256i, __m256i); ++__m256i __lasx_xvmulwod_w_hu (__m256i, __m256i); ++__m256i __lasx_xvmulwod_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvneg_b (__m256i); ++__m256i __lasx_xvneg_d (__m256i); ++__m256i __lasx_xvneg_h (__m256i); ++__m256i __lasx_xvneg_w (__m256i); ++__m256i __lasx_xvnori_b (__m256i, imm0_255); ++__m256i __lasx_xvnor_v (__m256i, __m256i); ++__m256i __lasx_xvori_b (__m256i, imm0_255); ++__m256i __lasx_xvorn_v (__m256i, __m256i); ++__m256i __lasx_xvor_v (__m256i, __m256i); ++__m256i __lasx_xvpackev_b (__m256i, __m256i); ++__m256i __lasx_xvpackev_d (__m256i, __m256i); ++__m256i __lasx_xvpackev_h (__m256i, __m256i); ++__m256i __lasx_xvpackev_w (__m256i, __m256i); ++__m256i __lasx_xvpackod_b (__m256i, __m256i); ++__m256i __lasx_xvpackod_d (__m256i, __m256i); ++__m256i __lasx_xvpackod_h (__m256i, __m256i); ++__m256i __lasx_xvpackod_w (__m256i, __m256i); ++__m256i __lasx_xvpcnt_b (__m256i); ++__m256i __lasx_xvpcnt_d (__m256i); ++__m256i 
__lasx_xvpcnt_h (__m256i); ++__m256i __lasx_xvpcnt_w (__m256i); ++__m256i __lasx_xvpermi_d (__m256i, imm0_255); ++__m256i __lasx_xvpermi_q (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvpermi_w (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvperm_w (__m256i, __m256i); ++__m256i __lasx_xvpickev_b (__m256i, __m256i); ++__m256i __lasx_xvpickev_d (__m256i, __m256i); ++__m256i __lasx_xvpickev_h (__m256i, __m256i); ++__m256i __lasx_xvpickev_w (__m256i, __m256i); ++__m256i __lasx_xvpickod_b (__m256i, __m256i); ++__m256i __lasx_xvpickod_d (__m256i, __m256i); ++__m256i __lasx_xvpickod_h (__m256i, __m256i); ++__m256i __lasx_xvpickod_w (__m256i, __m256i); ++long int __lasx_xvpickve2gr_d (__m256i, imm0_3); ++unsigned long int __lasx_xvpickve2gr_du (__m256i, imm0_3); ++int __lasx_xvpickve2gr_w (__m256i, imm0_7); ++unsigned int __lasx_xvpickve2gr_wu (__m256i, imm0_7); ++__m256i __lasx_xvpickve_d (__m256i, imm0_3); ++__m256d __lasx_xvpickve_d_f (__m256d, imm0_3); ++__m256i __lasx_xvpickve_w (__m256i, imm0_7); ++__m256 __lasx_xvpickve_w_f (__m256, imm0_7); ++__m256i __lasx_xvrepl128vei_b (__m256i, imm0_15); ++__m256i __lasx_xvrepl128vei_d (__m256i, imm0_1); ++__m256i __lasx_xvrepl128vei_h (__m256i, imm0_7); ++__m256i __lasx_xvrepl128vei_w (__m256i, imm0_3); ++__m256i __lasx_xvreplgr2vr_b (int); ++__m256i __lasx_xvreplgr2vr_d (long int); ++__m256i __lasx_xvreplgr2vr_h (int); ++__m256i __lasx_xvreplgr2vr_w (int); ++__m256i __lasx_xvrepli_b (imm_n512_511); ++__m256i __lasx_xvrepli_d (imm_n512_511); ++__m256i __lasx_xvrepli_h (imm_n512_511); ++__m256i __lasx_xvrepli_w (imm_n512_511); ++__m256i __lasx_xvreplve0_b (__m256i); ++__m256i __lasx_xvreplve0_d (__m256i); ++__m256i __lasx_xvreplve0_h (__m256i); ++__m256i __lasx_xvreplve0_q (__m256i); ++__m256i __lasx_xvreplve0_w (__m256i); ++__m256i __lasx_xvreplve_b (__m256i, int); ++__m256i __lasx_xvreplve_d (__m256i, int); ++__m256i __lasx_xvreplve_h (__m256i, int); ++__m256i __lasx_xvreplve_w (__m256i, int); ++__m256i __lasx_xvrotr_b (__m256i, __m256i); ++__m256i __lasx_xvrotr_d (__m256i, __m256i); ++__m256i __lasx_xvrotr_h (__m256i, __m256i); ++__m256i __lasx_xvrotri_b (__m256i, imm0_7); ++__m256i __lasx_xvrotri_d (__m256i, imm0_63); ++__m256i __lasx_xvrotri_h (__m256i, imm0_15); ++__m256i __lasx_xvrotri_w (__m256i, imm0_31); ++__m256i __lasx_xvrotr_w (__m256i, __m256i); ++__m256i __lasx_xvsadd_b (__m256i, __m256i); ++__m256i __lasx_xvsadd_bu (__m256i, __m256i); ++__m256i __lasx_xvsadd_d (__m256i, __m256i); ++__m256i __lasx_xvsadd_du (__m256i, __m256i); ++__m256i __lasx_xvsadd_h (__m256i, __m256i); ++__m256i __lasx_xvsadd_hu (__m256i, __m256i); ++__m256i __lasx_xvsadd_w (__m256i, __m256i); ++__m256i __lasx_xvsadd_wu (__m256i, __m256i); ++__m256i __lasx_xvsat_b (__m256i, imm0_7); ++__m256i __lasx_xvsat_bu (__m256i, imm0_7); ++__m256i __lasx_xvsat_d (__m256i, imm0_63); ++__m256i __lasx_xvsat_du (__m256i, imm0_63); ++__m256i __lasx_xvsat_h (__m256i, imm0_15); ++__m256i __lasx_xvsat_hu (__m256i, imm0_15); ++__m256i __lasx_xvsat_w (__m256i, imm0_31); ++__m256i __lasx_xvsat_wu (__m256i, imm0_31); ++__m256i __lasx_xvseq_b (__m256i, __m256i); ++__m256i __lasx_xvseq_d (__m256i, __m256i); ++__m256i __lasx_xvseq_h (__m256i, __m256i); ++__m256i __lasx_xvseqi_b (__m256i, imm_n16_15); ++__m256i __lasx_xvseqi_d (__m256i, imm_n16_15); ++__m256i __lasx_xvseqi_h (__m256i, imm_n16_15); ++__m256i __lasx_xvseqi_w (__m256i, imm_n16_15); ++__m256i __lasx_xvseq_w (__m256i, __m256i); ++__m256i __lasx_xvshuf4i_b (__m256i, imm0_255); ++__m256i __lasx_xvshuf4i_d (__m256i, __m256i, 
imm0_255); ++__m256i __lasx_xvshuf4i_h (__m256i, imm0_255); ++__m256i __lasx_xvshuf4i_w (__m256i, imm0_255); ++__m256i __lasx_xvshuf_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvshuf_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvshuf_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvshuf_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvsigncov_b (__m256i, __m256i); ++__m256i __lasx_xvsigncov_d (__m256i, __m256i); ++__m256i __lasx_xvsigncov_h (__m256i, __m256i); ++__m256i __lasx_xvsigncov_w (__m256i, __m256i); ++__m256i __lasx_xvsle_b (__m256i, __m256i); ++__m256i __lasx_xvsle_bu (__m256i, __m256i); ++__m256i __lasx_xvsle_d (__m256i, __m256i); ++__m256i __lasx_xvsle_du (__m256i, __m256i); ++__m256i __lasx_xvsle_h (__m256i, __m256i); ++__m256i __lasx_xvsle_hu (__m256i, __m256i); ++__m256i __lasx_xvslei_b (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_bu (__m256i, imm0_31); ++__m256i __lasx_xvslei_d (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_du (__m256i, imm0_31); ++__m256i __lasx_xvslei_h (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_hu (__m256i, imm0_31); ++__m256i __lasx_xvslei_w (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_wu (__m256i, imm0_31); ++__m256i __lasx_xvsle_w (__m256i, __m256i); ++__m256i __lasx_xvsle_wu (__m256i, __m256i); ++__m256i __lasx_xvsll_b (__m256i, __m256i); ++__m256i __lasx_xvsll_d (__m256i, __m256i); ++__m256i __lasx_xvsll_h (__m256i, __m256i); ++__m256i __lasx_xvslli_b (__m256i, imm0_7); ++__m256i __lasx_xvslli_d (__m256i, imm0_63); ++__m256i __lasx_xvslli_h (__m256i, imm0_15); ++__m256i __lasx_xvslli_w (__m256i, imm0_31); ++__m256i __lasx_xvsll_w (__m256i, __m256i); ++__m256i __lasx_xvsllwil_du_wu (__m256i, imm0_31); ++__m256i __lasx_xvsllwil_d_w (__m256i, imm0_31); ++__m256i __lasx_xvsllwil_h_b (__m256i, imm0_7); ++__m256i __lasx_xvsllwil_hu_bu (__m256i, imm0_7); ++__m256i __lasx_xvsllwil_w_h (__m256i, imm0_15); ++__m256i __lasx_xvsllwil_wu_hu (__m256i, imm0_15); ++__m256i __lasx_xvslt_b (__m256i, __m256i); ++__m256i __lasx_xvslt_bu (__m256i, __m256i); ++__m256i __lasx_xvslt_d (__m256i, __m256i); ++__m256i __lasx_xvslt_du (__m256i, __m256i); ++__m256i __lasx_xvslt_h (__m256i, __m256i); ++__m256i __lasx_xvslt_hu (__m256i, __m256i); ++__m256i __lasx_xvslti_b (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_bu (__m256i, imm0_31); ++__m256i __lasx_xvslti_d (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_du (__m256i, imm0_31); ++__m256i __lasx_xvslti_h (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_hu (__m256i, imm0_31); ++__m256i __lasx_xvslti_w (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_wu (__m256i, imm0_31); ++__m256i __lasx_xvslt_w (__m256i, __m256i); ++__m256i __lasx_xvslt_wu (__m256i, __m256i); ++__m256i __lasx_xvsra_b (__m256i, __m256i); ++__m256i __lasx_xvsra_d (__m256i, __m256i); ++__m256i __lasx_xvsra_h (__m256i, __m256i); ++__m256i __lasx_xvsrai_b (__m256i, imm0_7); ++__m256i __lasx_xvsrai_d (__m256i, imm0_63); ++__m256i __lasx_xvsrai_h (__m256i, imm0_15); ++__m256i __lasx_xvsrai_w (__m256i, imm0_31); ++__m256i __lasx_xvsran_b_h (__m256i, __m256i); ++__m256i __lasx_xvsran_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrani_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrani_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrani_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrani_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsran_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrar_b (__m256i, __m256i); ++__m256i __lasx_xvsrar_d (__m256i, __m256i); ++__m256i __lasx_xvsrar_h (__m256i, __m256i); ++__m256i __lasx_xvsrari_b 
(__m256i, imm0_7); ++__m256i __lasx_xvsrari_d (__m256i, imm0_63); ++__m256i __lasx_xvsrari_h (__m256i, imm0_15); ++__m256i __lasx_xvsrari_w (__m256i, imm0_31); ++__m256i __lasx_xvsrarn_b_h (__m256i, __m256i); ++__m256i __lasx_xvsrarn_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrarni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrarni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrarni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrarni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsrarn_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrar_w (__m256i, __m256i); ++__m256i __lasx_xvsra_w (__m256i, __m256i); ++__m256i __lasx_xvsrl_b (__m256i, __m256i); ++__m256i __lasx_xvsrl_d (__m256i, __m256i); ++__m256i __lasx_xvsrl_h (__m256i, __m256i); ++__m256i __lasx_xvsrli_b (__m256i, imm0_7); ++__m256i __lasx_xvsrli_d (__m256i, imm0_63); ++__m256i __lasx_xvsrli_h (__m256i, imm0_15); ++__m256i __lasx_xvsrli_w (__m256i, imm0_31); ++__m256i __lasx_xvsrln_b_h (__m256i, __m256i); ++__m256i __lasx_xvsrln_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrlni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrlni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrlni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrlni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsrln_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrlr_b (__m256i, __m256i); ++__m256i __lasx_xvsrlr_d (__m256i, __m256i); ++__m256i __lasx_xvsrlr_h (__m256i, __m256i); ++__m256i __lasx_xvsrlri_b (__m256i, imm0_7); ++__m256i __lasx_xvsrlri_d (__m256i, imm0_63); ++__m256i __lasx_xvsrlri_h (__m256i, imm0_15); ++__m256i __lasx_xvsrlri_w (__m256i, imm0_31); ++__m256i __lasx_xvsrlrn_b_h (__m256i, __m256i); ++__m256i __lasx_xvsrlrn_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrlrni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrlrni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrlrni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrlrni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsrlrn_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrlr_w (__m256i, __m256i); ++__m256i __lasx_xvsrl_w (__m256i, __m256i); ++__m256i __lasx_xvssran_b_h (__m256i, __m256i); ++__m256i __lasx_xvssran_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssran_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssran_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrani_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrani_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrani_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrani_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrani_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrani_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrani_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrani_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssran_w_d (__m256i, __m256i); ++__m256i __lasx_xvssran_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssrarn_b_h (__m256i, __m256i); ++__m256i __lasx_xvssrarn_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssrarn_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssrarn_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrarni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrarni_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrarni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrarni_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrarni_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrarni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrarni_w_d (__m256i, __m256i, imm0_63); ++__m256i 
__lasx_xvssrarni_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrarn_w_d (__m256i, __m256i); ++__m256i __lasx_xvssrarn_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssrln_b_h (__m256i, __m256i); ++__m256i __lasx_xvssrln_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssrln_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssrln_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrlni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlni_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlni_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlni_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrlni_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrln_w_d (__m256i, __m256i); ++__m256i __lasx_xvssrln_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_b_h (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrlrni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlrni_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlrni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlrni_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlrni_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlrni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlrni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrlrni_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrlrn_w_d (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssub_b (__m256i, __m256i); ++__m256i __lasx_xvssub_bu (__m256i, __m256i); ++__m256i __lasx_xvssub_d (__m256i, __m256i); ++__m256i __lasx_xvssub_du (__m256i, __m256i); ++__m256i __lasx_xvssub_h (__m256i, __m256i); ++__m256i __lasx_xvssub_hu (__m256i, __m256i); ++__m256i __lasx_xvssub_w (__m256i, __m256i); ++__m256i __lasx_xvssub_wu (__m256i, __m256i); ++void __lasx_xvst (__m256i, void *, imm_n2048_2047); ++void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstx (__m256i, void *, long int); ++__m256i __lasx_xvsub_b (__m256i, __m256i); ++__m256i __lasx_xvsub_d (__m256i, __m256i); ++__m256i __lasx_xvsub_h (__m256i, __m256i); ++__m256i __lasx_xvsubi_bu (__m256i, imm0_31); ++__m256i __lasx_xvsubi_du (__m256i, imm0_31); ++__m256i __lasx_xvsubi_hu (__m256i, imm0_31); ++__m256i __lasx_xvsubi_wu (__m256i, imm0_31); ++__m256i __lasx_xvsub_q (__m256i, __m256i); ++__m256i __lasx_xvsub_w (__m256i, __m256i); ++__m256i __lasx_xvsubwev_d_w (__m256i, __m256i); ++__m256i __lasx_xvsubwev_d_wu (__m256i, __m256i); ++__m256i __lasx_xvsubwev_h_b (__m256i, __m256i); ++__m256i __lasx_xvsubwev_h_bu (__m256i, __m256i); ++__m256i __lasx_xvsubwev_q_d (__m256i, __m256i); ++__m256i __lasx_xvsubwev_q_du (__m256i, __m256i); ++__m256i __lasx_xvsubwev_w_h (__m256i, __m256i); ++__m256i __lasx_xvsubwev_w_hu (__m256i, __m256i); ++__m256i __lasx_xvsubwod_d_w (__m256i, __m256i); ++__m256i __lasx_xvsubwod_d_wu (__m256i, __m256i); ++__m256i __lasx_xvsubwod_h_b (__m256i, __m256i); ++__m256i __lasx_xvsubwod_h_bu (__m256i, __m256i); ++__m256i __lasx_xvsubwod_q_d (__m256i, __m256i); ++__m256i __lasx_xvsubwod_q_du (__m256i, 
__m256i); ++__m256i __lasx_xvsubwod_w_h (__m256i, __m256i); ++__m256i __lasx_xvsubwod_w_hu (__m256i, __m256i); ++__m256i __lasx_xvxori_b (__m256i, imm0_255); ++__m256i __lasx_xvxor_v (__m256i, __m256i); ++@end smallexample ++ + @node MIPS DSP Built-in Functions + @subsection MIPS DSP Built-in Functions + +-- +2.43.0 +
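A minimal usage sketch of two of the intrinsics listed above (illustrative only, not part of the patch): it assumes <lasxintrin.h> and compilation with -mlasx on a LoongArch64 target.

#include <lasxintrin.h>

__m256i
demo_sub_xor (__m256i a, __m256i b)
{
  __m256i diff = __lasx_xvsub_w (a, b);   /* lane-wise 32-bit subtract */
  return __lasx_xvxor_v (diff, b);        /* full 256-bit bitwise XOR */
}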
View file
_service:tar_scm:0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
Added
@@ -0,0 +1,925 @@ +From 6c85d03940f87770a7e8b7195ffe45f99afef411 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 1 Dec 2023 10:09:33 +0800 +Subject: [PATCH 056/188] LoongArch: Switch loongarch-def from C to C++ to make + it possible. + +We'll use HOST_WIDE_INT in LoongArch static properties in following patches. + +To keep the same readability as C99 designated initializers, create a +std::array like data structure with position setter function, and add +field setter functions for structs used in loongarch-def.cc. + +Remove unneeded guards #if +!defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +in loongarch-def.h and loongarch-opts.h. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Remove extern "C". + (loongarch_isa_base_strings): Declare as loongarch_def_array + instead of plain array. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (loongarch_cpu_strings): Likewise. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_isa): Add a constructor and field setter functions. + * config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not + include for target libraries. + * config/loongarch/loongarch-opts.cc: Comment code that doesn't + run and causes compilation errors. + * config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise. + (struct loongarch_rtx_cost_data): Likewise. + (struct loongarch_cache): Likewise. + (struct loongarch_align): Likewise. + * config/loongarch/t-loongarch: Compile loongarch-def.cc with the + C++ compiler. + * config/loongarch/loongarch-def-array.h: New file for a + std:array like data structure with position setter function. + * config/loongarch/loongarch-def.c: Rename to ... + * config/loongarch/loongarch-def.cc: ... here. + (loongarch_cpu_strings): Define as loongarch_def_array instead + of plain array. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_isa_base_strings): Likewise. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (abi_minimal_isa): Likewise. + (loongarch_rtx_cost_optimize_size): Use field setter functions + instead of designated initializers. + (loongarch_rtx_cost_data): Implement default constructor.
+--- + gcc/config/loongarch/loongarch-def-array.h | 40 ++++ + gcc/config/loongarch/loongarch-def.c | 227 --------------------- + gcc/config/loongarch/loongarch-def.cc | 187 +++++++++++++++++ + gcc/config/loongarch/loongarch-def.h | 55 ++--- + gcc/config/loongarch/loongarch-opts.cc | 7 + + gcc/config/loongarch/loongarch-opts.h | 5 +- + gcc/config/loongarch/loongarch-tune.h | 123 ++++++++++- + gcc/config/loongarch/t-loongarch | 4 +- + 8 files changed, 390 insertions(+), 258 deletions(-) + create mode 100644 gcc/config/loongarch/loongarch-def-array.h + delete mode 100644 gcc/config/loongarch/loongarch-def.c + create mode 100644 gcc/config/loongarch/loongarch-def.cc + +diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h +new file mode 100644 +index 000000000..bdb3e9c6a +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def-array.h +@@ -0,0 +1,40 @@ ++/* A std::array like data structure for LoongArch static properties. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef _LOONGARCH_DEF_ARRAY_H ++#define _LOONGARCH_DEF_ARRAY_H 1 ++ ++template <class T, int N> ++class loongarch_def_array { ++private: ++ T arr[N]; ++public: ++ loongarch_def_array () : arr{} {} ++ ++ T &operator [] (int n) { return arr[n]; } ++ const T &operator [] (int n) const { return arr[n]; } ++ ++ loongarch_def_array set (int idx, T &&value) ++ { ++ (*this)[idx] = value; ++ return *this; ++ } ++}; ++ ++#endif +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +deleted file mode 100644 +index fe4474e77..000000000 +--- a/gcc/config/loongarch/loongarch-def.c ++++ /dev/null +@@ -1,227 +0,0 @@ +-/* LoongArch static properties. +- Copyright (C) 2021-2022 Free Software Foundation, Inc. +- Contributed by Loongson Ltd. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-<http://www.gnu.org/licenses/>. */ +- +-#include "loongarch-def.h" +-#include "loongarch-str.h" +- +-/* CPU property tables.
*/ +-const char* +-loongarch_cpu_strings[N_TUNE_TYPES] = { +- [CPU_NATIVE] = STR_CPU_NATIVE, +- [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT, +- [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64, +- [CPU_LA464] = STR_CPU_LA464, +- [CPU_LA664] = STR_CPU_LA664, +-}; +- +-struct loongarch_isa +-loongarch_cpu_default_isa[N_ARCH_TYPES] = { +- [CPU_LOONGARCH64] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU64, +- .simd = 0, +- }, +- [CPU_LA464] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU64, +- .simd = ISA_EXT_SIMD_LASX, +- }, +- [CPU_LA664] = { +- .base = ISA_BASE_LA64V110, +- .fpu = ISA_EXT_FPU64, +- .simd = ISA_EXT_SIMD_LASX, +- }, +-}; +- +-struct loongarch_cache +-loongarch_cpu_cache[N_TUNE_TYPES] = { +- [CPU_LOONGARCH64] = { +- .l1d_line_size = 64, +- .l1d_size = 64, +- .l2d_size = 256, +- .simultaneous_prefetches = 4, +- }, +- [CPU_LA464] = { +- .l1d_line_size = 64, +- .l1d_size = 64, +- .l2d_size = 256, +- .simultaneous_prefetches = 4, +- }, +- [CPU_LA664] = { +- .l1d_line_size = 64, +- .l1d_size = 64, +- .l2d_size = 256, +- .simultaneous_prefetches = 4, +- }, +-}; +- +-struct loongarch_align +-loongarch_cpu_align[N_TUNE_TYPES] = { +- [CPU_LOONGARCH64] = { +- .function = "32", +- .label = "16", +- }, +- [CPU_LA464] = { +- .function = "32", +- .label = "16", +- }, +- [CPU_LA664] = { +- .function = "32", +- .label = "16", +- }, +-}; +- +- +-/* Default RTX cost initializer. */ +-#define COSTS_N_INSNS(N) ((N) * 4) +-#define DEFAULT_COSTS \ +- .fp_add = COSTS_N_INSNS (1), \ +- .fp_mult_sf = COSTS_N_INSNS (2), \ +- .fp_mult_df = COSTS_N_INSNS (4), \ +- .fp_div_sf = COSTS_N_INSNS (6), \ +- .fp_div_df = COSTS_N_INSNS (8), \ +- .int_mult_si = COSTS_N_INSNS (1), \ +- .int_mult_di = COSTS_N_INSNS (1), \ +- .int_div_si = COSTS_N_INSNS (4), \ +- .int_div_di = COSTS_N_INSNS (6), \ +- .branch_cost = 6, \ +- .memory_latency = 4 +- +-/* The following properties cannot be looked up directly using "cpucfg". +- So it is necessary to provide a default value for "unknown native" +- tune targets (i.e. -mtune=native while PRID does not correspond to +- any known "-mtune" type). */ +- +-struct loongarch_rtx_cost_data +-loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = { +- [CPU_NATIVE] = { +- DEFAULT_COSTS +- }, +- [CPU_LOONGARCH64] = { +- DEFAULT_COSTS +- }, +- [CPU_LA464] = { +- DEFAULT_COSTS +- }, +- [CPU_LA664] = { +- DEFAULT_COSTS +- }, +-}; +- +-/* RTX costs to use when optimizing for size. */ +-const struct loongarch_rtx_cost_data +-loongarch_rtx_cost_optimize_size = { +- .fp_add = 4, +- .fp_mult_sf = 4, +- .fp_mult_df = 4, +- .fp_div_sf = 4, +- .fp_div_df = 4, +- .int_mult_si = 4, +- .int_mult_di = 4, +- .int_div_si = 4, +- .int_div_di = 4, +- .branch_cost = 6, +- .memory_latency = 4, +-}; +- +-int +-loongarch_cpu_issue_rate[N_TUNE_TYPES] = { +- [CPU_NATIVE] = 4, +- [CPU_LOONGARCH64] = 4, +- [CPU_LA464] = 4, +- [CPU_LA664] = 6, +-}; +- +-int +-loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { +- [CPU_NATIVE] = 4, +- [CPU_LOONGARCH64] = 4, +- [CPU_LA464] = 4, +- [CPU_LA664] = 6, +-}; +- +-/* Wiring string definitions from loongarch-str.h to global arrays +- with standard index values from loongarch-opts.h, so we can +- print config-related messages and do ABI self-spec filtering +- from the driver in a self-consistent manner.
*/ +- +-const char* +-loongarch_isa_base_strings[N_ISA_BASE_TYPES] = { +- [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100, +- [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110, +-}; +- +-const char* +-loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = { +- [ISA_EXT_NONE] = STR_NONE, +- [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32, +- [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64, +- [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX, +- [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX, +-}; +- +-const char* +-loongarch_abi_base_strings[N_ABI_BASE_TYPES] = { +- [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D, +- [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F, +- [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S, +-}; +- +-const char* +-loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = { +- [ABI_EXT_BASE] = STR_ABI_EXT_BASE, +-}; +- +-const char* +-loongarch_cmodel_strings[] = { +- [CMODEL_NORMAL] = STR_CMODEL_NORMAL, +- [CMODEL_TINY] = STR_CMODEL_TINY, +- [CMODEL_TINY_STATIC] = STR_CMODEL_TS, +- [CMODEL_MEDIUM] = STR_CMODEL_MEDIUM, +- [CMODEL_LARGE] = STR_CMODEL_LARGE, +- [CMODEL_EXTREME] = STR_CMODEL_EXTREME, +-}; +- +- +-/* ABI-related definitions. */ +-const struct loongarch_isa +-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { +- [ABI_BASE_LP64D] = { +- [ABI_EXT_BASE] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU64, +- .simd = 0 +- }, +- }, +- [ABI_BASE_LP64F] = { +- [ABI_EXT_BASE] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU32, +- .simd = 0 +- }, +- }, +- [ABI_BASE_LP64S] = { +- [ABI_EXT_BASE] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_NONE, +- .simd = 0 +- }, +- }, +-}; +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +new file mode 100644 +index 000000000..6990c86c2 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -0,0 +1,187 @@ ++/* LoongArch static properties. ++ Copyright (C) 2021-2023 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "loongarch-def.h" ++#include "loongarch-str.h" ++ ++template <class T, int N> ++using array = loongarch_def_array<T, N>; ++ ++template <class T> ++using array_tune = array<T, N_TUNE_TYPES>; ++ ++template <class T> ++using array_arch = array<T, N_ARCH_TYPES>; ++ ++/* CPU property tables.
*/ ++array_tune<const char *> loongarch_cpu_strings = array_tune<const char *> () ++ .set (CPU_NATIVE, STR_CPU_NATIVE) ++ .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT) ++ .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64) ++ .set (CPU_LA464, STR_CPU_LA464) ++ .set (CPU_LA664, STR_CPU_LA664); ++ ++array_arch<loongarch_isa> loongarch_cpu_default_isa = ++ array_arch<loongarch_isa> () ++ .set (CPU_LOONGARCH64, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU64)) ++ .set (CPU_LA464, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LASX)) ++ .set (CPU_LA664, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V110) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LASX)); ++ ++static inline loongarch_cache la464_cache () ++{ ++ return loongarch_cache () ++ .l1d_line_size_ (64) ++ .l1d_size_ (64) ++ .l2d_size_ (256) ++ .simultaneous_prefetches_ (4); ++} ++ ++array_tune<loongarch_cache> loongarch_cpu_cache = ++ array_tune<loongarch_cache> () ++ .set (CPU_LOONGARCH64, la464_cache ()) ++ .set (CPU_LA464, la464_cache ()) ++ .set (CPU_LA664, la464_cache ()); ++ ++static inline loongarch_align la464_align () ++{ ++ return loongarch_align ().function_ ("32").label_ ("16"); ++} ++ ++array_tune<loongarch_align> loongarch_cpu_align = ++ array_tune<loongarch_align> () ++ .set (CPU_LOONGARCH64, la464_align ()) ++ .set (CPU_LA464, la464_align ()) ++ .set (CPU_LA664, la464_align ()); ++ ++#define COSTS_N_INSNS(N) ((N) * 4) ++ ++/* Default RTX cost initializer. */ ++loongarch_rtx_cost_data::loongarch_rtx_cost_data () ++ : fp_add (COSTS_N_INSNS (1)), ++ fp_mult_sf (COSTS_N_INSNS (2)), ++ fp_mult_df (COSTS_N_INSNS (4)), ++ fp_div_sf (COSTS_N_INSNS (6)), ++ fp_div_df (COSTS_N_INSNS (8)), ++ int_mult_si (COSTS_N_INSNS (1)), ++ int_mult_di (COSTS_N_INSNS (1)), ++ int_div_si (COSTS_N_INSNS (4)), ++ int_div_di (COSTS_N_INSNS (6)), ++ branch_cost (6), ++ memory_latency (4) {} ++ ++/* The following properties cannot be looked up directly using "cpucfg". ++ So it is necessary to provide a default value for "unknown native" ++ tune targets (i.e. -mtune=native while PRID does not correspond to ++ any known "-mtune" type). Currently all numbers are default. */ ++array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = ++ array_tune<loongarch_rtx_cost_data> (); ++ ++/* RTX costs to use when optimizing for size. */ ++const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = ++ loongarch_rtx_cost_data () ++ .fp_add_ (4) ++ .fp_mult_sf_ (4) ++ .fp_mult_df_ (4) ++ .fp_div_sf_ (4) ++ .fp_div_df_ (4) ++ .int_mult_si_ (4) ++ .int_mult_di_ (4) ++ .int_div_si_ (4) ++ .int_div_di_ (4); ++ ++array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () ++ .set (CPU_NATIVE, 4) ++ .set (CPU_LOONGARCH64, 4) ++ .set (CPU_LA464, 4) ++ .set (CPU_LA664, 6); ++ ++array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () ++ .set (CPU_NATIVE, 4) ++ .set (CPU_LOONGARCH64, 4) ++ .set (CPU_LA464, 4) ++ .set (CPU_LA664, 6); ++ ++/* Wiring string definitions from loongarch-str.h to global arrays ++ with standard index values from loongarch-opts.h, so we can ++ print config-related messages and do ABI self-spec filtering ++ from the driver in a self-consistent manner. 
*/ ++ ++array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = ++ array<const char *, N_ISA_BASE_TYPES> () ++ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) ++ .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); ++ ++array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = ++ array<const char *, N_ISA_EXT_TYPES> () ++ .set (ISA_EXT_NONE, STR_NONE) ++ .set (ISA_EXT_FPU32, STR_ISA_EXT_FPU32) ++ .set (ISA_EXT_FPU64, STR_ISA_EXT_FPU64) ++ .set (ISA_EXT_SIMD_LSX, STR_ISA_EXT_LSX) ++ .set (ISA_EXT_SIMD_LASX, STR_ISA_EXT_LASX); ++ ++array<const char *, N_ABI_BASE_TYPES> loongarch_abi_base_strings = ++ array<const char *, N_ABI_BASE_TYPES> () ++ .set (ABI_BASE_LP64D, STR_ABI_BASE_LP64D) ++ .set (ABI_BASE_LP64F, STR_ABI_BASE_LP64F) ++ .set (ABI_BASE_LP64S, STR_ABI_BASE_LP64S); ++ ++array<const char *, N_ABI_EXT_TYPES> loongarch_abi_ext_strings = ++ array<const char *, N_ABI_EXT_TYPES> () ++ .set (ABI_EXT_BASE, STR_ABI_EXT_BASE); ++ ++array<const char *, N_CMODEL_TYPES> loongarch_cmodel_strings = ++ array<const char *, N_CMODEL_TYPES> () ++ .set (CMODEL_NORMAL, STR_CMODEL_NORMAL) ++ .set (CMODEL_TINY, STR_CMODEL_TINY) ++ .set (CMODEL_TINY_STATIC, STR_CMODEL_TS) ++ .set (CMODEL_MEDIUM, STR_CMODEL_MEDIUM) ++ .set (CMODEL_LARGE, STR_CMODEL_LARGE) ++ .set (CMODEL_EXTREME, STR_CMODEL_EXTREME); ++ ++array<array<loongarch_isa, N_ABI_EXT_TYPES>, N_ABI_BASE_TYPES> ++ abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>, ++ N_ABI_BASE_TYPES> () ++ .set (ABI_BASE_LP64D, ++ array<loongarch_isa, N_ABI_EXT_TYPES> () ++ .set (ABI_EXT_BASE, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU64))) ++ .set (ABI_BASE_LP64F, ++ array<loongarch_isa, N_ABI_EXT_TYPES> () ++ .set (ABI_EXT_BASE, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU32))) ++ .set (ABI_BASE_LP64S, ++ array<loongarch_isa, N_ABI_EXT_TYPES> () ++ .set (ABI_EXT_BASE, ++ loongarch_isa ().base_ (ISA_BASE_LA64V100))); +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index ef848f606..5ac70dfdd 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -50,20 +50,18 @@ along with GCC; see the file COPYING3. If not see + #include <stdint.h> + #endif + ++#include "loongarch-def-array.h" + #include "loongarch-tune.h" + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* enum isa_base */ +-extern const char* loongarch_isa_base_strings[]; + + /* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 + /* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 ++extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> ++ loongarch_isa_base_strings; + + #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Unlike other arrays, this is defined in loongarch-cpu.cc.
The problem is +@@ -72,7 +70,6 @@ extern int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES]; + #endif + + /* enum isa_ext_* */ +-extern const char* loongarch_isa_ext_strings[]; + #define ISA_EXT_NONE 0 + #define ISA_EXT_FPU32 1 + #define ISA_EXT_FPU64 2 +@@ -80,13 +77,16 @@ extern const char* loongarch_isa_ext_strings[]; + #define ISA_EXT_SIMD_LSX 3 + #define ISA_EXT_SIMD_LASX 4 + #define N_ISA_EXT_TYPES 5 ++extern loongarch_def_array<const char *, N_ISA_EXT_TYPES> ++ loongarch_isa_ext_strings; + + /* enum abi_base */ +-extern const char* loongarch_abi_base_strings[]; + #define ABI_BASE_LP64D 0 + #define ABI_BASE_LP64F 1 + #define ABI_BASE_LP64S 2 + #define N_ABI_BASE_TYPES 3 ++extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> ++ loongarch_abi_base_strings; + + #define TO_LP64_ABI_BASE(C) (C) + +@@ -99,12 +99,12 @@ extern const char* loongarch_abi_base_strings[]; + + + /* enum abi_ext */ +-extern const char* loongarch_abi_ext_strings[]; + #define ABI_EXT_BASE 0 + #define N_ABI_EXT_TYPES 1 ++extern loongarch_def_array<const char *, N_ABI_EXT_TYPES> ++ loongarch_abi_ext_strings; + + /* enum cmodel */ +-extern const char* loongarch_cmodel_strings[]; + #define CMODEL_NORMAL 0 + #define CMODEL_TINY 1 + #define CMODEL_TINY_STATIC 2 +@@ -112,6 +112,8 @@ extern const char* loongarch_cmodel_strings[]; + #define CMODEL_LARGE 4 + #define CMODEL_EXTREME 5 + #define N_CMODEL_TYPES 6 ++extern loongarch_def_array<const char *, N_CMODEL_TYPES> ++ loongarch_cmodel_strings; + + /* enum explicit_relocs */ + #define EXPLICIT_RELOCS_AUTO 0 +@@ -126,7 +128,6 @@ extern const char* loongarch_cmodel_strings[]; + #define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) + + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Internal representation of the target. */ + struct loongarch_isa + { +@@ -139,6 +140,13 @@ struct loongarch_isa + + Using int64_t instead of HOST_WIDE_INT for C compatibility. */ + int64_t evolution; ++ ++ loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} ++ loongarch_isa base_ (int _base) { base = _base; return *this; } ++ loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } ++ loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } ++ loongarch_isa evolution_ (int64_t _evolution) ++ { evolution = _evolution; return *this; } + }; + + struct loongarch_abi +@@ -156,9 +164,6 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + +-extern struct loongarch_isa loongarch_cpu_default_isa[]; +-#endif +- + /* CPU properties. */ + /* index */ + #define CPU_NATIVE 0 +@@ -170,15 +175,19 @@ extern struct loongarch_isa loongarch_cpu_default_isa[]; + #define N_TUNE_TYPES 5 + + /* parallel tables.
*/ +-extern const char* loongarch_cpu_strings[]; +-extern int loongarch_cpu_issue_rate[]; +-extern int loongarch_cpu_multipass_dfa_lookahead[]; ++extern loongarch_def_array<const char *, N_ARCH_TYPES> ++ loongarch_cpu_strings; ++extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES> ++ loongarch_cpu_default_isa; ++extern loongarch_def_array<int, N_TUNE_TYPES> ++ loongarch_cpu_issue_rate; ++extern loongarch_def_array<int, N_TUNE_TYPES> ++ loongarch_cpu_multipass_dfa_lookahead; ++extern loongarch_def_array<loongarch_cache, N_TUNE_TYPES> ++ loongarch_cpu_cache; ++extern loongarch_def_array<loongarch_align, N_TUNE_TYPES> ++ loongarch_cpu_align; ++extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES> ++ loongarch_cpu_rtx_cost_data; + +-extern struct loongarch_cache loongarch_cpu_cache[]; +-extern struct loongarch_align loongarch_cpu_align[]; +-extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; +- +-#ifdef __cplusplus +-} +-#endif + #endif /* LOONGARCH_DEF_H */ +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 390720479..45fc521e4 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -163,6 +163,7 @@ loongarch_config_target (struct loongarch_target *target, + int follow_multilib_list_p) + { + struct loongarch_target t; ++ + if (!target) + return; + +@@ -657,12 +658,18 @@ abi_str (struct loongarch_abi abi) + strlen (loongarch_abi_base_strings[abi.base])); + else + { ++ /* This situation has not yet occurred, so in order to avoid the ++ -Warray-bounds warning during C++ syntax checking, this part ++ of the code is commented first. */ ++ /* + APPEND_STRING (loongarch_abi_base_strings[abi.base]) + APPEND1 ('/') + APPEND_STRING (loongarch_abi_ext_strings[abi.ext]) + APPEND1 ('\0') + + return XOBFINISH (&msg_obstack, const char *); ++ */ ++ gcc_unreachable (); + } + } + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 9b3d023ac..0dabf1551 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -21,7 +21,10 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_OPTS_H + #define LOONGARCH_OPTS_H + ++/* This is a C++ header and it shouldn't be used by target libraries. */ ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include "loongarch-def.h" ++#endif + + /* Target configuration */ + extern struct loongarch_target la_target; +@@ -33,7 +36,6 @@ struct loongarch_flags { + int sx2; + }; + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + + /* Initialize loongarch_target from separate option variables. */ + void +@@ -54,7 +56,6 @@ void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); +-#endif + + + /* Macros for common conditional expressions used in loongarch.{c,h,md} */ +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index d961963f0..616b94e87 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_TUNE_H + #define LOONGARCH_TUNE_H + ++#include "loongarch-def-array.h" ++ + /* RTX costs of various operations on the different architectures.
*/ + struct loongarch_rtx_cost_data + { +@@ -35,6 +37,76 @@ struct loongarch_rtx_cost_data + unsigned short int_div_di; + unsigned short branch_cost; + unsigned short memory_latency; ++ ++ /* Default RTX cost initializer, implemented in loongarch-def.cc. */ ++ loongarch_rtx_cost_data (); ++ ++ loongarch_rtx_cost_data fp_add_ (unsigned short _fp_add) ++ { ++ fp_add = _fp_add; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_mult_sf_ (unsigned short _fp_mult_sf) ++ { ++ fp_mult_sf = _fp_mult_sf; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_mult_df_ (unsigned short _fp_mult_df) ++ { ++ fp_mult_df = _fp_mult_df; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_div_sf_ (unsigned short _fp_div_sf) ++ { ++ fp_div_sf = _fp_div_sf; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_div_df_ (unsigned short _fp_div_df) ++ { ++ fp_div_df = _fp_div_df; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_mult_si_ (unsigned short _int_mult_si) ++ { ++ int_mult_si = _int_mult_si; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_mult_di_ (unsigned short _int_mult_di) ++ { ++ int_mult_di = _int_mult_di; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_div_si_ (unsigned short _int_div_si) ++ { ++ int_div_si = _int_div_si; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_div_di_ (unsigned short _int_div_di) ++ { ++ int_div_di = _int_div_di; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) ++ { ++ branch_cost = _branch_cost; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data memory_latency_ (unsigned short _memory_latency) ++ { ++ memory_latency = _memory_latency; ++ return *this; ++ } ++ + }; + + /* Costs to use when optimizing for size. */ +@@ -42,10 +114,39 @@ extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size; + + /* Cache size record of known processor models. */ + struct loongarch_cache { +- int l1d_line_size; /* bytes */ +- int l1d_size; /* KiB */ +- int l2d_size; /* kiB */ +- int simultaneous_prefetches; /* number of parallel prefetch */ ++ int l1d_line_size; /* bytes */ ++ int l1d_size; /* KiB */ ++ int l2d_size; /* kiB */ ++ int simultaneous_prefetches; /* number of parallel prefetch */ ++ ++ loongarch_cache () : l1d_line_size (0), ++ l1d_size (0), ++ l2d_size (0), ++ simultaneous_prefetches (0) {} ++ ++ loongarch_cache l1d_line_size_ (int _l1d_line_size) ++ { ++ l1d_line_size = _l1d_line_size; ++ return *this; ++ } ++ ++ loongarch_cache l1d_size_ (int _l1d_size) ++ { ++ l1d_size = _l1d_size; ++ return *this; ++ } ++ ++ loongarch_cache l2d_size_ (int _l2d_size) ++ { ++ l2d_size = _l2d_size; ++ return *this; ++ } ++ ++ loongarch_cache simultaneous_prefetches_ (int _simultaneous_prefetches) ++ { ++ simultaneous_prefetches = _simultaneous_prefetches; ++ return *this; ++ } + }; + + /* Alignment for functions and labels for best performance. 
For new uarchs +@@ -54,6 +155,20 @@ struct loongarch_cache { + struct loongarch_align { + const char *function; /* default value for -falign-functions */ + const char *label; /* default value for -falign-labels */ ++ ++ loongarch_align () : function (nullptr), label (nullptr) {} ++ ++ loongarch_align function_ (const char *_function) ++ { ++ function = _function; ++ return *this; ++ } ++ ++ loongarch_align label_ (const char *_label) ++ { ++ label = _label; ++ return *this; ++ } + }; + + #endif /* LOONGARCH_TUNE_H */ +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 57b1176bc..a1a40431f 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -64,8 +64,8 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ + $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +-loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) +- $(CC) -c $(ALL_CFLAGS) $(INCLUDES) $< ++loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ +-- +2.43.0 +
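A minimal sketch of the chained position-setter idiom this patch introduces (illustrative only, not part of the patch; small integer indices stand in for the CPU_* macros the real tables use):

#include "loongarch-def-array.h"

/* Equivalent of the old C99 form: int rates[3] = { [0] = 4, [2] = 6 };  */
static loongarch_def_array<int, 3> demo_rates =
  loongarch_def_array<int, 3> ()
    .set (0, 4)
    .set (2, 6);   /* slots never set stay value-initialized to 0 */

Each set returns the updated array by value, so initializers keep the readability of designated initializers while remaining valid C++.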
View file
_service:tar_scm:0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
Added
@@ -0,0 +1,261 @@ +From 1ec35f153636077760b65dc3e0385d0a4d383486 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 1 Dec 2023 11:51:51 +0800 +Subject: [PATCH 057/188] LoongArch: Remove the definition of ISA_BASE_LA64V110 + from the code. + +The instructions defined in LoongArch Reference Manual v1.1 are not the instruction +set v1.1 version. The CPU defined later may only support some instructions in +LoongArch Reference Manual v1.1. Therefore, the macro ISA_BASE_LA64V110 and +related definitions are removed here. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro. + (fill_native_cpu_config): Define a new variable hw_isa_evolution record the + extended instruction set support read from cpucfg. + * config/loongarch/loongarch-def.cc: Set evolution at initialization. + * config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete. + (ISA_BASE_LA64V110): Likewise. + (N_ISA_BASE_TYPES): Likewise. + (defined): Likewise. + * config/loongarch/loongarch-opts.cc: Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise. + (ISA_BASE_IS_LA64V110): Likewise. + * config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise. + * config/loongarch/loongarch.opt: Regenerate. +--- + .../loongarch/genopts/loongarch-strings | 1 - + gcc/config/loongarch/genopts/loongarch.opt.in | 3 --- + gcc/config/loongarch/loongarch-cpu.cc | 23 +++++-------------- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++---- + gcc/config/loongarch/loongarch-def.h | 12 ++-------- + gcc/config/loongarch/loongarch-opts.cc | 3 --- + gcc/config/loongarch/loongarch-opts.h | 4 +--- + gcc/config/loongarch/loongarch-str.h | 1 - + gcc/config/loongarch/loongarch.opt | 3 --- + 9 files changed, 19 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 6c8a42af2..411ad5696 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -30,7 +30,6 @@ STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 +-STR_ISA_BASE_LA64V110 la64v1.1 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a49de07c9..cd5e75e4f 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -32,9 +32,6 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) + +-EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) +- + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index bbce82c9c..7e0625835 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see + #include "config.h" + #include "system.h" + #include "coretypes.h" +-#include "tm.h" + #include "diagnostic-core.h" + + #include "loongarch-def.h" +@@ -32,19 +31,6 @@ along with GCC; see the file COPYING3.
If not see + #include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + +-/* loongarch_isa_base_features defined here instead of loongarch-def.c +- because we need to use options.h. Pay attention on the order of elements +- in the initializer becaue ISO C++ does not allow C99 designated +- initializers! */ +- +-#define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ +- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) +- +-int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = { +- /* ISA_BASE_LA64V100 = */ 0, +- /* ISA_BASE_LA64V110 = */ ISA_BASE_LA64V110_FEATURES, +-}; + + /* Native CPU detection with "cpucfg" */ + static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; +@@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Use the native value anyways. */ + preset.simd = tmp; + ++ ++ int64_t hw_isa_evolution = 0; ++ + /* Features added during ISA evolution. */ + for (const auto &entry: cpucfg_map) + if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit) +- preset.evolution |= entry.isa_evolution_bit; ++ hw_isa_evolution |= entry.isa_evolution_bit; + + if (native_cpu_type != CPU_NATIVE) + { + /* Check if the local CPU really supports the features of the base + ISA of probed native_cpu_type. If any feature is not detected, + either GCC or the hardware is buggy. */ +- auto base_isa_feature = loongarch_isa_base_features[preset.base]; +- if ((preset.evolution & base_isa_feature) != base_isa_feature) ++ if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution) + warning (0, + "detected base architecture %qs, but some of its " + "features are not detected; the detected base " + "features will be enabled", + loongarch_isa_base_strings[preset.base]); + } ++ preset.evolution = hw_isa_evolution; + } + + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6990c86c2..bc6997e45 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>.
*/ + ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++ + #include "loongarch-def.h" + #include "loongarch-str.h" + +@@ -51,9 +56,11 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ (ISA_BASE_LA64V110) ++ .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64) +- .simd_ (ISA_EXT_SIMD_LASX)); ++ .simd_ (ISA_EXT_SIMD_LASX) ++ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); + + static inline loongarch_cache la464_cache () + { +@@ -136,8 +143,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () + + array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = + array<const char *, N_ISA_BASE_TYPES> () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) +- .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); ++ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); + + array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = + array<const char *, N_ISA_EXT_TYPES> () +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 5ac70dfdd..f8f36f0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -56,19 +56,11 @@ along with GCC; see the file COPYING3. If not see + /* enum isa_base */ + + /* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-/* LoongArch V1.10. */ +-#define ISA_BASE_LA64V110 1 +-#define N_ISA_BASE_TYPES 2 ++#define ISA_BASE_LA64V100 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +-/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is +- we cannot use the C++ header options.h in loongarch-def.c. */ +-extern int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES]; +-#endif +- + /* enum isa_ext_* */ + #define ISA_EXT_NONE 0 + #define ISA_EXT_FPU32 1 +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 45fc521e4..d31becc67 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -285,9 +285,6 @@ config_target_isa: + /* Get default ISA from "-march" or its default value. */ + t.isa = loongarch_cpu_default_isa[t.cpu_arch]; + +- if (t.cpu_arch != CPU_NATIVE) +- t.isa.evolution |= loongarch_isa_base_features[t.isa.base]; +- + /* Apply incremental changes. */ + /* "-march=native" overrides the default FPU type.
*/ + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 0dabf1551..7010ddfec 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -77,8 +77,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \ +- || la_target.isa.base == ISA_BASE_LA64V110) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +@@ -90,7 +89,6 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) +-#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) + + /* Note: optimize_size may vary across functions, + while -mno-memcpy imposes a global constraint. */ +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 0fee9abe5..7144bbe28 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -33,7 +33,6 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA664 "la664" + + #define STR_ISA_BASE_LA64V100 "la64" +-#define STR_ISA_BASE_LA64V110 "la64v1.1" + + #define OPTSTR_ISA_EXT_FPU "fpu" + #define STR_NONE "none" +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index ea0d5bb4e..7fe36feb9 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -40,9 +40,6 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) + +-EnumValue +-Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110) +- + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +-- +2.43.0 +
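A sketch of the feature-accumulation pattern this patch switches to (illustrative only, not part of the patch; the entry layout mirrors loongarch-cpucfg-map.h but the names here are placeholders):

#include <stdint.h>

struct cpucfg_map_entry
{
  int cpucfg_word;            /* which cached cpucfg result word to test */
  uint32_t cpucfg_bit;        /* bit within that word */
  int64_t isa_evolution_bit;  /* corresponding OPTION_MASK_ISA_* bit */
};

static int64_t
collect_evolution (const cpucfg_map_entry *map, int n,
                   const uint32_t *cpucfg_cache)
{
  int64_t evolution = 0;
  for (int i = 0; i < n; i++)
    if (cpucfg_cache[map[i].cpucfg_word] & map[i].cpucfg_bit)
      evolution |= map[i].isa_evolution_bit;  /* OR in each detected feature */
  return evolution;
}

Reading the evolution mask from hardware replaces the fixed per-base-ISA feature table that the patch deletes.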
View file
_service:tar_scm:0058-LoongArch-Add-support-for-xorsign.patch
Added
@@ -0,0 +1,412 @@ +From dac02bbb72cae374ddc905fffcc6c94c901f9b26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 17 Nov 2023 17:00:21 +0800 +Subject: [PATCH 058/188] LoongArch: Add support for xorsign. + +This patch adds support for xorsign pattern to scalar fp and vector. With the +new expands, uniformly using vector bitwise logical operations to handle xorsign. + +On LoongArch64, floating-point registers and vector registers share the same register, +so this patch also allows conversion between LSX vector mode and scalar fp mode to +avoid unnecessary instruction generation. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xorsign<mode>3): New expander. + * config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow + conversion between LSX vector mode and scalar fp mode. + * config/loongarch/loongarch.md (@xorsign<mode>3): New expander. + * config/loongarch/lsx.md (@xorsign<mode>3): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test. + * gcc.target/loongarch/xorsign-run.c: New test. + * gcc.target/loongarch/xorsign.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 +++++-- + gcc/config/loongarch/loongarch.cc | 5 ++ + gcc/config/loongarch/loongarch.md | 17 ++++++ + gcc/config/loongarch/lsx.md | 23 +++++-- + .../loongarch/vector/lasx/lasx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xorsign.c | 19 ++++++ + .../loongarch/vector/lsx/lsx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-xorsign.c | 19 ++++++ + .../gcc.target/loongarch/xorsign-run.c | 25 ++++++++ + gcc/testsuite/gcc.target/loongarch/xorsign.c | 18 ++++++ + 10 files changed, 260 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 116b30c07..de7c88f14 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1065,10 +1065,10 @@ + (set_attr "mode" "<MODE>")]) + + (define_insn "xor<mode>3" +- [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") +- (xor:ILASX +- (match_operand:ILASX 1 "register_operand" "f,f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] ++ [(set (match_operand:LASX 0 "register_operand" "=f,f,f") ++ (xor:LASX ++ (match_operand:LASX 1 "register_operand" "f,f,f") ++ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "ISA_HAS_LASX" + "@ + xvxor.v\t%u0,%u1,%u2 +@@ -3061,6 +3061,20 @@ + operands[5] = gen_reg_rtx (<MODE>mode); + }) + ++(define_expand "xorsign<mode>3" ++ [(set (match_dup 4) ++ (and:FLASX (match_dup 3) ++ (match_operand:FLASX 2 "register_operand"))) ++ (set (match_operand:FLASX 0 "register_operand") ++ (xor:FLASX (match_dup 4) ++ (match_operand:FLASX 1 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0); ++ ++ operands[4] =
gen_reg_rtx (<MODE>mode); ++}) ++ + + (define_insn "absv4df2" + [(set (match_operand:V4DF 0 "register_operand" "=f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3ef7e3605..3c8ae9a42 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6703,6 +6703,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to, + if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) + return true; + ++ /* Allow conversion between LSX vector mode and scalar fp mode. */ ++ if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to)) ++ || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)))) ++ return true; ++ + return !reg_classes_intersect_p (FP_REGS, rclass); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index cfd7a8ec6..afc3c591f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1164,6 +1164,23 @@ + "fcopysign.<fmt>\t%0,%1,%2" + [(set_attr "type" "fcopysign") + (set_attr "mode" "<UNITMODE>")]) ++ ++(define_expand "@xorsign<mode>3" ++ [(match_operand:ANYF 0 "register_operand") ++ (match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ machine_mode lsx_mode ++ = <MODE>mode == SFmode ? V4SFmode : V2DFmode; ++ rtx tmp = gen_reg_rtx (lsx_mode); ++ rtx op1 = lowpart_subreg (lsx_mode, operands[1], <MODE>mode); ++ rtx op2 = lowpart_subreg (lsx_mode, operands[2], <MODE>mode); ++ emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); ++ emit_move_insn (operands[0], ++ lowpart_subreg (<MODE>mode, tmp, lsx_mode)); ++ DONE; ++}) +  + ;; + ;; .................... +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 232399934..ce6ec6d69 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -957,10 +957,10 @@ + (set_attr "mode" "<MODE>")]) + + (define_insn "xor<mode>3" +- [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") +- (xor:ILSX +- (match_operand:ILSX 1 "register_operand" "f,f,f") +- (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] ++ [(set (match_operand:LSX 0 "register_operand" "=f,f,f") ++ (xor:LSX ++ (match_operand:LSX 1 "register_operand" "f,f,f") ++ (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "ISA_HAS_LSX" + "@ + vxor.v\t%w0,%w1,%w2 +@@ -2786,6 +2786,21 @@ + operands[5] = gen_reg_rtx (<MODE>mode); + }) + ++(define_expand "@xorsign<mode>3" ++ [(set (match_dup 4) ++ (and:FLSX (match_dup 3) ++ (match_operand:FLSX 2 "register_operand"))) ++ (set (match_operand:FLSX 0 "register_operand") ++ (xor:FLSX (match_dup 4) ++ (match_operand:FLSX 1 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0); ++ ++ operands[4] = gen_reg_rtx (<MODE>mode); ++}) ++ ++ + (define_insn "absv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (abs:V2DF (match_operand:V2DF 1 "register_operand" "f")))] +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +new file mode 100644 +index 000000000..2295503d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ ++/* { dg-require-effective-target loongarch_asx_hw } */ ++ ++#include "lasx-xorsign.c" ++ ++extern void abort (); ++ ++#define N 16 ++float a[N] = {-0.1f, -3.2f, -6.3f,
-9.4f, ++ -12.5f, -15.6f, -18.7f, -21.8f, ++ 24.9f, 27.1f, 30.2f, 33.3f, ++ 36.4f, 39.5f, 42.6f, 45.7f}; ++float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, ++ -9.0f, 1.0f, -2.0f, 3.0f, ++ -4.0f, -5.0f, 6.0f, 7.0f, ++ -8.0f, -9.0f, 10.0f, 11.0f}; ++float r[N]; ++ ++double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d, ++ -12.5d, -15.6d, -18.7d, -21.8d, ++ 24.9d, 27.1d, 30.2d, 33.3d, ++ 36.4d, 39.5d, 42.6d, 45.7d}; ++double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d, ++ -9.0d, 1.0d, -2.0d, 3.0d, ++ -4.0d, -5.0d, 6.0d, 7.0d, ++ -8.0d, -9.0d, 10.0d, 11.0d}; ++double rd[N]; ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsignf (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) ++ abort (); ++} ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsign (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) ++ abort (); ++} ++ ++int ++main (void) ++{ ++ my_xorsignf (r, a, b, N); ++ /* check results: */ ++ check_xorsignf (); ++ ++ my_xorsign (rd, ad, bd, N); ++ /* check results: */ ++ check_xorsign (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c +new file mode 100644 +index 000000000..190a9239b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ ++/* { dg-final { scan-assembler "xvand\\.v" } } */ ++/* { dg-final { scan-assembler "xvxor\\.v" } } */ ++/* { dg-final { scan-assembler-not "xvfmul" } } */ ++ ++double ++my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysign (1.0d, c[i]); ++} ++ ++float ++my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysignf (1.0f, c[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c +new file mode 100644 +index 000000000..22c5c03cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ftree-vectorize -mlsx" } */ ++/* { dg-require-effective-target loongarch_sx_hw } */ ++ ++#include "lsx-xorsign.c" ++ ++extern void abort (); ++ ++#define N 16 ++float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, ++ -12.5f, -15.6f, -18.7f, -21.8f, ++ 24.9f, 27.1f, 30.2f, 33.3f, ++ 36.4f, 39.5f, 42.6f, 45.7f}; ++float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, ++ -9.0f, 1.0f, -2.0f, 3.0f, ++ -4.0f, -5.0f, 6.0f, 7.0f, ++ -8.0f, -9.0f, 10.0f, 11.0f}; ++float r[N]; ++ ++double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d, ++ -12.5d, -15.6d, -18.7d, -21.8d, ++ 24.9d, 27.1d, 30.2d, 33.3d, ++ 36.4d, 39.5d, 42.6d, 45.7d}; ++double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d, ++ -9.0d, 1.0d, -2.0d, 3.0d, ++ -4.0d, -5.0d, 6.0d, 7.0d, ++ -8.0d, -9.0d, 10.0d, 11.0d}; ++double rd[N]; ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsignf (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) ++ abort (); ++} ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsign (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) ++ abort (); ++} ++ ++int ++main (void) ++{ ++ my_xorsignf (r, a, b, N); ++ /* check results: */ ++ check_xorsignf (); ++ ++ my_xorsign (rd, ad, bd, N); ++ /* check results: */ ++
check_xorsign (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c +new file mode 100644 +index 000000000..c2694c11e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -mlsx" } */ ++/* { dg-final { scan-assembler "vand\\.v" } } */ ++/* { dg-final { scan-assembler "vxor\\.v" } } */ ++/* { dg-final { scan-assembler-not "vfmul" } } */ ++ ++double ++my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysign (1.0d, c[i]); ++} ++ ++float ++my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysignf (1.0f, c[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c +new file mode 100644 +index 000000000..b4f28adf8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-require-effective-target loongarch_sx_hw } */ ++ ++extern void abort(void); ++ ++static double x = 2.0; ++static float y = 2.0; ++ ++int main() ++{ ++ if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5) ++ abort(); ++ ++ if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5) ++ abort(); ++ ++ if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5) ++ abort(); ++ ++ if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5) ++ abort(); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign.c b/gcc/testsuite/gcc.target/loongarch/xorsign.c +new file mode 100644 +index 000000000..ca80603d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xorsign.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler "vand\\.v" } } */ ++/* { dg-final { scan-assembler "vxor\\.v" } } */ ++/* { dg-final { scan-assembler-not "fcopysign" } } */ ++/* { dg-final { scan-assembler-not "fmul" } } */ ++ ++double ++my_xorsign (double a, double b) ++{ ++ return a * __builtin_copysign (1.0d, b); ++} ++ ++float ++my_xorsignf (float a, float b) ++{ ++ return a * __builtin_copysignf (1.0f, b); ++} +-- +2.43.0 +
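The identity behind the xorsign expanders above, spelled out on a scalar float (a reference sketch, not generated code): a * copysignf (1.0f, b) equals a with its sign bit XORed against b's sign bit, which is why an AND with a sign-bit mask followed by an XOR suffices.

#include <stdint.h>
#include <string.h>

float
xorsign_ref (float a, float b)
{
  uint32_t ua, ub;
  memcpy (&ua, &a, sizeof ua);
  memcpy (&ub, &b, sizeof ub);
  ua ^= ub & 0x80000000u;   /* the mask loongarch_build_signbit_mask provides */
  memcpy (&a, &ua, sizeof ua);
  return a;
}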
View file
_service:tar_scm:0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
Added
@@ -0,0 +1,730 @@ +From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:49 +0800 +Subject: [PATCH 059/188] LoongArch: Add support for LoongArch V1.1 approximate + instructions. + +This patch adds define_insn/builtins/intrinsics for these instructions, and add option +-mfrecipe to control instruction generation. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in (fecipe): Add. + * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + * config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern. + (lasx_xvfrsqrte_<flasxfmt>): Ditto. + * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. + (__lasx_xvfrecipe_d): Ditto. + (__lasx_xvfrsqrte_s): Ditto. + (__lasx_xvfrsqrte_d): Ditto. + * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. + (LSX_EXT_BUILTIN): New macro. + (LASX_EXT_BUILTIN): Ditto. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". + * config/loongarch/loongarch-def.cc: Regenerate. + * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. + * config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern. + (loongarch_frsqrte_<fmt>): Ditto. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern. + (lsx_vfrsqrte_<flsxfmt>): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. + (__lsx_vfrecipe_d): Ditto. + (__lsx_vfrsqrte_s): Ditto. + (__lsx_vfrsqrte_d): Ditto. + * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test.
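For context (illustrative only, not part of the patch): approximate-reciprocal seeds such as the ones frecipe.{s/d} produce are typically sharpened with a Newton-Raphson step before use; seed here stands in for the result of the __frecipe_s intrinsic added below.

static float
refine_recip (float x, float seed)
{
  /* One Newton-Raphson iteration roughly doubles the seed's precision.  */
  return seed * (2.0f - x * seed);
}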
+--- + gcc/config/loongarch/genopts/isa-evolution.in | 1 + + gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++ + gcc/config/loongarch/lasx.md | 24 +++++++++++ + gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++ + gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++ + gcc/config/loongarch/loongarch-c.cc | 3 ++ + gcc/config/loongarch/loongarch-cpucfg-map.h | 1 + + gcc/config/loongarch/loongarch-def.cc | 3 +- + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.cc | 1 + + gcc/config/loongarch/loongarch.md | 35 +++++++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++ + gcc/config/loongarch/lsx.md | 24 +++++++++++ + gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++ + gcc/doc/extend.texi | 35 ++++++++++++++++ + .../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++ + .../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++ + .../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++ + 18 files changed, 365 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index a6bc3f87f..11a198b64 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,3 +1,4 @@ ++2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. + 2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. + 2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. + 2 28 lamcas Support amcas_db.{b/h/w/d} instructions. +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 2833f1487..22035e767 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + } + #endif + ++#ifdef __loongarch_frecipe ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_s (float _1) ++{ ++ __builtin_loongarch_frecipe_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_d (double _1) ++{ ++ __builtin_loongarch_frecipe_d ((double) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_s (float _1) ++{ ++ __builtin_loongarch_frsqrte_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_d (double _1) ++{ ++ __builtin_loongarch_frsqrte_d ((double) _1); ++} ++#endif ++ + /* Assembly instruction format: ui15. */ + /* Data types in instruction templates: USI. 
*/ + #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index de7c88f14..b1416f6c3 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -40,8 +40,10 @@ + UNSPEC_LASX_XVFCVTL + UNSPEC_LASX_XVFLOGB + UNSPEC_LASX_XVFRECIP ++ UNSPEC_LASX_XVFRECIPE + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT ++ UNSPEC_LASX_XVFRSQRTE + UNSPEC_LASX_XVFCMP_SAF + UNSPEC_LASX_XVFCMP_SEQ + UNSPEC_LASX_XVFCMP_SLE +@@ -1633,6 +1635,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "lasx_xvfrecipe_<flasxfmt>" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRECIPE)) ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrecipe.<flasxfmt>\t%u0,%u1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvfrsqrt_<flasxfmt>" + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +@@ -1642,6 +1655,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Square Root Instructions. ++ ++(define_insn "lasx_xvfrsqrte_<flasxfmt>" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRTE)) ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrsqrte.<flasxfmt>\t%u0,%u1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>" + (set (match_operand:<VIMODE256> 0 "register_operand" "=f") + (unspec:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 7bce2c757..5e65e76e7 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1) + return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); + } + ++#if defined(__loongarch_frecipe) ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256 __lasx_xvfrecipe_s (__m256 _1) ++{ ++ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256d __lasx_xvfrecipe_d (__m256d _1) ++{ ++ return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256 __lasx_xvfrsqrte_s (__m256 _1) ++{ ++ return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256d __lasx_xvfrsqrte_d (__m256d _1) ++{ ++ return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); ++} ++#endif ++ + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SF, V8SF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index f4523c8bf..bc156bd36 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -120,6 +120,9 @@ struct loongarch_builtin_description + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) + AVAIL_ALL (lsx, ISA_HAS_LSX) + AVAIL_ALL (lasx, ISA_HAS_LASX) ++AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) ++AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + + /* Construct a loongarch_builtin_description from the given arguments. + +@@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + ++ /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_<INSN> ++ for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description ++ field. AVAIL is the name of the availability predicate, without the leading ++ loongarch_builtin_avail_. */ ++#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ { CODE_FOR_lsx_ ## INSN, \ ++ "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ ++ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } ++ + + /* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_<INSN> + for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description +@@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + ++/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_<INSN> ++ for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description ++ field. AVAIL is the name of the availability predicate, without the leading ++ loongarch_builtin_avail_. */ ++#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ { CODE_FOR_lasx_ ## INSN, \ ++ "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ ++ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } ++ + /* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_<INSN> + for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description + field. */ +@@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins = { + DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default), + DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default), + ++ /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}. */ ++ ++ DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe), ++ DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe), ++ DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe), ++ DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe), ++ ++ /* Built-in functions for new LSX instructions. */ ++ ++ LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), ++ LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), ++ LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), ++ LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), ++ ++ /* Built-in functions for new LASX instructions. */ ++ ++ LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), ++ LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), ++ LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), ++ LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), ++ + /* Built-in functions for LSX. 
*/ + LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI), + LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI), +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 76c8ea8db..a89477a74 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + ++ if (TARGET_HARD_FLOAT && TARGET_FRECIPE) ++ builtin_define ("__loongarch_frecipe"); ++ + if (ISA_HAS_LSX) + { + builtin_define ("__loongarch_simd"); +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +index 02ff16712..148333c24 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -29,6 +29,7 @@ static constexpr struct { + unsigned int cpucfg_bit; + HOST_WIDE_INT isa_evolution_bit; + } cpucfg_map = { ++ { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE }, + { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, + { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, + { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index bc6997e45..c41804a18 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -60,7 +60,8 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA +- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS ++ | OPTION_MASK_ISA_FRECIPE)); + + static inline loongarch_cache la464_cache () + { +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 7144bbe28..a8821acb0 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_EXPLICIT_RELOCS_NONE "none" + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + ++#define OPTSTR_FRECIPE "frecipe" + #define OPTSTR_DIV32 "div32" + #define OPTSTR_LAM_BH "lam-bh" + #define OPTSTR_LAMCAS "lamcas" +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3c8ae9a42..ce1c0a8bd 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11503,6 +11503,7 @@ loongarch_asm_code_end (void) + loongarch_cpu_strings la_target.cpu_tune); + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings la_target.isa.base); ++ DUMP_FEATURE (TARGET_FRECIPE); + DUMP_FEATURE (TARGET_DIV32); + DUMP_FEATURE (TARGET_LAM_BH); + DUMP_FEATURE (TARGET_LAMCAS); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index afc3c591f..9080cec1c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -59,6 +59,12 @@ + ;; Stack tie + UNSPEC_TIE + ++ ;; RSQRT ++ UNSPEC_RSQRTE ++ ++ ;; RECIP ++ UNSPEC_RECIPE ++ + ;; CRC + UNSPEC_CRC + UNSPEC_CRCC +@@ -220,6 +226,7 @@ + ;; fmadd floating point multiply-add + ;; fdiv floating point divide + ;; frdiv floating point reciprocal divide ++;; frecipe floating point approximate reciprocal + ;; fabs floating point absolute value + ;; flogb floating point exponent extract + ;; fneg floating point negation +@@ -229,6 +236,7 @@ + ;; fscaleb floating point scale + ;; fsqrt floating point square root + ;; frsqrt floating point reciprocal square root ++;; frsqrte floating point approximate reciprocal square root + ;; multi multiword sequence (or user asm statements) + ;; atomic atomic memory update instruction + ;; syncloop memory atomic operation implemented as a sync loop +@@ -238,8 +246,8 @@ + "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, + prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, + shift,slt,signext,clz,trap,imul,idiv,move, +- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt, +- fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost, ++ fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt, ++ fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, + simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, + simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, + simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, +@@ -908,6 +916,18 @@ + (set_attr "type" "frdiv") + (set_attr "mode" "<UNITMODE>")) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "loongarch_frecipe_<fmt>" ++ (set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_RECIPE)) ++ "TARGET_FRECIPE" ++ "frecipe.<fmt>\t%0,%1" ++ (set_attr "type" "frecipe") ++ (set_attr "mode" "<UNITMODE>") ++ (set_attr "insn_count" "1")) ++ + ;; Integer division and modulus. + (define_expand "<optab><mode>3" + (set (match_operand:GPR 0 "register_operand") +@@ -1133,6 +1153,17 @@ + (set_attr "type" "frsqrt") + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")) ++ ++;; Approximate Reciprocal Square Root Instructions. 
++ ++(define_insn "loongarch_frsqrte_<fmt>" ++ (set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_RSQRTE)) ++ "TARGET_FRECIPE" ++ "frsqrte.<fmt>\t%0,%1" ++ (set_attr "type" "frsqrte") ++ (set_attr "mode" "<UNITMODE>")) +  + ;; + ;; .................... +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 7fe36feb9..e7bc8bed4 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -260,6 +260,10 @@ default value is 4. + Variable + HOST_WIDE_INT isa_evolution = 0 + ++mfrecipe ++Target Mask(ISA_FRECIPE) Var(isa_evolution) ++Support frecipe.{s/d} and frsqrte.{s/d} instructions. ++ + mdiv32 + Target Mask(ISA_DIV32) Var(isa_evolution) + Support div.wu and mod.wu instructions with inputs not sign-extended. +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index ce6ec6d69..37bdc6910 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -42,8 +42,10 @@ + UNSPEC_LSX_VFCVTL + UNSPEC_LSX_VFLOGB + UNSPEC_LSX_VFRECIP ++ UNSPEC_LSX_VFRECIPE + UNSPEC_LSX_VFRINT + UNSPEC_LSX_VFRSQRT ++ UNSPEC_LSX_VFRSQRTE + UNSPEC_LSX_VFCMP_SAF + UNSPEC_LSX_VFCMP_SEQ + UNSPEC_LSX_VFCMP_SLE +@@ -1546,6 +1548,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "lsx_vfrecipe_<flsxfmt>" ++ (set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") ++ UNSPEC_LSX_VFRECIPE)) ++ "ISA_HAS_LSX && TARGET_FRECIPE" ++ "vfrecipe.<flsxfmt>\t%w0,%w1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lsx_vfrsqrt_<flsxfmt>" + (set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +@@ -1555,6 +1568,17 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + ++;; Approximate Reciprocal Square Root Instructions. ++ ++(define_insn "lsx_vfrsqrte_<flsxfmt>" ++ (set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") ++ UNSPEC_LSX_VFRSQRTE)) ++ "ISA_HAS_LSX && TARGET_FRECIPE" ++ "vfrsqrte.<flsxfmt>\t%w0,%w1" ++ (set_attr "type" "simd_fdiv") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "lsx_vftint_u_<ilsxfmt_u>_<flsxfmt>" + (set (match_operand:<VIMODE> 0 "register_operand" "=f") + (unspec:<VIMODE> (match_operand:FLSX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index 29553c093..57a6fc40a 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1) + return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); + } + ++#if defined(__loongarch_frecipe) ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m128 __lsx_vfrecipe_s (__m128 _1) ++{ ++ return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m128d __lsx_vfrecipe_d (__m128d _1) ++{ ++ return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. 
*/
++/* Data types in instruction templates:  V4SF, V4SF.  */
++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++__m128 __lsx_vfrsqrte_s (__m128 _1)
++{
++  return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1);
++}
++
++/* Assembly instruction format:	vd, vj.  */
++/* Data types in instruction templates:  V2DF, V2DF.  */
++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++__m128d __lsx_vfrsqrte_d (__m128d _1)
++{
++  return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1);
++}
++#endif
++
+ /* Assembly instruction format:	vd, vj.  */
+ /* Data types in instruction templates:  V4SF, V4SF.  */
+ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 7edd3974d..bb042ae78 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -16187,6 +16187,14 @@ The intrinsics provided are listed below:
+ void __builtin_loongarch_break (imm0_32767)
+ @end smallexample
+ 
++These intrinsic functions are available by using @option{-mfrecipe}.
++@smallexample
++ float __builtin_loongarch_frecipe_s (float);
++ double __builtin_loongarch_frecipe_d (double);
++ float __builtin_loongarch_frsqrte_s (float);
++ double __builtin_loongarch_frsqrte_d (double);
++@end smallexample
++
+ @emph{Note:}Since the control register is divided into 32-bit and 64-bit,
+ but the access instruction is not distinguished. So GCC renames the control
+ instructions when implementing intrinsics.
+@@ -16259,6 +16267,15 @@ function you need to include @code{larchintrin.h}.
+ void __break (imm0_32767)
+ @end smallexample
+ 
++These intrinsic functions are available by including @code{larchintrin.h} and
++using @option{-mfrecipe}.
++@smallexample
++ float __frecipe_s (float);
++ double __frecipe_d (double);
++ float __frsqrte_s (float);
++ double __frsqrte_d (double);
++@end smallexample
++
+ Returns the value that is currently set in the @samp{tp} register.
+ @smallexample
+ void * __builtin_thread_pointer (void)
+@@ -17085,6 +17102,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255);
+ __m128i __lsx_vxor_v (__m128i, __m128i);
+ @end smallexample
+ 
++These intrinsic functions are available by including @code{lsxintrin.h} and
++using @option{-mfrecipe} and @option{-mlsx}.
++@smallexample
++__m128d __lsx_vfrecipe_d (__m128d);
++__m128 __lsx_vfrecipe_s (__m128);
++__m128d __lsx_vfrsqrte_d (__m128d);
++__m128 __lsx_vfrsqrte_s (__m128);
++@end smallexample
++
+ @node LoongArch ASX Vector Intrinsics
+ @subsection LoongArch ASX Vector Intrinsics
+ 
+@@ -17924,6 +17950,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255);
+ __m256i __lasx_xvxor_v (__m256i, __m256i);
+ @end smallexample
+ 
++These intrinsic functions are available by including @code{lasxintrin.h} and
++using @option{-mfrecipe} and @option{-mlasx}.
++@smallexample ++__m256d __lasx_xvfrecipe_d (__m256d); ++__m256 __lasx_xvfrecipe_s (__m256); ++__m256d __lasx_xvfrsqrte_d (__m256d); ++__m256 __lasx_xvfrsqrte_s (__m256); ++@end smallexample ++ + @node MIPS DSP Built-in Functions + @subsection MIPS DSP Built-in Functions + +diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c +new file mode 100644 +index 000000000..b9329f346 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c +@@ -0,0 +1,28 @@ ++/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mfrecipe" } */ ++/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ ++ ++float ++test_frecipe_s (float _1) ++{ ++ return __builtin_loongarch_frecipe_s (_1); ++} ++double ++test_frecipe_d (double _1) ++{ ++ return __builtin_loongarch_frecipe_d (_1); ++} ++float ++test_frsqrte_s (float _1) ++{ ++ return __builtin_loongarch_frsqrte_s (_1); ++} ++double ++test_frsqrte_d (double _1) ++{ ++ return __builtin_loongarch_frsqrte_d (_1); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c +new file mode 100644 +index 000000000..522535b45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c +@@ -0,0 +1,30 @@ ++/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */ ++ ++#include <lasxintrin.h> ++ ++v8f32 ++__lasx_xvfrecipe_s (v8f32 _1) ++{ ++ return __builtin_lasx_xvfrecipe_s (_1); ++} ++v4f64 ++__lasx_xvfrecipe_d (v4f64 _1) ++{ ++ return __builtin_lasx_xvfrecipe_d (_1); ++} ++v8f32 ++__lasx_xvfrsqrte_s (v8f32 _1) ++{ ++ return __builtin_lasx_xvfrsqrte_s (_1); ++} ++v4f64 ++__lasx_xvfrsqrte_d (v4f64 _1) ++{ ++ return __builtin_lasx_xvfrsqrte_d (_1); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c +new file mode 100644 +index 000000000..4ad0cb0ff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c +@@ -0,0 +1,30 @@ ++/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */ ++ ++#include 
<lsxintrin.h> ++ ++v4f32 ++__lsx_vfrecipe_s (v4f32 _1) ++{ ++ return __builtin_lsx_vfrecipe_s (_1); ++} ++v2f64 ++__lsx_vfrecipe_d (v2f64 _1) ++{ ++ return __builtin_lsx_vfrecipe_d (_1); ++} ++v4f32 ++__lsx_vfrsqrte_s (v4f32 _1) ++{ ++ return __builtin_lsx_vfrsqrte_s (_1); ++} ++v2f64 ++__lsx_vfrsqrte_d (v2f64 _1) ++{ ++ return __builtin_lsx_vfrsqrte_d (_1); ++} +-- +2.43.0 +
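Editor's note: the patch above only adds the raw estimate instructions and their intrinsics; it does not change how ordinary arithmetic is compiled. A minimal, hypothetical sketch of calling the new scalar builtins directly, guarded by the __loongarch_frecipe macro the patch defines (build with gcc -O2 -mfrecipe; not part of the patch series):

/* fast-estimates.c -- hypothetical usage sketch.  */
float
fast_recip (float x)
{
#ifdef __loongarch_frecipe
  /* One frecipe.s instruction: a low-precision estimate of 1/x.  */
  return __builtin_loongarch_frecipe_s (x);
#else
  return 1.0f / x;		/* Portable fallback.  */
#endif
}

double
fast_rsqrt (double x)
{
#ifdef __loongarch_frecipe
  /* One frsqrte.d instruction: estimate of 1/sqrt(x).  */
  return __builtin_loongarch_frsqrte_d (x);
#else
  return 1.0 / __builtin_sqrt (x);
#endif
}

The estimates alone are not IEEE-accurate; the later patches in this revision pair them with Newton-Raphson steps before using them for division and square roots.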
View file
_service:tar_scm:0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
Added
@@ -0,0 +1,257 @@ +From e8210e26ac638eb443f8991fee6d412b297cb279 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:50 +0800 +Subject: PATCH 060/188 LoongArch: Use standard pattern name for + xvfrsqrt/vfrsqrt instructions. + +Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt<mode>2 to align with standard +pattern name. Define function use_rsqrt_p to decide when to use rsqrt optab. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrsqrt_<flasxfmt>): Renamed to .. + (rsqrt<mode>2): .. this. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name. + (CODE_FOR_lsx_vfrsqrt_s): Ditto. + (CODE_FOR_lasx_xvfrsqrt_d): Ditto. + (CODE_FOR_lasx_xvfrsqrt_s): Ditto. + * config/loongarch/loongarch.cc (use_rsqrt_p): New function. + (loongarch_optab_supported_p): Ditto. + (TARGET_OPTAB_SUPPORTED_P): New hook. + * config/loongarch/loongarch.md (*rsqrt<mode>a): Remove. + (*rsqrt<mode>2): New insn pattern. + (*rsqrt<mode>b): Remove. + * config/loongarch/lsx.md (lsx_vfrsqrt_<flsxfmt>): Renamed to .. + (rsqrt<mode>2): .. this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test. +--- + gcc/config/loongarch/lasx.md | 6 ++--- + gcc/config/loongarch/loongarch-builtins.cc | 4 +++ + gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++ + gcc/config/loongarch/loongarch.md | 24 +++++------------ + gcc/config/loongarch/lsx.md | 6 ++--- + .../loongarch/vector/lasx/lasx-rsqrt.c | 26 ++++++++++++++++++ + .../loongarch/vector/lsx/lsx-rsqrt.c | 26 ++++++++++++++++++ + 7 files changed, 96 insertions(+), 23 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b1416f6c3..3a4a1fe51 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1646,10 +1646,10 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrsqrt_<flasxfmt>" ++(define_insn "rsqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRSQRT)) ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRT)) + "ISA_HAS_LASX" + "xvfrsqrt.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index bc156bd36..4aae27a5e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h + #define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d ++#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 ++#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -776,6 +778,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu + #define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du ++#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 
++#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 + + static const struct loongarch_builtin_description loongarch_builtins = { + #define LARCH_MOVFCSR2GR 0 +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ce1c0a8bd..95aa9453b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11487,6 +11487,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + is_packed); + } + ++static bool ++use_rsqrt_p (void) ++{ ++ return (flag_finite_math_only ++ && !flag_trapping_math ++ && flag_unsafe_math_optimizations); ++} ++ ++/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ ++ ++static bool ++loongarch_optab_supported_p (int op, machine_mode, machine_mode, ++ optimization_type opt_type) ++{ ++ switch (op) ++ { ++ case rsqrt_optab: ++ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); ++ ++ default: ++ return true; ++ } ++} ++ + /* If -fverbose-asm, dump some info for debugging. */ + static void + loongarch_asm_code_end (void) +@@ -11625,6 +11649,9 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + ++#undef TARGET_OPTAB_SUPPORTED_P ++#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p ++ + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9080cec1c..4dfe583e2 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -60,6 +60,7 @@ + UNSPEC_TIE + + ;; RSQRT ++ UNSPEC_RSQRT + UNSPEC_RSQRTE + + ;; RECIP +@@ -1134,25 +1135,14 @@ + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")) + +-(define_insn "*rsqrt<mode>a" ++(define_insn "*rsqrt<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") +- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f")))) +- "flag_unsafe_math_optimizations" +- "frsqrt.<fmt>\t%0,%2" +- (set_attr "type" "frsqrt") +- (set_attr "mode" "<UNITMODE>") +- (set_attr "insn_count" "1")) +- +-(define_insn "*rsqrt<mode>b" +- (set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (match_operand:ANYF 2 "register_operand" "f")))) +- "flag_unsafe_math_optimizations" +- "frsqrt.<fmt>\t%0,%2" ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_RSQRT)) ++ "TARGET_HARD_FLOAT" ++ "frsqrt.<fmt>\t%0,%1" + (set_attr "type" "frsqrt") +- (set_attr "mode" "<UNITMODE>") +- (set_attr "insn_count" "1")) ++ (set_attr "mode" "<UNITMODE>")) + + ;; Approximate Reciprocal Square Root Instructions. 
+ 
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 37bdc6910..cb4a448e7 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -1559,10 +1559,10 @@
+    [(set_attr "type" "simd_fdiv")
+     (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "lsx_vfrsqrt_<flsxfmt>"
+-  [(set (match_operand:FLSX 0 "register_operand" "=f")
+-	(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
+-		     UNSPEC_LSX_VFRSQRT))]
++(define_insn "rsqrt<mode>2"
++  [(set (match_operand:FLSX 0 "register_operand" "=f")
++	(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
++		     UNSPEC_LSX_VFRSQRT))]
+   "ISA_HAS_LSX"
+   "vfrsqrt.<flsxfmt>\t%w0,%w1"
+   [(set_attr "type" "simd_fdiv")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
+new file mode 100644
+index 000000000..24316944d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -ffast-math" } */
++/* { dg-final { scan-assembler "xvfrsqrt.s" } } */
++/* { dg-final { scan-assembler "xvfrsqrt.d" } } */
++
++extern float sqrtf (float);
++
++float a[8], b[8];
++
++void
++foo1(void)
++{
++  for (int i = 0; i < 8; i++)
++    a[i] = 1 / sqrtf (b[i]);
++}
++
++extern double sqrt (double);
++
++double da[4], db[4];
++
++void
++foo2(void)
++{
++  for (int i = 0; i < 4; i++)
++    da[i] = 1 / sqrt (db[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
+new file mode 100644
+index 000000000..519cc4764
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -ffast-math" } */
++/* { dg-final { scan-assembler "vfrsqrt.s" } } */
++/* { dg-final { scan-assembler "vfrsqrt.d" } } */
++
++extern float sqrtf (float);
++
++float a[4], b[4];
++
++void
++foo1(void)
++{
++  for (int i = 0; i < 4; i++)
++    a[i] = 1 / sqrtf (b[i]);
++}
++
++extern double sqrt (double);
++
++double da[2], db[2];
++
++void
++foo2(void)
++{
++  for (int i = 0; i < 2; i++)
++    da[i] = 1 / sqrt (db[i]);
++}
+--
+2.43.0
+
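Editor's note: once the patterns carry the standard rsqrt<mode>2 optab name and loongarch_optab_supported_p accepts the rsqrt optab, a reciprocal square root in ordinary code can be expanded directly. A sketch mirroring the new tests (assumed flags; -ffast-math implies the three conditions use_rsqrt_p checks):

/* vec-rsqrt.c -- sketch; compile with gcc -O2 -mlasx -ffast-math.  */
extern float sqrtf (float);

float a[8], b[8];

void
vec_rsqrt (void)
{
  /* Vectorized through the rsqrt optab into xvfrsqrt.s instead of
     xvfsqrt.s followed by xvfdiv.s.  */
  for (int i = 0; i < 8; i++)
    a[i] = 1.0f / sqrtf (b[i]);
}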
View file
_service:tar_scm:0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
Added
@@ -0,0 +1,135 @@ +From 74924710ee8d662d883bf898d69aef1946d91ea5 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:51 +0800 +Subject: PATCH 061/188 LoongArch: Redefine pattern for xvfrecip/vfrecip + instructions. + +Redefine pattern for xvfrecip instructions use rtx code instead of unspec, and enable +xvfrecip instructions to be generated during auto-vectorization. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrecip_<flasxfmt>): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine + to new pattern name. + (CODE_FOR_lsx_vfrecip_s): Ditto. + (CODE_FOR_lasx_xvfrecip_d): Ditto. + (CODE_FOR_lasx_xvfrecip_s): Ditto. + (loongarch_expand_builtin_direct): For the vector recip instructions, construct a + temporary parameter const1_vector. + * config/loongarch/lsx.md (lsx_vfrecip_<flsxfmt>): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/predicates.md (const_vector_1_operand): New predicate. +--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++ + gcc/config/loongarch/lsx.md | 8 ++++---- + gcc/config/loongarch/predicates.md | 4 ++++ + 4 files changed, 32 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3a4a1fe51..ad49a3ffb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1626,12 +1626,12 @@ + (set_attr "type" "simd_fminmax") + (set_attr "mode" "<MODE>")) + +-(define_insn "lasx_xvfrecip_<flasxfmt>" ++(define_insn "recip<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") +- UNSPEC_LASX_XVFRECIP)) ++ (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "") ++ (match_operand:FLASX 2 "register_operand" "f"))) + "ISA_HAS_LASX" +- "xvfrecip.<flasxfmt>\t%u0,%u1" ++ "xvfrecip.<flasxfmt>\t%u0,%u2" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 4aae27a5e..85849ed29 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d + #define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 + #define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 ++#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3 ++#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du + #define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 + #define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 ++#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3 ++#define CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3 + + static const struct loongarch_builtin_description loongarch_builtins = { + #define LARCH_MOVFCSR2GR 0 +@@ -3019,6 +3023,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, + if (has_target_p) + create_output_operand (&opsopno++, target, TYPE_MODE (TREE_TYPE (exp))); + ++ /* For the vector reciprocal instructions, we need to construct a temporary ++ parameter const1_vector. 
*/
++  switch (icode)
++    {
++    case CODE_FOR_recipv8sf3:
++    case CODE_FOR_recipv4df3:
++    case CODE_FOR_recipv4sf3:
++    case CODE_FOR_recipv2df3:
++      loongarch_prepare_builtin_arg (&ops[2], exp, 0);
++      create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode);
++      return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
++
++    default:
++      break;
++    }
++
+   /* Map the arguments to the other operands.  */
+   gcc_assert (opno + call_expr_nargs (exp)
+	       == insn_data[icode].n_generator_args);
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index cb4a448e7..f2774f021 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -1539,12 +1539,12 @@
+    [(set_attr "type" "simd_fminmax")
+     (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "lsx_vfrecip_<flsxfmt>"
+-  [(set (match_operand:FLSX 0 "register_operand" "=f")
+-	(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
+-		     UNSPEC_LSX_VFRECIP))]
++(define_insn "recip<mode>3"
++  [(set (match_operand:FLSX 0 "register_operand" "=f")
++	(div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "")
++		  (match_operand:FLSX 2 "register_operand" "f")))]
+   "ISA_HAS_LSX"
+-  "vfrecip.<flsxfmt>\t%w0,%w1"
++  "vfrecip.<flsxfmt>\t%w0,%w2"
+   [(set_attr "type" "simd_fdiv")
+    (set_attr "mode" "<MODE>")])
+ 
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 30a0dee9f..572550dbc 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -227,6 +227,10 @@
+   (and (match_code "const_int,const_wide_int,const_double,const_vector")
+        (match_test "op == CONST1_RTX (GET_MODE (op))")))
+ 
++(define_predicate "const_vector_1_operand"
++  (and (match_code "const_vector")
++       (match_test "op == CONST1_RTX (GET_MODE (op))")))
++
+ (define_predicate "reg_or_1_operand"
+   (ior (match_operand 0 "const_1_operand")
+        (match_operand 0 "register_operand")))
+--
+2.43.0
+
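Editor's note: source code using the existing intrinsics is unaffected by this redefinition; only the RTL behind the builtin changes. A hypothetical sketch (requires -mlsx):

/* recip-intrin.c -- sketch; compile with gcc -O2 -mlsx.  */
#include <lsxintrin.h>

__m128
recip4 (__m128 x)
{
  /* Same builtin as before, but now expanded through recipv4sf3,
     i.e. (div (const_vector 1.0) x); loongarch_expand_builtin_direct
     supplies the constant-1 vector operand.  */
  return __lsx_vfrecip_s (x);
}

Expressing the pattern as a plain division by 1.0 rather than an unspec is what allows vfrecip/xvfrecip to be generated for ordinary code during auto-vectorization, as the commit message notes.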
View file
_service:tar_scm:0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
Added
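Editor's preview: the patch below ties the estimate instructions into -mrecip. The RTL it emits is easier to follow in scalar form; this sketch (plain C, hypothetical names) models the sequences built by loongarch_emit_swrsqrtsf and loongarch_emit_swdivsf, one Newton-Raphson step applied to the hardware estimate:

/* Sketch only: frsqrte()/frecipe() stand for the hardware estimates.  */
float
nr_rsqrt (float a, float x0)	/* x0 = frsqrte (a), estimate of 1/sqrt(a) */
{
  float e0 = x0 * a;		/* e0 = a * x0 */
  float e1 = e0 * x0;		/* e1 = a * x0 * x0 */
  float e2 = 1.5f - 0.5f * e1;	/* emitted as a fused multiply-add */
  return x0 * e2;		/* 1/sqrt(a); sqrt(a) uses e0 * e2 instead */
}

float
nr_div (float a, float b, float x0)	/* x0 = frecipe (b), estimate of 1/b */
{
  float e0 = 2.0f - b * x0;	/* Newton-Raphson correction factor */
  return (a * x0) * e0;		/* a / b */
}

One refinement step roughly doubles the number of correct bits in the estimate, which is why the documentation added below still warns of an error of up to 2 ulp.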
@@ -0,0 +1,1096 @@ +From faac4efbee23e60691fc086a78284225ecf824a8 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:52 +0800 +Subject: PATCH 062/188 LoongArch: New options -mrecip and -mrecip= with + ffast-math. + +When both the -mrecip and -mfrecipe options are enabled, use approximate reciprocal +instructions and approximate reciprocal square root instructions with additional +Newton-Raphson steps to implement single precision floating-point division, square +root and reciprocal square root operations, for a better performance. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lasx.md (div<mode>3): New expander. + (*div<mode>3): Rename. + (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/loongarch-protos.h (loongarch_emit_swrsqrtsf): New prototype. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.cc (loongarch_option_override_internal): Set + recip_mask for -mrecip and -mrecip= options. + (loongarch_emit_swrsqrtsf): New function. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.h (RECIP_MASK_NONE, RECIP_MASK_DIV, RECIP_MASK_SQRT + RECIP_MASK_RSQRT, RECIP_MASK_VEC_DIV, RECIP_MASK_VEC_SQRT, RECIP_MASK_VEC_RSQRT + RECIP_MASK_ALL): New bitmasks. + (TARGET_RECIP_DIV, TARGET_RECIP_SQRT, TARGET_RECIP_RSQRT, TARGET_RECIP_VEC_DIV + TARGET_RECIP_VEC_SQRT, TARGET_RECIP_VEC_RSQRT): New tests. + * config/loongarch/loongarch.md (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/loongarch.opt (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lsx.md (div<mode>3): New expander. + (*div<mode>3): Rename. + (sqrt<mode>2): New expander. + (*sqrt<mode>2): Rename. + (rsqrt<mode>2): New expander. + * config/loongarch/predicates.md (reg_or_vecotr_1_operand): New predicate. + * doc/invoke.texi (LoongArch Options): Document new options. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/divf.c: New test. + * gcc.target/loongarch/recip-divf.c: New test. + * gcc.target/loongarch/recip-sqrtf.c: New test. + * gcc.target/loongarch/sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-sqrtf.c: New test. 
+--- + gcc/config/loongarch/genopts/loongarch.opt.in | 11 + + gcc/config/loongarch/lasx.md | 53 ++++- + gcc/config/loongarch/loongarch-protos.h | 2 + + gcc/config/loongarch/loongarch.cc | 188 ++++++++++++++++++ + gcc/config/loongarch/loongarch.h | 18 ++ + gcc/config/loongarch/loongarch.md | 49 ++++- + gcc/config/loongarch/loongarch.opt | 11 + + gcc/config/loongarch/lsx.md | 53 ++++- + gcc/config/loongarch/predicates.md | 4 + + gcc/doc/invoke.texi | 55 ++++- + gcc/testsuite/gcc.target/loongarch/divf.c | 10 + + .../gcc.target/loongarch/recip-divf.c | 9 + + .../gcc.target/loongarch/recip-sqrtf.c | 23 +++ + gcc/testsuite/gcc.target/loongarch/sqrtf.c | 24 +++ + .../loongarch/vector/lasx/lasx-divf.c | 13 ++ + .../loongarch/vector/lasx/lasx-recip-divf.c | 12 ++ + .../loongarch/vector/lasx/lasx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lasx/lasx-recip.c | 24 +++ + .../loongarch/vector/lasx/lasx-sqrtf.c | 29 +++ + .../loongarch/vector/lsx/lsx-divf.c | 13 ++ + .../loongarch/vector/lsx/lsx-recip-divf.c | 12 ++ + .../loongarch/vector/lsx/lsx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lsx/lsx-recip.c | 24 +++ + .../loongarch/vector/lsx/lsx-sqrtf.c | 29 +++ + 24 files changed, 711 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index cd5e75e4f..102202b03 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -23,6 +23,9 @@ config/loongarch/loongarch-opts.h + HeaderInclude + config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ + ; ISA related options + ;; Base ISA + Enum +@@ -194,6 +197,14 @@ mexplicit-relocs + Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) + Use %reloc() assembly operators (for backward compatibility). + ++mrecip ++Target RejectNegative Var(loongarch_recip) ++Generate approximate reciprocal divide and square root for better throughput. ++ ++mrecip= ++Target RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index ad49a3ffb..eeac8cd98 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1194,7 +1194,25 @@ + (set_attr "type" "simd_fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "div<mode>3" ++(define_expand "div<mode>3" ++ (set (match_operand:FLASX 0 "register_operand") ++ (div:FLASX (match_operand:FLASX 1 "reg_or_vecotr_1_operand") ++ (match_operand:FLASX 2 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (<MODE>mode == V8SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands0, operands1, ++ operands2, V8SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div<mode>3" + (set (match_operand:FLASX 0 "register_operand" "=f") + (div:FLASX (match_operand:FLASX 1 "register_operand" "f") + (match_operand:FLASX 2 "register_operand" "f"))) +@@ -1223,7 +1241,23 @@ + (set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")) + +-(define_insn "sqrt<mode>2" ++(define_expand "sqrt<mode>2" ++ (set (match_operand:FLASX 0 "register_operand") ++ (sqrt:FLASX (match_operand:FLASX 1 "register_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (<MODE>mode == V8SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, V8SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f"))) + "ISA_HAS_LASX" +@@ -1646,7 +1680,20 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "rsqrt<mode>2" ++(define_expand "rsqrt<mode>2" ++ (set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") ++ UNSPEC_LASX_XVFRSQRT)) ++ "ISA_HAS_LASX" ++ { ++ if (<MODE>mode == V8SFmode && TARGET_RECIP_VEC_RSQRT) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, V8SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt<mode>2" + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRSQRT)) +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 51d38177b..117669e9f 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -220,5 +220,7 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); + + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); ++extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 95aa9453b..18326ce47 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7548,6 +7548,71 @@ loongarch_option_override_internal (struct gcc_options *opts, + + /* Function to allocate machine-dependent function status. */ + init_machine_status = &loongarch_init_machine_status; ++ ++ /* -mrecip options. 
*/ ++ static struct ++ { ++ const char *string; /* option name. */ ++ unsigned int mask; /* mask bits to set. */ ++ } ++ const recip_options = { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++ }; ++ ++ if (loongarch_recip_name) ++ { ++ char *p = ASTRDUP (loongarch_recip_name); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_optionsi.string)) ++ { ++ mask = recip_optionsi.mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ recip_mask &= ~mask; ++ else ++ recip_mask |= mask; ++ } ++ } ++ if (loongarch_recip) ++ recip_mask |= RECIP_MASK_ALL; ++ if (!TARGET_FRECIPE) ++ recip_mask = RECIP_MASK_NONE; + } + + +@@ -11470,6 +11535,126 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + return force_reg (vec_mode, v); + } + ++/* Use rsqrte instruction and Newton-Rhapson to compute the approximation of ++ a single precision floating point reciprocal square root. */ ++ ++void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) ++{ ++ rtx x0, e0, e1, e2, mhalf, monehalf; ++ REAL_VALUE_TYPE r; ++ int unspec; ++ ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ e1 = gen_reg_rtx (mode); ++ e2 = gen_reg_rtx (mode); ++ ++ real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL); ++ mhalf = const_double_from_real_value (r, SFmode); ++ ++ real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1); ++ monehalf = const_double_from_real_value (r, SFmode); ++ unspec = UNSPEC_RSQRTE; ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mhalf = loongarch_build_const_vector (mode, true, mhalf); ++ monehalf = loongarch_build_const_vector (mode, true, monehalf); ++ unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRSQRTE ++ : UNSPEC_LSX_VFRSQRTE; ++ } ++ ++ /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) ++ sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) */ ++ ++ a = force_reg (mode, a); ++ ++ /* x0 = rsqrt(a) estimate. */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ unspec))); ++ ++ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). 
*/ ++ if (!recip) ++ { ++ rtx zero = force_reg (mode, CONST0_RTX (mode)); ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ machine_mode imode = related_int_vector_mode (mode).require (); ++ rtx mask = gen_reg_rtx (imode); ++ emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero))); ++ emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, ++ gen_lowpart (mode, mask)))); ++ } ++ else ++ { ++ rtx target = emit_conditional_move (x0, { GT, a, zero, mode }, ++ x0, zero, mode, 0); ++ if (target != x0) ++ emit_move_insn (x0, target); ++ } ++ } ++ ++ /* e0 = x0 * a */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); ++ /* e1 = e0 * x0 */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); ++ ++ /* e2 = 1.5 - e1 * 0.5 */ ++ mhalf = force_reg (mode, mhalf); ++ monehalf = force_reg (mode, monehalf); ++ emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, ++ gen_rtx_NEG (mode, e1), ++ mhalf, monehalf))); ++ ++ if (recip) ++ /* res = e2 * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2))); ++ else ++ /* res = e2 * e0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0))); ++} ++ ++/* Use recipe instruction and Newton-Rhapson to compute the approximation of ++ a single precision floating point divide. */ ++ ++void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) ++{ ++ rtx x0, e0, mtwo; ++ REAL_VALUE_TYPE r; ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ int unspec = UNSPEC_RECIPE; ++ ++ real_arithmetic (&r, ABS_EXPR, &dconst2, NULL); ++ mtwo = const_double_from_real_value (r, SFmode); ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mtwo = loongarch_build_const_vector (mode, true, mtwo); ++ unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRECIPE ++ : UNSPEC_LSX_VFRECIPE; ++ } ++ ++ mtwo = force_reg (mode, mtwo); ++ ++ /* a / b = a * recipe(b) * (2.0 - b * recipe(b)) */ ++ ++ /* x0 = 1./b estimate. 
*/ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ unspec))); ++ /* 2.0 - b * x0 */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode, ++ gen_rtx_NEG (mode, b), x0, mtwo))); ++ ++ /* x0 = a * x0 */ ++ if (a != CONST1_RTX (mode)) ++ emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0))); ++ ++ /* res = e0 * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0))); ++} ++ + static bool + loongarch_builtin_support_vector_misalignment (machine_mode mode, + const_tree type, +@@ -11665,6 +11850,9 @@ loongarch_asm_code_end (void) + #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + loongarch_autovectorize_vector_modes + ++#undef TARGET_OPTAB_SUPPORTED_P ++#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p ++ + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS loongarch_init_builtins + #undef TARGET_BUILTIN_DECL +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 8b28be0e4..fbc0f53e4 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -702,6 +702,24 @@ enum reg_class + && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)) + ++#define RECIP_MASK_NONE 0x00 ++#define RECIP_MASK_DIV 0x01 ++#define RECIP_MASK_SQRT 0x02 ++#define RECIP_MASK_RSQRT 0x04 ++#define RECIP_MASK_VEC_DIV 0x08 ++#define RECIP_MASK_VEC_SQRT 0x10 ++#define RECIP_MASK_VEC_RSQRT 0x20 ++#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \ ++ | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \ ++ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT) ++ ++#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664) ++ + /* 1 if N is a possible register number for function argument passing. + We have no FP argument registers when soft-float. */ + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4dfe583e2..c6edd1dda 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -893,9 +893,21 @@ + ;; Float division and modulus. + (define_expand "div<mode>3" + (set (match_operand:ANYF 0 "register_operand") +- (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") +- (match_operand:ANYF 2 "register_operand"))) +- "") ++ (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") ++ (match_operand:ANYF 2 "register_operand"))) ++ "" ++{ ++ if (<MODE>mode == SFmode ++ && TARGET_RECIP_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands0, operands1, ++ operands2, SFmode); ++ DONE; ++ } ++}) + + (define_insn "*div<mode>3" + (set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1126,7 +1138,23 @@ + ;; + ;; .................... 
+ +-(define_insn "sqrt<mode>2" ++(define_expand "sqrt<mode>2" ++ (set (match_operand:ANYF 0 "register_operand") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand"))) ++ "" ++ { ++ if (<MODE>mode == SFmode ++ && TARGET_RECIP_SQRT ++ && flag_unsafe_math_optimizations ++ && !optimize_insn_for_size_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, SFmode, 0); ++ DONE; ++ } ++ }) ++ ++(define_insn "*sqrt<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") + (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f"))) + "" +@@ -1135,6 +1163,19 @@ + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")) + ++(define_expand "rsqrt<mode>2" ++ (set (match_operand:ANYF 0 "register_operand") ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand") ++ UNSPEC_RSQRT)) ++ "TARGET_HARD_FLOAT" ++{ ++ if (<MODE>mode == SFmode && TARGET_RECIP_RSQRT) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, SFmode, 1); ++ DONE; ++ } ++}) ++ + (define_insn "*rsqrt<mode>2" + (set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index e7bc8bed4..56f6a9564 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -31,6 +31,9 @@ config/loongarch/loongarch-opts.h + HeaderInclude + config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ + ; ISA related options + ;; Base ISA + Enum +@@ -202,6 +205,14 @@ mexplicit-relocs + Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) + Use %reloc() assembly operators (for backward compatibility). + ++mrecip ++Target RejectNegative Var(loongarch_recip) ++Generate approximate reciprocal divide and square root for better throughput. ++ ++mrecip= ++Target RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index f2774f021..dbdb42301 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1083,7 +1083,25 @@ + (set_attr "type" "simd_fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "div<mode>3" ++(define_expand "div<mode>3" ++ (set (match_operand:FLSX 0 "register_operand") ++ (div:FLSX (match_operand:FLSX 1 "reg_or_vecotr_1_operand") ++ (match_operand:FLSX 2 "register_operand"))) ++ "ISA_HAS_LSX" ++{ ++ if (<MODE>mode == V4SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands0, operands1, ++ operands2, V4SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div<mode>3" + (set (match_operand:FLSX 0 "register_operand" "=f") + (div:FLSX (match_operand:FLSX 1 "register_operand" "f") + (match_operand:FLSX 2 "register_operand" "f"))) +@@ -1112,7 +1130,23 @@ + (set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")) + +-(define_insn "sqrt<mode>2" ++(define_expand "sqrt<mode>2" ++ (set (match_operand:FLSX 0 "register_operand") ++ (sqrt:FLSX (match_operand:FLSX 1 "register_operand"))) ++ "ISA_HAS_LSX" ++{ ++ if (<MODE>mode == V4SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, V4SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt<mode>2" + (set (match_operand:FLSX 0 "register_operand" "=f") + (sqrt:FLSX (match_operand:FLSX 1 "register_operand" "f"))) + "ISA_HAS_LSX" +@@ -1559,7 +1593,20 @@ + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) + +-(define_insn "rsqrt<mode>2" ++(define_expand "rsqrt<mode>2" ++ (set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") ++ UNSPEC_LSX_VFRSQRT)) ++ "ISA_HAS_LSX" ++{ ++ if (<MODE>mode == V4SFmode && TARGET_RECIP_VEC_RSQRT) ++ { ++ loongarch_emit_swrsqrtsf (operands0, operands1, V4SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt<mode>2" + (set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") + UNSPEC_LSX_VFRSQRT)) +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 572550dbc..88e54c915 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -235,6 +235,10 @@ + (ior (match_operand 0 "const_1_operand") + (match_operand 0 "register_operand"))) + ++(define_predicate "reg_or_vecotr_1_operand" ++ (ior (match_operand 0 "const_vector_1_operand") ++ (match_operand 0 "register_operand"))) ++ + ;; These are used in vec_merge, hence accept bitmask as const_int. + (define_predicate "const_exp_2_operand" + (and (match_code "const_int") +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 168f3d0db..76a8f20d1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1008,7 +1008,8 @@ Objective-C and Objective-C++ Dialects}. 
+ -mmax-inline-memcpy-size=@var{n} @gol
+ -mexplicit-relocs -mno-explicit-relocs @gol
+ -mdirect-extern-access -mno-direct-extern-access @gol
+--mcmodel=@var{code-model}}
++-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol
++-mrecip -mrecip=@var{opt}}
+
+ @emph{M32R/D Options}
+ @gccoptlist{-m32r2 -m32rx -m32r @gol
+@@ -24633,6 +24634,58 @@ kernels, executables linked with @option{-static} or @option{-static-pie}.
+ @option{-mdirect-extern-access} is not compatible with @option{-fPIC} or
+ @option{-fpic}.
+
++@opindex mrecip
++@item -mrecip
++This option enables use of the reciprocal estimate and reciprocal square
++root estimate instructions with additional Newton-Raphson steps to increase
++precision, instead of doing a divide, or a square root and divide, for
++floating-point arguments.
++These instructions are generated only when @option{-funsafe-math-optimizations}
++is enabled together with @option{-ffinite-math-only} and
++@option{-fno-trapping-math}.
++This option is off by default. Before you can use this option, you must make
++sure the target CPU supports the frecipe and frsqrte instructions.
++Note that while the throughput of the sequence is higher than the throughput of
++the non-reciprocal instruction, the precision of the sequence can be decreased
++by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994).
++
++@opindex mrecip=opt
++@item -mrecip=@var{opt}
++This option controls which reciprocal estimate instructions
++may be used. @var{opt} is a comma-separated list of options, which may
++be preceded by a @samp{!} to invert the option:
++@table @samp
++@item all
++Enable all estimate instructions.
++
++@item default
++Enable the default instructions, equivalent to @option{-mrecip}.
++
++@item none
++Disable all estimate instructions, equivalent to @option{-mno-recip}.
++
++@item div
++Enable the approximation for scalar division.
++
++@item vec-div
++Enable the approximation for vectorized division.
++
++@item sqrt
++Enable the approximation for scalar square root.
++
++@item vec-sqrt
++Enable the approximation for vectorized square root.
++
++@item rsqrt
++Enable the approximation for scalar reciprocal square root.
++
++@item vec-rsqrt
++Enable the approximation for vectorized reciprocal square root.
++@end table
++
++So, for example, @option{-mrecip=all,!sqrt} enables
++all of the reciprocal approximations, except for scalar square root.
++
+ @item loongarch-vect-unroll-limit
+ The vectorizer will use available tuning information to determine whether it
+ would be beneficial to unroll the main vectorized loop and by how much. 
This
+diff --git a/gcc/testsuite/gcc.target/loongarch/divf.c b/gcc/testsuite/gcc.target/loongarch/divf.c
+new file mode 100644
+index 000000000..6c831817c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/divf.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe -fno-unsafe-math-optimizations" } */
++/* { dg-final { scan-assembler "fdiv.s" } } */
++/* { dg-final { scan-assembler-not "frecipe.s" } } */
++
++float
++foo(float a, float b)
++{
++ return a / b;
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/recip-divf.c b/gcc/testsuite/gcc.target/loongarch/recip-divf.c
+new file mode 100644
+index 000000000..db5e3e488
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/recip-divf.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe" } */
++/* { dg-final { scan-assembler "frecipe.s" } } */
++
++float
++foo(float a, float b)
++{
++ return a / b;
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c
+new file mode 100644
+index 000000000..7f45db6cd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe" } */
++/* { dg-final { scan-assembler-times "frsqrte.s" 3 } } */
++
++extern float sqrtf (float);
++
++float
++foo1 (float a, float b)
++{
++ return a/sqrtf(b);
++}
++
++float
++foo2 (float a, float b)
++{
++ return sqrtf(a/b);
++}
++
++float
++foo3 (float a)
++{
++ return sqrtf(a);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/sqrtf.c b/gcc/testsuite/gcc.target/loongarch/sqrtf.c
+new file mode 100644
+index 000000000..c2720faac
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/sqrtf.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe -fno-unsafe-math-optimizations" } */
++/* { dg-final { scan-assembler-times "fsqrt.s" 3 } } */
++/* { dg-final { scan-assembler-not "frsqrte.s" } } */
++
++extern float sqrtf (float);
++
++float
++foo1 (float a, float b)
++{
++ return a/sqrtf(b);
++}
++
++float
++foo2 (float a, float b)
++{
++ return sqrtf(a/b);
++}
++
++float
++foo3 (float a)
++{
++ return sqrtf(a);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c
+new file mode 100644
+index 000000000..748a82200
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mrecip -mlasx -mfrecipe -fno-unsafe-math-optimizations" } */
++/* { dg-final { scan-assembler "xvfdiv.s" } } */
++/* { dg-final { scan-assembler-not "xvfrecipe.s" } } */
++
++float a[8],b[8],c[8];
++
++void
++foo ()
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = a[i] / b[i];
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c
+new file mode 100644
+index 000000000..6532756f0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mlasx -mfrecipe" } */
++/* { dg-final { scan-assembler "xvfrecipe.s" } } */
++
++float a[8],b[8],c[8];
++
++void
++foo ()
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = a[i] / b[i];
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c
+new file mode 100644
+index 000000000..a623dff8f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mlasx -mfrecipe" } */
++/* { dg-final { scan-assembler-times "xvfrsqrte.s" 3 } } */
++
++float a[8], b[8], c[8];
++
++extern float sqrtf (float);
++
++void
++foo1 (void)
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = a[i] / sqrtf (b[i]);
++}
++
++void
++foo2 (void)
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = sqrtf (a[i] / b[i]);
++}
++
++void
++foo3 (void)
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = sqrtf (a[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c
+new file mode 100644
+index 000000000..083c86840
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler "xvfrecip.s" } } */
++/* { dg-final { scan-assembler "xvfrecip.d" } } */
++/* { dg-final { scan-assembler-not "xvfdiv.s" } } */
++/* { dg-final { scan-assembler-not "xvfdiv.d" } } */
++
++float a[8], b[8];
++
++void
++foo1(void)
++{
++ for (int i = 0; i < 8; i++)
++ a[i] = 1 / (b[i]);
++}
++
++double da[4], db[4];
++
++void
++foo2(void)
++{
++ for (int i = 0; i < 4; i++)
++ da[i] = 1 / (db[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c
+new file mode 100644
+index 000000000..a005a3886
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -fno-unsafe-math-optimizations -mrecip -mlasx -mfrecipe" } */
++/* { dg-final { scan-assembler-times "xvfsqrt.s" 3 } } */
++/* { dg-final { scan-assembler-not "xvfrsqrte.s" } } */
++
++float a[8], b[8], c[8];
++
++extern float sqrtf (float);
++
++void
++foo1 (void)
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = a[i] / sqrtf (b[i]);
++}
++
++void
++foo2 (void)
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = sqrtf (a[i] / b[i]);
++}
++
++void
++foo3 (void)
++{
++ for (int i = 0; i < 8; i++)
++ c[i] = sqrtf (a[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c
+new file mode 100644
+index 000000000..1219b1ef8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe -fno-unsafe-math-optimizations" } */
++/* { dg-final { scan-assembler "vfdiv.s" } } */
++/* { dg-final { scan-assembler-not "vfrecipe.s" } } */
++
++float a[4],b[4],c[4];
++
++void
++foo ()
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = a[i] / b[i];
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c
+new file mode 100644
+index 000000000..edbe8d909
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe" } */
++/* { dg-final { scan-assembler "vfrecipe.s" } } */
++
++float a[4],b[4],c[4];
++
++void
++foo ()
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = a[i] / b[i];
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c
+new file mode 100644
+index 000000000..d356f915e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe" } */
++/* { dg-final { scan-assembler-times "vfrsqrte.s" 3 } } */
++
++float a[4], b[4], c[4];
++
++extern float sqrtf (float);
++
++void
++foo1 (void)
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = a[i] / sqrtf (b[i]);
++}
++
++void
++foo2 (void)
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = sqrtf (a[i] / b[i]);
++}
++
++void
++foo3 (void)
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = sqrtf (a[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c
+new file mode 100644
+index 000000000..c4d6af4db
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler "vfrecip.s" } } */
++/* { dg-final { scan-assembler "vfrecip.d" } } */
++/* { dg-final { scan-assembler-not "vfdiv.s" } } */
++/* { dg-final { scan-assembler-not "vfdiv.d" } } */
++
++float a[4], b[4];
++
++void
++foo1(void)
++{
++ for (int i = 0; i < 4; i++)
++ a[i] = 1 / (b[i]);
++}
++
++double da[2], db[2];
++
++void
++foo2(void)
++{
++ for (int i = 0; i < 2; i++)
++ da[i] = 1 / (db[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c
+new file mode 100644
+index 000000000..3ff6570a6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe -fno-unsafe-math-optimizations" } */
++/* { dg-final { scan-assembler-times "vfsqrt.s" 3 } } */
++/* { dg-final { scan-assembler-not "vfrsqrte.s" } } */
++
++float a[4], b[4], c[4];
++
++extern float sqrtf (float);
++
++void
++foo1 (void)
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = a[i] / sqrtf (b[i]);
++}
++
++void
++foo2 (void)
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = sqrtf (a[i] / b[i]);
++}
++
++void
++foo3 (void)
++{
++ for (int i = 0; i < 4; i++)
++ c[i] = sqrtf (a[i]);
++}
+--
+2.43.0
+
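Note on the patch above: the new expanders route float division and square root into loongarch_emit_swdivsf / loongarch_emit_swrsqrtsf whenever the matching TARGET_RECIP_* bit is set. The numeric idea behind those helpers is the estimate-plus-Newton-Raphson sequence that the invoke.texi hunk describes. The C sketch below models it under stated assumptions: recip_estimate is a stand-in for the hardware frecipe.s instruction, not a real API, and its accuracy is only mimicked by masking mantissa bits.

    #include <stdio.h>

    /* Stand-in for frecipe.s: a deliberately crude reciprocal estimate
       (~12 accurate bits here; the real instruction is table-driven).  */
    static float recip_estimate (float b)
    {
      union { float f; unsigned u; } v = { .f = 1.0f / b };
      v.u &= 0xfffff000u;           /* drop low mantissa bits */
      return v.f;
    }

    /* One Newton-Raphson step roughly doubles the accurate bits, which
       is why the emitted sequence lands within ~2 ulp of a true fdiv.s,
       as the documentation above notes.  */
    static float sw_div (float a, float b)
    {
      float x = recip_estimate (b);  /* frecipe.s */
      x = x * (2.0f - b * x);        /* refinement: multiply/negate-multiply-add */
      return a * x;                  /* final multiply */
    }

    int main (void)
    {
      printf ("%.9g vs %.9g\n", sw_div (1.0f, 3.0f), 1.0f / 3.0f);
      return 0;
    }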
View file
_service:tar_scm:0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch
Added
@@ -0,0 +1,83 @@ +From bb211ae35474a9fa1a8189f0a4c525ce3d8c280e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 6 Dec 2023 15:04:53 +0800 +Subject: PATCH 063/188 LoongArch: Vectorized loop unrolling is disable for + divf/sqrtf/rsqrtf when -mrecip is enabled. + +Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number +of generated instructions is close to or exceeds the maximum issue instructions per cycle of the +LoongArch, so vectorized loop unrolling is not performed on them. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor): + If m_has_recip is true, uf return 1. + (loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence. +--- + gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++-- + 1 file changed, 34 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 18326ce47..d64777179 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3970,7 +3970,9 @@ protected: + /* Reduction factor for suggesting unroll factor. */ + unsigned m_reduc_factor = 0; + /* True if the loop contains an average operation. */ +- bool m_has_avg =false; ++ bool m_has_avg = false; ++ /* True if the loop uses approximation instruction sequence. */ ++ bool m_has_recip = false; + }; + + /* Implement TARGET_VECTORIZE_CREATE_COSTS. */ +@@ -4017,7 +4019,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + { + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + +- if (m_has_avg) ++ if (m_has_avg || m_has_recip) + return 1; + + /* Don't unroll if it's specified explicitly not to be unrolled. */ +@@ -4077,6 +4079,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + } + } + ++ combined_fn cfn; ++ if (kind == vector_stmt ++ && stmt_info ++ && stmt_info->stmt) ++ { ++ /* Detect the use of approximate instruction sequence. */ ++ if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT) ++ && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) ++ switch (cfn) ++ { ++ case CFN_BUILT_IN_SQRTF: ++ m_has_recip = true; ++ default: ++ break; ++ } ++ else if (TARGET_RECIP_VEC_DIV ++ && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) ++ { ++ machine_mode mode = TYPE_MODE (vectype); ++ switch (gimple_assign_rhs_code (stmt_info->stmt)) ++ { ++ case RDIV_EXPR: ++ if (GET_MODE_INNER (mode) == SFmode) ++ m_has_recip = true; ++ default: ++ break; ++ } ++ } ++ } ++ + return retval; + } + +-- +2.43.0 +
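As a concrete illustration of the behavior patch 0063 tunes (a hypothetical compile; flags follow the tests earlier in this revision): with -mrecip each vectorized division below becomes a vfrecipe/xvfrecipe estimate plus refinement steps, four to five instructions that already approach the per-cycle issue width the commit message cites, so determine_suggested_unroll_factor now keeps the unroll factor at 1 rather than widening the loop body further.

    /* Hypothetical example; try: gcc -O3 -mlasx -mrecip -mfrecipe -ffast-math
       Each division in the vector body expands to an estimate plus
       Newton-Raphson refinement, so extra unrolling no longer helps.  */
    void
    scale (float *restrict dst, const float *restrict src, float d, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = src[i] / d;
    }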
View file
_service:tar_scm:0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Added
@@ -0,0 +1,130 @@ +From 6ca9670e02a7d3f939b1a75f7b5a9094cd1db909 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Fri, 25 Oct 2024 02:45:35 +0000 +Subject: PATCH 064/188 LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests + fail on LA664 PR112611 + +For xvshuf instructions, if the index value in the selector exceeds 63, it triggers +undefined behavior on LA464, but not on LA664. To ensure compatibility of these two +tests on both LA464 and LA664, we have modified both tests to ensure that the index +value in the selector does not exceed 63. + +gcc/testsuite/ChangeLog: + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. +--- + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 14 +++++++------- + .../gcc.target/loongarch/vector/lsx/lsx-vshuf.c | 12 ++++++------ + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index b8ab38711..910d29339 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -99,9 +99,9 @@ main () + *((unsigned long *)&__m256i_op12) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op11) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op10) = 0x7ff0000000000000; +- *((unsigned long *)&__m256i_op23) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op23) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op22) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op21) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op21) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0x0000000000000000; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +@@ -200,7 +200,7 @@ main () + *((unsigned long *)&__m256i_op20) = 0x0000000000000000; + *((unsigned long *)&__m256i_result3) = 0x0000000000000000; + *((unsigned long *)&__m256i_result2) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result1) = 0xffffffff00000000; + *((unsigned long *)&__m256i_result0) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +@@ -351,7 +351,7 @@ main () + *((unsigned long *)&__m256i_op21) = 0x0000000000000001; + *((unsigned long *)&__m256i_op20) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result3) = 0x0000000000000001; +- *((unsigned long *)&__m256i_result2) = 0x0000000200000000; ++ *((unsigned long *)&__m256i_result2) = 0x0000000000000000; + *((unsigned long *)&__m256i_result1) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result0) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); +@@ -426,10 +426,10 @@ main () + *((unsigned long *)&__m256i_op22) = 0x8000000080000000; + *((unsigned long *)&__m256i_op21) = 0xdfffffffdfffffff; + *((unsigned long *)&__m256i_op20) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result3) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result3) = 0xdfffffff80000000; + *((unsigned long *)&__m256i_result2) = 0x7fc00000dfffffff; +- *((unsigned long *)&__m256i_result1) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result0) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result1) = 
0x7fc0000000000000; ++ *((unsigned long *)&__m256i_result0) = 0x8000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index f3b800f88..93a3078fa 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -33,7 +33,7 @@ main () + *((unsigned long *)&__m128i_op21) = 0x0000000000000000; + *((unsigned long *)&__m128i_op20) = 0x3f2f1f0f00000000; + *((unsigned long *)&__m128i_result1) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result0) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result0) = 0x00ff00ff00000000; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -153,7 +153,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x000000002bfd9461; + *((unsigned long *)&__m128i_op21) = 0x00007fff00007fff; + *((unsigned long *)&__m128i_op20) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result1) = 0x00007fff00000000; + *((unsigned long *)&__m128i_result0) = 0x0000000000000000; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -198,7 +198,7 @@ main () + *((unsigned long *)&__m128i_op21) = 0x00000000000000c0; + *((unsigned long *)&__m128i_op20) = 0x00000001ffffff29; + *((unsigned long *)&__m128i_result1) = 0xffffff29ffffff29; +- *((unsigned long *)&__m128i_result0) = 0x0000000100000001; ++ *((unsigned long *)&__m128i_result0) = 0xffffff2900000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -219,7 +219,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000000000000000; + *((unsigned long *)&__m128i_op21) = 0x0000000020000020; + *((unsigned long *)&__m128i_op20) = 0x0000000020000020; +- *((unsigned long *)&__m128i_result1) = 0x2000002000000000; ++ *((unsigned long *)&__m128i_result1) = 0x0000000000000000; + *((unsigned long *)&__m128i_result0) = 0x2000002020000020; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -241,7 +241,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000001000000010; + *((unsigned long *)&__m128i_op21) = 0x8000000100000000; + *((unsigned long *)&__m128i_op20) = 0x8000000000000103; +- *((unsigned long *)&__m128i_result1) = 0x0000010300000103; ++ *((unsigned long *)&__m128i_result1) = 0x8000000000000103; + *((unsigned long *)&__m128i_result0) = 0x0000010380000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -252,7 +252,7 @@ main () + *((unsigned long *)&__m128i_op10) = 0x0000000000000000; + *((unsigned long *)&__m128i_op21) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op20) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_result1) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result1) = 0xffffffff00000000; + *((unsigned long *)&__m128i_result0) = 0xffffffffffffffff; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +-- +2.43.0 +
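The rule of thumb patch 0064 encodes for users of the shuffle intrinsics: keep every selector element below 64, because LA464 treats larger indices as undefined while LA664 masks them. A hedged sketch of defensive user code follows; the xvshuf.b call mirrors the test above, and the replicate/AND intrinsic spellings are assumed from the usual lasxintrin.h naming, so treat them as illustrative:

    #include <lasxintrin.h>

    /* Clamp each selector byte into [0, 63] so xvshuf.b picks among the
       64 source bytes deterministically on both LA464 and LA664.  */
    __m256i
    safe_xvshuf_b (__m256i a, __m256i b, __m256i sel)
    {
      __m256i mask = __lasx_xvrepli_b (0x3f);   /* splat 0x3f per byte */
      sel = __lasx_xvand_v (sel, mask);         /* force every index < 64 */
      return __lasx_xvshuf_b (a, b, sel);
    }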
View file
_service:tar_scm:0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
Added
@@ -0,0 +1,318 @@ +From 87396b4550eeb097cdbe73fb19c84059ba6bb85e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 29 Nov 2023 11:18:00 +0800 +Subject: PATCH 065/188 LoongArch: Fix ICE and use simplify_gen_subreg + instead of gen_rtx_SUBREG directly. + +loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported +in gcc, it causes an ICE: + +ice.c:55:1: error: unrecognizable insn: + 55 | } + | ^ +(insn 63 62 64 8 (set (reg:V4DI 278) + (subreg:V4DI (subreg:V4DF (reg:V4DI 273 vect__53.26 ) 0) 0)) -1 + (nil)) +during RTL pass: vregs +ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804 + +Last time, Ruoyao has fixed a similar ICE: +https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html + +This patch fixes ICE and use simplify_gen_subreg instead of gen_rtx_SUBREG as much as possible +to avoid the same ice happening again. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use + simplify_gen_subreg instead of gen_rtx_SUBREG. + (loongarch_expand_vec_perm_const_2): Ditto. + (loongarch_expand_vec_cond_expr): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr112476-3.c: New test. + * gcc.target/loongarch/pr112476-4.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 79 +++++++++++-------- + .../gcc.target/loongarch/pr112476-3.c | 58 ++++++++++++++ + .../gcc.target/loongarch/pr112476-4.c | 4 + + 3 files changed, 108 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d64777179..4a3a7a246 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8824,13 +8824,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) + if (d->vmode == E_V2DFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else if (d->vmode == E_V4SFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else +@@ -9614,8 +9614,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + /* Adjust op1 for selecting correct value in high 128bit of target + register. + op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x21))); + +@@ -9644,8 +9644,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + + /* Generate subreg for fitting into insn gen function. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + + /* Adjust op value in temp register. 
+ op0 = {0,1,2,3}, op1 = {4,5,0,1} */ +@@ -9691,9 +9691,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x02))); +@@ -9725,9 +9726,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + /* We can achieve the expectation by using sinple xvpermi.q insn. */ + emit_move_insn (conv_target, conv_op1); +@@ -9752,8 +9754,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + /* Adjust op value in temp regiter. 
+ op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */ + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +@@ -9797,9 +9799,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x13))); +@@ -9831,10 +9834,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx temp_reg = gen_reg_rtx (d->vmode); +- rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); ++ rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg, ++ d->vmode, 0); + + emit_move_insn (temp_reg, d->op0); + +@@ -9943,9 +9947,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); + + /* Duplicate op0's low 128bit in op0, then duplicate high 128bit + in op1. 
After this, xvshuf.* insn's selector argument can
+@@ -9978,10 +9984,12 @@
+ emit_move_insn (op0_alt, d->op0);
+ emit_move_insn (op1_alt, d->op1);
+
+- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
+- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
+- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
+- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
++ d->vmode, 0);
++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
++ d->vmode, 0);
++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
+
+ /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
+ that selector's low 128bit can access all op0's elements, and
+@@ -10101,12 +10109,12 @@
+ {
+ case E_V4DFmode:
+ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm));
+- tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
++ tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
+ emit_move_insn (tmp, sel);
+ break;
+ case E_V8SFmode:
+ sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm));
+- tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0);
++ tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
+ emit_move_insn (tmp, sel);
+ break;
+ default:
+@@ -10192,7 +10200,7 @@
+ 64bit in target vector register. */
+ else if (extract_ev_od)
+ {
+- rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
++ rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
+ emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted,
+ GEN_INT (0xD8)));
+ }
+@@ -11279,7 +11287,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
+ if (mode != vimode)
+ {
+ xop1 = gen_reg_rtx (vimode);
+- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
++ emit_move_insn (xop1,
++ simplify_gen_subreg (vimode, operands[1],
++ mode, 0));
+ }
+ emit_move_insn (src1, xop1);
+ }
+@@ -11296,7 +11306,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
+ if (mode != vimode)
+ {
+ xop2 = gen_reg_rtx (vimode);
+- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
++ emit_move_insn (xop2,
++ simplify_gen_subreg (vimode, operands[2],
++ mode, 0));
+ }
+ emit_move_insn (src2, xop2);
+ }
+@@ -11315,7 +11327,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
+ gen_rtx_AND (vimode, mask, src1));
+ /* The result is placed back to a register with the mask. 
*/
+ emit_insn (gen_rtx_SET (mask, bsel));
+- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
++ emit_move_insn (operands[0],
++ simplify_gen_subreg (mode, mask, vimode, 0));
+ }
+ }
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-3.c b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c
+new file mode 100644
+index 000000000..d696d4182
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c
+@@ -0,0 +1,58 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlsx" } */
++
++#include <stdint.h>
++
++typedef int8_t orc_int8;
++typedef int16_t orc_int16;
++typedef int32_t orc_int32;
++typedef int64_t orc_int64;
++
++typedef union
++{
++ orc_int32 i;
++ float f;
++ orc_int16 x2[2];
++ orc_int8 x4[4];
++} orc_union32;
++typedef union
++{
++ orc_int64 i;
++ double f;
++ orc_int32 x2[2];
++ float x2f[2];
++ orc_int16 x4[4];
++} orc_union64;
++
++void
++audio_orc_s32_to_double (double * restrict d1,
++ const signed int * restrict s1, int n)
++{
++ int i;
++ orc_union64 *restrict ptr0;
++ const orc_union32 *restrict ptr4;
++ orc_union32 var33;
++ orc_union64 var34;
++ orc_union64 var35;
++ orc_union64 var36;
++
++ ptr0 = (orc_union64 *) d1;
++ ptr4 = (orc_union32 *) s1;
++
++ var34.i = 0x41e0000000000000UL;
++
++ for (i = 0; i < n; i++) {
++ var33 = ptr4[i];
++ var36.f = var33.i;
++ {
++ orc_union64 _src1;
++ orc_union64 _src2;
++ orc_union64 _dest1;
++ _src1.i = ((var36.i) & ((((var36.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
++ _src2.i = ((var34.i) & ((((var34.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
++ _dest1.f = _src1.f / _src2.f;
++ var35.i = ((_dest1.i) & ((((_dest1.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
++ }
++ ptr0[i] = var35;
++ }
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-4.c b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c
+new file mode 100644
+index 000000000..955d98552
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c
+@@ -0,0 +1,4 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mlasx" } */
++
++#include "pr112476-3.c"
+--
+2.43.0
+
View file
_service:tar_scm:0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
Added
@@ -0,0 +1,236 @@ +From 34088d0a8685defa97754b7ab5d90b9bc536cfaa Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Fri, 8 Dec 2023 18:01:18 +0800 +Subject: PATCH 066/188 LoongArch: Fix eh_return epilogue for normal returns. + +On LoongArch, the regitsters $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved +and restored in the function prologue and epilogue if the given function calls +__builtin_eh_return. This causes the return value to be overwritten on normal +return paths and breaks a rare case of libgcc's _Unwind_RaiseException. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: Do not restore the saved eh_return + data registers ($r4-$r7) for a normal return of a function that calls + __builtin_eh_return elsewhere. + * config/loongarch/loongarch-protos.h: Same. + * config/loongarch/loongarch.md: Same. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/eh_return-normal-return.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 34 ++++++++++++----- + gcc/config/loongarch/loongarch.md | 23 ++++++++++- + .../loongarch/eh_return-normal-return.c | 38 +++++++++++++++++++ + 4 files changed, 84 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 117669e9f..e5fcf3111 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -60,7 +60,7 @@ enum loongarch_symbol_type { + extern rtx loongarch_emit_move (rtx, rtx); + extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); + extern void loongarch_expand_prologue (void); +-extern void loongarch_expand_epilogue (bool); ++extern void loongarch_expand_epilogue (int); + extern bool loongarch_can_use_return_insn (void); +  + extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4a3a7a246..7caf04d8d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, + + static void + loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, +- loongarch_save_restore_fn fn) ++ loongarch_save_restore_fn fn, ++ bool skip_eh_data_regs_p) + { + HOST_WIDE_INT offset; + +@@ -1021,7 +1022,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { +- if (!cfun->machine->reg_is_wrapped_separatelyregno) ++ /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO) ++ when returning normally from a function that calls ++ __builtin_eh_return. In this case, these registers are saved but ++ should not be restored, or the return value may be clobbered. 
*/
++
++ if (!(cfun->machine->reg_is_wrapped_separately[regno]
++ || (skip_eh_data_regs_p
++ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4)))
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
+ offset -= UNITS_PER_WORD;
+@@ -1294,7 +1302,7 @@ loongarch_expand_prologue (void)
+ GEN_INT (-step1));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ size -= step1;
+- loongarch_for_each_saved_reg (size, loongarch_save_reg);
++ loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
+ }
+
+ /* Set up the frame pointer, if we're using one. */
+@@ -1379,11 +1387,13 @@ loongarch_can_use_return_insn (void)
+ return reload_completed && cfun->machine->frame.total_size == 0;
+ }
+
+-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
+- says which. */
++/* Expand function epilogue using the following insn patterns:
++ "epilogue" (style == NORMAL_RETURN)
++ "sibcall_epilogue" (style == SIBCALL_RETURN)
++ "eh_return" (style == EXCEPTION_RETURN) */
+
+ void
+-loongarch_expand_epilogue (bool sibcall_p)
++loongarch_expand_epilogue (int style)
+ {
+ /* Split the frame into two. STEP1 is the amount of stack we should
+ deallocate before restoring the registers. STEP2 is the amount we
+@@ -1400,7 +1410,8 @@ loongarch_expand_epilogue (bool sibcall_p)
+ bool need_barrier_p
+ = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
+
+- if (!sibcall_p && loongarch_can_use_return_insn ())
++ /* Handle simple returns. */
++ if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
+ {
+ emit_jump_insn (gen_return ());
+ return;
+@@ -1476,7 +1487,9 @@ loongarch_expand_epilogue (bool sibcall_p)
+
+ /* Restore the registers. */
+ loongarch_for_each_saved_reg (frame->total_size - step2,
+- loongarch_restore_reg);
++ loongarch_restore_reg,
++ crtl->calls_eh_return
++ && style != EXCEPTION_RETURN);
+
+ if (need_barrier_p)
+ loongarch_emit_stack_tie ();
+@@ -1497,11 +1510,12 @@ loongarch_expand_epilogue (bool sibcall_p)
+ }
+
+ /* Add in the __builtin_eh_return stack adjustment. */
+- if (crtl->calls_eh_return)
++ if (crtl->calls_eh_return && style == EXCEPTION_RETURN)
+ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+- if (!sibcall_p)
++ /* Emit return unless doing sibcall. */
++ if (style != SIBCALL_RETURN)
+ emit_jump_insn (gen_simple_return_internal (ra));
+ }
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index c6edd1dda..222f1ae83 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -125,6 +125,11 @@
+ (T1_REGNUM 13)
+ (S0_REGNUM 23)
+
++ ;; Return path styles
++ (NORMAL_RETURN 0)
++ (SIBCALL_RETURN 1)
++ (EXCEPTION_RETURN 2)
++
+ ;; PIC long branch sequences are never longer than 100 bytes. 
+ (MAX_PIC_BRANCH_LENGTH 100)
+])
+@@ -3276,7 +3281,7 @@
+ [(const_int 2)]
+ ""
+ {
+- loongarch_expand_epilogue (false);
++ loongarch_expand_epilogue (NORMAL_RETURN);
+ DONE;
+ })
+
+@@ -3284,7 +3289,7 @@
+ [(const_int 2)]
+ ""
+ {
+- loongarch_expand_epilogue (true);
++ loongarch_expand_epilogue (SIBCALL_RETURN);
+ DONE;
+ })
+
+@@ -3341,6 +3346,20 @@
+ emit_insn (gen_eh_set_ra_di (operands[0]));
+ else
+ emit_insn (gen_eh_set_ra_si (operands[0]));
++
++ emit_jump_insn (gen_eh_return_internal ());
++ emit_barrier ();
++ DONE;
++})
++
++(define_insn_and_split "eh_return_internal"
++ [(eh_return)]
++ ""
++ "#"
++ "epilogue_completed"
++ [(const_int 0)]
++{
++ loongarch_expand_epilogue (EXCEPTION_RETURN);
+ DONE;
+ })
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
+new file mode 100644
+index 000000000..f8f3965f8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
+@@ -0,0 +1,38 @@
++/* { dg-do run } */
++/* { dg-options "-O2" } */
++
++#include <stdlib.h>
++
++int foo () __attribute__((noinline));
++int main ();
++
++int
++foo () {
++
++ int t;
++
++ /* prevent optimization using asm */
++ asm ("" : "=r" (t) : "0" (-1));
++ asm ("" : "=r" (t) : "0" (t ? 1 : 0));
++
++ if (t == 0)
++ /* never reached */
++ __builtin_eh_return (0, __builtin_return_address (0));
++
++ else if (t == 1)
++ /* return here */
++ return 202312;
++
++ else
++ /* never reached: prevent vrp optimization in main */
++ return 0;
++}
++
++int
++main ()
++{
++ if (foo() == 202312)
++ return 0;
++ else
++ abort ();
++}
+--
+2.43.0
+
View file
_service:tar_scm:0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
Added
@@ -0,0 +1,180 @@ +From fdb51014f00094737459d5c9008630454ec7f342 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 7 Dec 2023 15:45:30 +0800 +Subject: PATCH 067/188 LoongArch: Allow -mcmodel=extreme and model attribute + with -mexplicit-relocs=auto + +There seems no real reason to require -mexplicit-relocs=always for +-mcmodel=extreme or model attribute. As the linker does not know how to +relax a 3-operand la.local or la.global pseudo instruction, just emit +explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also +SYMBOL_GOT_DISP. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP + if TARGET_CMODEL_EXTREME. + (loongarch_split_symbol): Check for la_opt_explicit_relocs != + EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS. + (loongarch_print_operand_reloc): Likewise. + (loongarch_option_override_internal): Likewise. + (loongarch_handle_model_attribute): Likewise. + * doc/invoke.texi (-mcmodel=extreme): Update the compatibility + between it and -mexplicit-relocs=. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-3.c: New test. + * gcc.target/loongarch/attr-model-4.c: New test. + * gcc.target/loongarch/func-call-extreme-3.c: New test. + * gcc.target/loongarch/func-call-extreme-4.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 25 ++++++++++++------- + gcc/doc/invoke.texi | 4 +-- + .../gcc.target/loongarch/attr-model-3.c | 6 +++++ + .../gcc.target/loongarch/attr-model-4.c | 6 +++++ + .../loongarch/func-call-extreme-3.c | 7 ++++++ + .../loongarch/func-call-extreme-4.c | 7 ++++++ + 6 files changed, 44 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7caf04d8d..4362149ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1969,9 +1969,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + case SYMBOL_TLS_LE: + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +- /* The linker don't know how to relax TLS accesses. */ ++ case SYMBOL_PCREL64: ++ /* The linker don't know how to relax TLS accesses or 64-bit ++ pc-relative accesses. */ + return true; + case SYMBOL_GOT_DISP: ++ /* The linker don't know how to relax GOT accesses in extreme ++ code model. 
*/ ++ if (TARGET_CMODEL_EXTREME) ++ return true; ++ + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -3134,7 +3141,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + + if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + temp1 = gen_reg_rtx (Pmode); + emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), +@@ -5933,7 +5940,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + loongarch_classify_symbolic_expression (op); + + if (loongarch_symbol_extreme_p (symbol_type)) +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + switch (symbol_type) + { +@@ -7540,9 +7547,9 @@ loongarch_option_override_internal (struct gcc_options *opts, + switch (la_target.cmodel) + { + case CMODEL_EXTREME: +- if (!TARGET_EXPLICIT_RELOCS) +- error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs=always"); ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) ++ error ("code model %qs is not compatible with %s", ++ "extreme", "-mexplicit-relocs=none"); + + if (opts->x_flag_plt) + { +@@ -7908,11 +7915,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + *no_add_attrs = true; + return NULL_TREE; + } +- if (!TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, +- "-mexplicit-relocs=always"); ++ "%qE attribute is not compatible with %s", name, ++ "-mexplicit-relocs=none"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 76a8f20d1..5c6515cb1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -24602,8 +24602,8 @@ The text segment and data segment must be within 2GB addressing space. + + @item extreme + This mode does not limit the size of the code segment and data segment. +-The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and +-@option{-mno-explicit-relocs}. ++The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} ++and/or @option{-mexplicit-relocs=none}. + @end table + The default code model is @code{normal}. 
+ +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +new file mode 100644 +index 000000000..5622d5086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +new file mode 100644 +index 000000000..482724bb9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +new file mode 100644 +index 000000000..a4da44b4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +new file mode 100644 +index 000000000..16b00f4c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +-- +2.43.0 +
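For context on patch 0067, the model attribute it mentions is applied per object, and with this change a translation unit like the following builds under the default -mexplicit-relocs=auto instead of demanding =always. This is a hypothetical usage sketch; the attribute spelling follows the attr-model tests above.

    /* Hypothetical compile: gcc -O2 -mexplicit-relocs=auto -mcmodel=normal
       Only this object is addressed with the extreme (full 64-bit) model,
       emitting the %pc64_hi12-style relocations the tests scan for.  */
    int counter __attribute__ ((model ("extreme")));

    int
    bump (void)
    {
      return ++counter;
    }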
View file
_service:tar_scm:0068-LoongArch-Fix-warnings-building-libgcc.patch
Added
@@ -0,0 +1,79 @@ +From 5a910f294605d0163f8f4ac255a14425b154b5dd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 22:08:37 +0800 +Subject: PATCH 068/188 LoongArch: Fix warnings building libgcc + +We are excluding loongarch-opts.h from target libraries, but now struct +loongarch_target and gcc_options are not declared in the target +libraries, causing: + +In file included from ../.././gcc/options.h:8, + from ../.././gcc/tm.h:49, + from ../../../gcc/libgcc/fixed-bit.c:48: +../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41: +warning: 'struct gcc_options' declared inside parameter list will not +be visible outside of this definition or declaration + 57 | struct gcc_options *opts, + | ^~~~~~~~~~~ + +So exclude the declarations referring to the C++ structs as well. + +gcc/ChangeLog: + + * config/loongarch/loongarch-opts.h (la_target): Move into #if + for loongarch-def.h. + (loongarch_init_target): Likewise. + (loongarch_config_target): Likewise. + (loongarch_update_gcc_opt_status): Likewise. +--- + gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 7010ddfec..639ed50bd 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -21,22 +21,15 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_OPTS_H + #define LOONGARCH_OPTS_H + +-/* This is a C++ header and it shouldn't be used by target libraries. */ ++/* The loongarch-def.h file is a C++ header and it shouldn't be used by ++ target libraries. Exclude it and everything using the C++ structs ++ (struct loongarch_target and gcc_options) from target libraries. */ + #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include "loongarch-def.h" +-#endif + + /* Target configuration */ + extern struct loongarch_target la_target; + +-/* Flag status */ +-struct loongarch_flags { +- int flt; const char* flt_str; +-#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) +- int sx2; +-}; +- +- + /* Initialize loongarch_target from separate option variables. */ + void + loongarch_init_target (struct loongarch_target *target, +@@ -56,7 +49,14 @@ void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); ++#endif + ++/* Flag status */ ++struct loongarch_flags { ++ int flt; const char* flt_str; ++#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) ++ int sx2; ++}; + + /* Macros for common conditional expressions used in loongarch.{c,h,md} */ + #define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) +-- +2.43.0 +
View file
_service:tar_scm:0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch
Added
@@ -0,0 +1,30 @@ +From 639e7518c8a4468cd50d774c5a3dbda5f2dbb4a7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 13 Dec 2023 02:39:35 +0800 +Subject: PATCH 069/188 LoongArch: testsuite: Remove XFAIL in + vect-ftint-no-inexact.c + +After r14-6455 this no longer fails. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove. +--- + gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +index 83d268099..61918beef 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +@@ -39,6 +39,5 @@ + /* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */ + /* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */ + +-/* trunc: XFAIL due to PR 107723 */ +-/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */ + /* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ +-- +2.43.0 +
View file
_service:tar_scm:0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch
Added
@@ -0,0 +1,44 @@
+From 6a5e3932a39f1ffa6f87479748ee711e4fa47d30 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 9 Dec 2023 15:27:28 +0800
+Subject: [PATCH 070/188] LoongArch: Include rtl.h for COSTS_N_INSNS instead of
+ hard coding our own
+
+With loongarch-def.cc switched from C to C++, we can include rtl.h for
+COSTS_N_INSNS, instead of hard coding our own.
+
+This is a non-functional change for now, but it will make the code more
+future-proof in case COSTS_N_INSNS in rtl.h would be changed.
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch-def.cc (rtl.h): Include.
+ (COSTS_N_INSNS): Remove the macro definition.
+---
+ gcc/config/loongarch/loongarch-def.cc | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
+index c41804a18..6217b1926 100644
+--- a/gcc/config/loongarch/loongarch-def.cc
++++ b/gcc/config/loongarch/loongarch-def.cc
+@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
++#include "rtl.h"
+
+ #include "loongarch-def.h"
+ #include "loongarch-str.h"
+@@ -89,8 +90,6 @@ array_tune<loongarch_align> loongarch_cpu_align =
+ .set (CPU_LA464, la464_align ())
+ .set (CPU_LA664, la464_align ());
+
+-#define COSTS_N_INSNS(N) ((N) * 4)
+-
+ /* Default RTX cost initializer. */
+ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
+ : fp_add (COSTS_N_INSNS (1)),
+--
+2.43.0
+
View file
_service:tar_scm:0071-LoongArch-Fix-instruction-costs-PR112936.patch
Added
@@ -0,0 +1,165 @@ +From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 9 Dec 2023 17:41:32 +0800 +Subject: PATCH 071/188 LoongArch: Fix instruction costs PR112936 + +Replace the instruction costs in loongarch_rtx_cost_data constructor +based on micro-benchmark results on LA464 and LA664. + +This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl +and slli. + +gcc/ChangeLog: + + PR target/112936 + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update + instruction costs per micro-benchmark results. + (loongarch_rtx_cost_optimize_size): Set all instruction costs + to (COSTS_N_INSNS (1) + 1). + * config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove + special case for multiplication when optimizing for size. + Adjust division cost when TARGET_64BIT && !TARGET_DIV32. + Account the extra cost when TARGET_CHECK_ZERO_DIV and + optimizing for speed. + +gcc/testsuite/ChangeLog + + PR target/112936 + * gcc.target/loongarch/mul-const-reduction.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 39 ++++++++++--------- + gcc/config/loongarch/loongarch.cc | 22 +++++------ + .../loongarch/mul-const-reduction.c | 11 ++++++ + 3 files changed, 43 insertions(+), 29 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6217b1926..4a8885e83 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align = + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +- : fp_add (COSTS_N_INSNS (1)), +- fp_mult_sf (COSTS_N_INSNS (2)), +- fp_mult_df (COSTS_N_INSNS (4)), +- fp_div_sf (COSTS_N_INSNS (6)), ++ : fp_add (COSTS_N_INSNS (5)), ++ fp_mult_sf (COSTS_N_INSNS (5)), ++ fp_mult_df (COSTS_N_INSNS (5)), ++ fp_div_sf (COSTS_N_INSNS (8)), + fp_div_df (COSTS_N_INSNS (8)), +- int_mult_si (COSTS_N_INSNS (1)), +- int_mult_di (COSTS_N_INSNS (1)), +- int_div_si (COSTS_N_INSNS (4)), +- int_div_di (COSTS_N_INSNS (6)), ++ int_mult_si (COSTS_N_INSNS (4)), ++ int_mult_di (COSTS_N_INSNS (4)), ++ int_div_si (COSTS_N_INSNS (5)), ++ int_div_di (COSTS_N_INSNS (5)), + branch_cost (6), + memory_latency (4) {} + +@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = + array_tune<loongarch_rtx_cost_data> (); + +-/* RTX costs to use when optimizing for size. */ ++/* RTX costs to use when optimizing for size. ++ We use a value slightly larger than COSTS_N_INSNS (1) for all of them ++ because they are slower than simple instructions. 
*/ ++#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1) + const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + loongarch_rtx_cost_data () +- .fp_add_ (4) +- .fp_mult_sf_ (4) +- .fp_mult_df_ (4) +- .fp_div_sf_ (4) +- .fp_div_df_ (4) +- .int_mult_si_ (4) +- .int_mult_di_ (4) +- .int_div_si_ (4) +- .int_div_di_ (4); ++ .fp_add_ (COST_COMPLEX_INSN) ++ .fp_mult_sf_ (COST_COMPLEX_INSN) ++ .fp_mult_df_ (COST_COMPLEX_INSN) ++ .fp_div_sf_ (COST_COMPLEX_INSN) ++ .fp_div_df_ (COST_COMPLEX_INSN) ++ .int_mult_si_ (COST_COMPLEX_INSN) ++ .int_mult_di_ (COST_COMPLEX_INSN) ++ .int_div_si_ (COST_COMPLEX_INSN) ++ .int_div_di_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4362149ef..afbb55390 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = (speed + ? loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); +- else if (!speed) +- *total = COSTS_N_INSNS (1) + 1; + else if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else +@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case UDIV: + case UMOD: +- if (!speed) +- { +- *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); +- } +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_div_di; + else +- *total = loongarch_cost->int_div_si; ++ { ++ *total = loongarch_cost->int_div_si; ++ if (TARGET_64BIT && !TARGET_DIV32) ++ *total += COSTS_N_INSNS (2); ++ } ++ ++ if (TARGET_CHECK_ZERO_DIV) ++ *total += COSTS_N_INSNS (2); ++ + return false; + + case SIGN_EXTEND: +@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { +- if (!speed) +- *total = COSTS_N_INSNS (1) + 1; +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else + *total = loongarch_cost->int_mult_si; +diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +new file mode 100644 +index 000000000..02d9a4876 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464" } */ ++/* { dg-final { scan-assembler "alsl\.w" } } */ ++/* { dg-final { scan-assembler "slli\.w" } } */ ++/* { dg-final { scan-assembler-not "mul\.w" } } */ ++ ++int ++test (int a) ++{ ++ return a * 68; ++} +-- +2.43.0 +
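Editor's note: the (COSTS_N_INSNS (1) + 1) value above is worth spelling out. At -Os every "complex" operation is priced one unit above a single instruction, so replacing it with one simple instruction still wins, but replacing it with two does not. A hedged arithmetic sketch in plain C — the values only illustrate the comparison the RTX cost hooks perform, they are not GCC code:

#include <stdio.h>

#define COSTS_N_INSNS(N) ((N) * 4)
#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1)

int main (void)
{
  int mul        = COST_COMPLEX_INSN;  /* 5: e.g. mul.w at -Os */
  int one_simple = COSTS_N_INSNS (1);  /* 4: e.g. a single alsl.w */
  int two_simple = COSTS_N_INSNS (2);  /* 8: e.g. alsl.w + slli.w */

  printf ("1-insn replacement: %s\n", one_simple < mul ? "expand" : "keep mul");
  printf ("2-insn replacement: %s\n", two_simple < mul ? "expand" : "keep mul");
  return 0;
}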
_service:tar_scm:0072-LoongArch-Add-alslsi3_extend.patch
Added
@@ -0,0 +1,53 @@
+From 89dfb9ad8687f9b31be5925b2d106b6ec13cc628 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 9 Dec 2023 18:02:35 +0800
+Subject: [PATCH 072/188] LoongArch: Add alslsi3_extend
+
+Following the instruction cost fix, we are generating
+
+    alsl.w $a0, $a0, $a0, 4
+
+instead of
+
+    li.w $t0, 17
+    mul.w $a0, $t0
+
+for "x * 17", because alsl.w is 4 times faster than mul.w.  But we didn't
+have a sign-extending pattern for alsl.w, causing an extra slli.w
+instruction to be generated to sign-extend $a0.  Add the pattern to remove
+the redundant extension.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (alslsi3_extend): New
+	define_insn.
+---
+ gcc/config/loongarch/loongarch.md | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 222f1ae83..23368008e 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2874,6 +2874,18 @@
+   [(set_attr "type" "arith")
+    (set_attr "mode" "<MODE>")])
+ 
++(define_insn "alslsi3_extend"
++  [(set (match_operand:DI 0 "register_operand" "=r")
++	(sign_extend:DI
++	  (plus:SI
++	    (ashift:SI (match_operand:SI 1 "register_operand" "r")
++		       (match_operand 2 "const_immalsl_operand" ""))
++	    (match_operand:SI 3 "register_operand" "r"))))]
++  ""
++  "alsl.w\t%0,%1,%3,%2"
++  [(set_attr "type" "arith")
++   (set_attr "mode" "SI")])
++
+ 
+ ;; Reverse the order of bytes of operand 1 and store the result in operand 0.
+--
+2.43.0
+
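Editor's note: the identity behind this rewrite is x * 17 == (x << 4) + x, and x * 68 == ((x << 4) + x) << 2. A small self-checking C model follows; alsl_w is a hypothetical helper (not a GCC or libc function) mirroring the semantics of alsl.w rd,rj,rk,sa, which computes (rj << sa) + rk with sa in 1..4.

#include <assert.h>
#include <stdint.h>

static int32_t
alsl_w (int32_t rj, int32_t rk, int sa)
{
  /* Unsigned arithmetic sidesteps C's signed-shift rules while matching
     the 32-bit wrap-around behaviour of the hardware instruction.  */
  return (int32_t) (((uint32_t) rj << sa) + (uint32_t) rk);
}

int main (void)
{
  for (int32_t x = -1000; x <= 1000; x++)
    {
      assert (alsl_w (x, x, 4) == x * 17);  /* one alsl.w */
      /* x * 68: alsl.w then a shift by 2 (slli.w in real code,
	 modeled here as alsl with rk = 0).  */
      assert (alsl_w (alsl_w (x, x, 4), 0, 2) == x * 68);
    }
  return 0;
}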
_service:tar_scm:0073-LoongArch-Add-support-for-D-frontend.patch
Added
@@ -0,0 +1,224 @@ +From 6ef045728a11218f023fee4527cd6d2fdb2c2910 Mon Sep 17 00:00:00 2001 +From: liushuyu <liushuyu011@gmail.com> +Date: Mon, 18 Dec 2023 09:52:07 +0800 +Subject: PATCH 073/188 LoongArch: Add support for D frontend. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-d.o to d_target_objs for LoongArch + architecture. + * config/loongarch/t-loongarch: Add object target for loongarch-d.cc. + * config/loongarch/loongarch-d.cc + (loongarch_d_target_versions): add interface function to define builtin + D versions for LoongArch architecture. + (loongarch_d_handle_target_float_abi): add interface function to define + builtin D traits for LoongArch architecture. + (loongarch_d_register_target_info): add interface function to register + loongarch_d_handle_target_float_abi function. + * config/loongarch/loongarch-d.h + (loongarch_d_target_versions): add function prototype. + (loongarch_d_register_target_info): Likewise. + +libphobos/ChangeLog: + + * configure.tgt: Enable libphobos for LoongArch architecture. + * libdruntime/gcc/sections/elf.d: Add TLS_DTV_OFFSET constant for + LoongArch64. + * libdruntime/gcc/unwind/generic.d: Add __aligned__ constant for + LoongArch64. +--- + gcc/config.gcc | 1 + + gcc/config/loongarch/loongarch-d.cc | 77 ++++++++++++++++++++++ + gcc/config/loongarch/loongarch-d.h | 26 ++++++++ + gcc/config/loongarch/t-loongarch | 4 ++ + libphobos/configure.tgt | 3 + + libphobos/libdruntime/gcc/sections/elf.d | 2 + + libphobos/libdruntime/gcc/unwind/generic.d | 1 + + 7 files changed, 114 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch-d.cc + create mode 100644 gcc/config/loongarch/loongarch-d.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 11ab620d0..039187fa2 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -456,6 +456,7 @@ mips*-*-*) + ;; + loongarch*-*-*) + cpu_type=loongarch ++ d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" + extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" +diff --git a/gcc/config/loongarch/loongarch-d.cc b/gcc/config/loongarch/loongarch-d.cc +new file mode 100644 +index 000000000..9ac483c39 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.cc +@@ -0,0 +1,77 @@ ++/* Subroutines for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm_d.h" ++#include "d/d-target.h" ++#include "d/d-target-def.h" ++ ++/* Implement TARGET_D_CPU_VERSIONS for LoongArch targets. 
*/ ++ ++void ++loongarch_d_target_versions (void) ++{ ++ if (TARGET_64BIT) ++ d_add_builtin_version ("LoongArch64"); ++ else ++ d_add_builtin_version ("LoongArch32"); ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_HardFloat"); ++ d_add_builtin_version ("D_HardFloat"); ++ } ++ else if (TARGET_SOFT_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_SoftFloat"); ++ d_add_builtin_version ("D_SoftFloat"); ++ } ++} ++ ++/* Handle a call to `__traits(getTargetInfo, "floatAbi")'. */ ++ ++static tree ++loongarch_d_handle_target_float_abi (void) ++{ ++ const char *abi; ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ abi = "hard"; ++ else if (TARGET_SOFT_FLOAT_ABI) ++ abi = "soft"; ++ else ++ abi = ""; ++ ++ return build_string_literal (strlen (abi) + 1, abi); ++} ++ ++/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO. */ ++ ++void ++loongarch_d_register_target_info (void) ++{ ++ const struct d_target_info_spec handlers = { ++ {"floatAbi", loongarch_d_handle_target_float_abi}, ++ {NULL, NULL}, ++ }; ++ ++ d_add_target_info_handlers (handlers); ++} +diff --git a/gcc/config/loongarch/loongarch-d.h b/gcc/config/loongarch/loongarch-d.h +new file mode 100644 +index 000000000..a2fb8d51d +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.h +@@ -0,0 +1,26 @@ ++/* Definitions for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++/* Defined in loongarch-d.cc */ ++extern void ++loongarch_d_target_versions (void); ++extern void ++loongarch_d_register_target_info (void); ++ ++/* Target hooks for D language. 
*/ ++#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++#define TARGET_D_REGISTER_CPU_TARGET_INFO loongarch_d_register_target_info +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index a1a40431f..994f4d19c 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -67,6 +67,10 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + ++loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.cc ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ +diff --git a/libphobos/configure.tgt b/libphobos/configure.tgt +index 0063dd232..dcb1551cd 100644 +--- a/libphobos/configure.tgt ++++ b/libphobos/configure.tgt +@@ -36,6 +36,9 @@ case "${target}" in + hppa-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; ++ loongarch*-*-linux*) ++ LIBPHOBOS_SUPPORTED=yes ++ ;; + mips*-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; +diff --git a/libphobos/libdruntime/gcc/sections/elf.d b/libphobos/libdruntime/gcc/sections/elf.d +index 5819811f3..bc993ea49 100644 +--- a/libphobos/libdruntime/gcc/sections/elf.d ++++ b/libphobos/libdruntime/gcc/sections/elf.d +@@ -1061,6 +1061,8 @@ else version (MIPS64) + enum TLS_DTV_OFFSET = 0x8000; + else version (IBMZ_Any) + enum TLS_DTV_OFFSET = 0x0; ++else version (LoongArch64) ++ enum TLS_DTV_OFFSET = 0x0; + else + static assert( false, "Platform not supported." ); + +diff --git a/libphobos/libdruntime/gcc/unwind/generic.d b/libphobos/libdruntime/gcc/unwind/generic.d +index 929b75dc7..8e5db80e1 100644 +--- a/libphobos/libdruntime/gcc/unwind/generic.d ++++ b/libphobos/libdruntime/gcc/unwind/generic.d +@@ -141,6 +141,7 @@ else version (SPARC64) private enum __aligned__ = 16; + else version (SystemZ) private enum __aligned__ = 8; + else version (X86) private enum __aligned__ = 16; + else version (X86_64) private enum __aligned__ = 16; ++else version (LoongArch64) private enum __aligned__ = 16; + else static assert( false, "Platform not supported."); + + align(__aligned__) struct _Unwind_Exception +-- +2.43.0 +
_service:tar_scm:0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
Added
@@ -0,0 +1,156 @@ +From 29eade7dc3032c6054f2ec2e2caa4ce43da6212d Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Fri, 8 Dec 2023 18:09:41 +0800 +Subject: PATCH 074/188 libruntime: Add fiber context switch code for + LoongArch. + +libphobos/ChangeLog: + + * libdruntime/config/loongarch/switchcontext.S: New file. +--- + .../config/loongarch/switchcontext.S | 133 ++++++++++++++++++ + 1 file changed, 133 insertions(+) + create mode 100644 libphobos/libdruntime/config/loongarch/switchcontext.S + +diff --git a/libphobos/libdruntime/config/loongarch/switchcontext.S b/libphobos/libdruntime/config/loongarch/switchcontext.S +new file mode 100644 +index 000000000..edfb9b67e +--- /dev/null ++++ b/libphobos/libdruntime/config/loongarch/switchcontext.S +@@ -0,0 +1,133 @@ ++/* LoongArch support code for fibers and multithreading. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "../common/threadasm.S" ++ ++/** ++ * Performs a context switch. ++ * ++ * $a0 - void** - ptr to old stack pointer ++ * $a1 - void* - new stack pointer ++ * ++ */ ++ ++#if defined(__loongarch_lp64) ++# define GPR_L ld.d ++# define GPR_S st.d ++# define SZ_GPR 8 ++# define ADDSP(si) addi.d $sp, $sp, si ++#elif defined(__loongarch64_ilp32) ++# define GPR_L ld.w ++# define GPR_S st.w ++# define SZ_GPR 4 ++# define ADDSP(si) addi.w $sp, $sp, si ++#else ++# error Unsupported GPR size (must be 64-bit or 32-bit). ++#endif ++ ++#if defined(__loongarch_double_float) ++# define FPR_L fld.d ++# define FPR_S fst.d ++# define SZ_FPR 8 ++#elif defined(__loongarch_single_float) ++# define FPR_L fld.s ++# define FPR_S fst.s ++# define SZ_FPR 4 ++#else ++# define SZ_FPR 0 ++#endif ++ ++ .text ++ .align 2 ++ .global fiber_switchContext ++ .type fiber_switchContext, @function ++fiber_switchContext: ++ .cfi_startproc ++ ADDSP(-11 * SZ_GPR) ++ ++ // fp regs and return address are stored below the stack ++ // because we don't want the GC to scan them. 
++ ++ // return address (r1) ++ GPR_S $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ // callee-saved scratch FPRs (f24-f31) ++ FPR_S $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_S $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_S $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_S $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_S $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_S $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_S $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_S $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ // callee-saved GPRs (r21, fp (r22), r23-r31) ++ GPR_S $r21, $sp, 0*SZ_GPR ++ GPR_S $fp, $sp, 1*SZ_GPR ++ GPR_S $s0, $sp, 2*SZ_GPR ++ GPR_S $s1, $sp, 3*SZ_GPR ++ GPR_S $s2, $sp, 4*SZ_GPR ++ GPR_S $s3, $sp, 5*SZ_GPR ++ GPR_S $s4, $sp, 6*SZ_GPR ++ GPR_S $s5, $sp, 7*SZ_GPR ++ GPR_S $s6, $sp, 8*SZ_GPR ++ GPR_S $s7, $sp, 9*SZ_GPR ++ GPR_S $s8, $sp, 10*SZ_GPR ++ ++ // swap stack pointer ++ GPR_S $sp, $a0, 0 ++ move $sp, $a1 ++ ++ GPR_L $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ FPR_L $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_L $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_L $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_L $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_L $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_L $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_L $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_L $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ GPR_L $r21, $sp, 0*SZ_GPR ++ GPR_L $fp, $sp, 1*SZ_GPR ++ GPR_L $s0, $sp, 2*SZ_GPR ++ GPR_L $s1, $sp, 3*SZ_GPR ++ GPR_L $s2, $sp, 4*SZ_GPR ++ GPR_L $s3, $sp, 5*SZ_GPR ++ GPR_L $s4, $sp, 6*SZ_GPR ++ GPR_L $s5, $sp, 7*SZ_GPR ++ GPR_L $s6, $sp, 8*SZ_GPR ++ GPR_L $s7, $sp, 9*SZ_GPR ++ GPR_L $s8, $sp, 10*SZ_GPR ++ ++ ADDSP(11 * SZ_GPR) ++ ++ jr $r1 // return ++ .cfi_endproc ++ .size fiber_switchContext,.-fiber_switchContext +-- +2.43.0 +
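Editor's note: the assembly above implements fiber_switchContext(void **oldsp, void *newsp): spill the callee-saved GPRs/FPRs and the return address, store the old $sp through the first argument, adopt the new stack pointer, and reload. The POSIX ucontext API performs the same save/swap/restore dance in portable form; the rough C analogue below shows the calling pattern a fiber scheduler builds on (the names scheduler/worker are illustrative only, not part of druntime, and ucontext is a stand-in for the hand-written asm, not the same mechanism).

#include <stdio.h>
#include <ucontext.h>

static ucontext_t scheduler, worker;   /* two stacks to switch between */
static char worker_stack[64 * 1024];

static void
worker_fn (void)
{
  puts ("worker: first run");
  swapcontext (&worker, &scheduler);   /* yield, like fiber_switchContext */
  puts ("worker: resumed");
}

int main (void)
{
  getcontext (&worker);
  worker.uc_stack.ss_sp = worker_stack;
  worker.uc_stack.ss_size = sizeof worker_stack;
  worker.uc_link = &scheduler;         /* where to go when worker_fn returns */
  makecontext (&worker, worker_fn, 0);

  swapcontext (&scheduler, &worker);   /* switch in */
  swapcontext (&scheduler, &worker);   /* resume after the yield */
  puts ("scheduler: worker finished");
  return 0;
}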
_service:tar_scm:0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
Added
@@ -0,0 +1,866 @@
+From dd33794e64d462bf39e72f39343a384c191307f4 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 17 Dec 2023 01:09:20 +0800
+Subject: [PATCH 075/188] LoongArch: Fix FP vector comparisons [PR113034]
+
+We had the following mappings between <x>vfcmp submnemonics and RTX
+codes:
+
+    (define_code_attr fcc
+      [(unordered "cun")
+       (ordered "cor")
+       (eq "ceq")
+       (ne "cne")
+       (uneq "cueq")
+       (unle "cule")
+       (unlt "cult")
+       (le "cle")
+       (lt "clt")])
+
+This is inconsistent with scalar code:
+
+    (define_code_attr fcond [(unordered "cun")
+			     (uneq "cueq")
+			     (unlt "cult")
+			     (unle "cule")
+			     (eq "ceq")
+			     (lt "slt")
+			     (le "sle")
+			     (ordered "cor")
+			     (ltgt "sne")
+			     (ne "cune")
+			     (ge "sge")
+			     (gt "sgt")
+			     (unge "cuge")
+			     (ungt "cugt")])
+
+For every RTX code for which the LSX/LASX code is different from the
+scalar code, the scalar code is correct and the LSX/LASX code is wrong.
+Most seriously, the RTX code NE should be mapped to "cune", not "cne".
+Rewrite <x>vfcmp define_insns in simd.md using the same mapping as
+scalar fcmp.
+
+Note that GAS does not support xvfcmp.{c/s}u{ge/gt} (pseudo)
+instruction (although fcmp.{c/s}u{ge/gt} is supported), so we need to
+switch the order of inputs and use xvfcmp.{c/s}u{le/lt} instead.
+
+The <x>vfcmp.{sult/sule/clt/cle}.{s/d} instructions do not have a single
+RTX code, but they can be modeled as an inverted RTX code following a
+"not" operation.  Doing so allows the compiler to optimize vectorized
+__builtin_isless etc. to a single instruction.  This optimization should
+be added for scalar code too and I'll do it later.
+
+Tests are added for mapping between C code, IEC 60559 operations, and
+vfcmp instructions.
+
+[1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640713.html
+
+gcc/ChangeLog:
+
+	PR target/113034
+	* config/loongarch/lasx.md (UNSPEC_LASX_XVFCMP_*): Remove.
+	(lasx_xvfcmp_caf_<flasxfmt>): Remove.
+	(lasx_xvfcmp_cune_<FLASX:flasxfmt>): Remove.
+	(FSC256_UNS): Remove.
+	(fsc256): Remove.
+	(lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>): Remove.
+	(lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>): Remove.
+	* config/loongarch/lsx.md (UNSPEC_LSX_XVFCMP_*): Remove.
+	(lsx_vfcmp_caf_<flsxfmt>): Remove.
+	(lsx_vfcmp_cune_<FLSX:flsxfmt>): Remove.
+	(vfcond): Remove.
+	(fcc): Remove.
+	(FSC_UNS): Remove.
+	(fsc): Remove.
+	(lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>): Remove.
+	(lsx_vfcmp_<fsc>_<FLSX:flsxfmt>): Remove.
+	* config/loongarch/simd.md
+	(fcond_simd): New define_code_iterator.
+	(<simd_isa>_<x>vfcmp_<fcond:fcond_simd>_<simdfmt>):
+	New define_insn.
+	(fcond_simd_rev): New define_code_iterator.
+	(fcond_rev_asm): New define_code_attr.
+	(<simd_isa>_<x>vfcmp_<fcond:fcond_simd_rev>_<simdfmt>):
+	New define_insn.
+	(fcond_inv): New define_code_iterator.
+	(fcond_inv_rev): New define_code_iterator.
+	(fcond_inv_rev_asm): New define_code_attr.
+	(<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>): New define_insn.
+	(<simd_isa>_<x>vfcmp_<fcond_inv:fcond_inv_rev>_<simdfmt>):
+	New define_insn.
+	(UNSPEC_SIMD_FCMP_CAF, UNSPEC_SIMD_FCMP_SAF,
+	UNSPEC_SIMD_FCMP_SEQ, UNSPEC_SIMD_FCMP_SUN,
+	UNSPEC_SIMD_FCMP_SUEQ, UNSPEC_SIMD_FCMP_CNE,
+	UNSPEC_SIMD_FCMP_SOR, UNSPEC_SIMD_FCMP_SUNE): New unspecs.
+	(SIMD_FCMP): New define_int_iterator.
+	(fcond_unspec): New define_int_attr.
+	(<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>): New define_insn.
+	* config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp):
+	Remove unneeded special cases.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/113034
+	* gcc.target/loongarch/vfcmp-f.c: New test.
+	* gcc.target/loongarch/vfcmp-d.c: New test.
+	* gcc.target/loongarch/xvfcmp-f.c: New test.
+	* gcc.target/loongarch/xvfcmp-d.c: New test.
+	* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Scan for cune
+	instead of cne.
+	* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Likewise.
+---
+ gcc/config/loongarch/lasx.md                  |  76 --------
+ gcc/config/loongarch/loongarch.cc             |  60 +-----
+ gcc/config/loongarch/lsx.md                   |  83 --------
+ gcc/config/loongarch/simd.md                  | 118 ++++++++++++
+ .../loongarch/vector/lasx/lasx-vcond-2.c      |   4 +-
+ .../loongarch/vector/lsx/lsx-vcond-2.c        |   4 +-
+ gcc/testsuite/gcc.target/loongarch/vfcmp-d.c  |  28 +++
+ gcc/testsuite/gcc.target/loongarch/vfcmp-f.c  | 178 ++++++++++++++++++
+ gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c |  29 +++
+ gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c |  27 +++
+ 10 files changed, 385 insertions(+), 222 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index eeac8cd98..921ce0eeb 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -32,9 +32,7 @@
+   UNSPEC_LASX_XVBITREVI
+   UNSPEC_LASX_XVBITSET
+   UNSPEC_LASX_XVBITSETI
+-  UNSPEC_LASX_XVFCMP_CAF
+   UNSPEC_LASX_XVFCLASS
+-  UNSPEC_LASX_XVFCMP_CUNE
+   UNSPEC_LASX_XVFCVT
+   UNSPEC_LASX_XVFCVTH
+   UNSPEC_LASX_XVFCVTL
+@@ -44,17 +42,6 @@
+   UNSPEC_LASX_XVFRINT
+   UNSPEC_LASX_XVFRSQRT
+   UNSPEC_LASX_XVFRSQRTE
+-  UNSPEC_LASX_XVFCMP_SAF
+-  UNSPEC_LASX_XVFCMP_SEQ
+-  UNSPEC_LASX_XVFCMP_SLE
+-  UNSPEC_LASX_XVFCMP_SLT
+-  UNSPEC_LASX_XVFCMP_SNE
+-  UNSPEC_LASX_XVFCMP_SOR
+-  UNSPEC_LASX_XVFCMP_SUEQ
+-  UNSPEC_LASX_XVFCMP_SULE
+-  UNSPEC_LASX_XVFCMP_SULT
+-  UNSPEC_LASX_XVFCMP_SUN
+-  UNSPEC_LASX_XVFCMP_SUNE
+   UNSPEC_LASX_XVFTINT_U
+   UNSPEC_LASX_XVCLO
+   UNSPEC_LASX_XVSAT_S
+@@ -1481,69 +1468,6 @@
+   [(set_attr "type" "simd_fclass")
+    (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "lasx_xvfcmp_caf_<flasxfmt>"
+-  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+-	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
+-			     (match_operand:FLASX 2 "register_operand" "f")]
+-			    UNSPEC_LASX_XVFCMP_CAF))]
+-  "ISA_HAS_LASX"
+-  "xvfcmp.caf.<flasxfmt>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "lasx_xvfcmp_cune_<FLASX:flasxfmt>"
+-  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+-	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
+-			     (match_operand:FLASX 2 "register_operand" "f")]
+-			    UNSPEC_LASX_XVFCMP_CUNE))]
+-  "ISA_HAS_LASX"
+-  "xvfcmp.cune.<FLASX:flasxfmt>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-
+-
+-(define_int_iterator FSC256_UNS [UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN
+-				 UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ
+-				 UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ
+-				 UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE
+-				 UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE
+-				 UNSPEC_LASX_XVFCMP_SLT])
+-
+-(define_int_attr fsc256
+-  [(UNSPEC_LASX_XVFCMP_SAF "saf")
+-   (UNSPEC_LASX_XVFCMP_SUN "sun")
+-   (UNSPEC_LASX_XVFCMP_SOR "sor")
+-   (UNSPEC_LASX_XVFCMP_SEQ "seq")
+-   (UNSPEC_LASX_XVFCMP_SNE "sne")
+-   (UNSPEC_LASX_XVFCMP_SUEQ "sueq")
+-   (UNSPEC_LASX_XVFCMP_SUNE "sune")
+-   (UNSPEC_LASX_XVFCMP_SULE "sule")
+-   (UNSPEC_LASX_XVFCMP_SULT "sult")
+-   (UNSPEC_LASX_XVFCMP_SLE "sle")
+-   (UNSPEC_LASX_XVFCMP_SLT "slt")])
+-
+-(define_insn "lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>"
+-  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+-	(vfcond:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f")
+-			    (match_operand:FLASX 2 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvfcmp.<vfcond:fcc>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-
+-(define_insn "lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>"
+-  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
+-	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
+-			     (match_operand:FLASX 2 "register_operand" "f")]
+-			    FSC256_UNS))]
+-  "ISA_HAS_LASX"
+-  "xvfcmp.<fsc256>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-
+ (define_mode_attr fint256
+   [(V8SF "v8si")
+    (V4DF "v4di")])
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index afbb55390..a22601d88 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -11156,7 +11156,6 @@ static void
+ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
+ {
+   machine_mode cmp_mode = GET_MODE (op0);
+-  int unspec = -1;
+   bool negate = false;
+ 
+   switch (cmp_mode)
+@@ -11198,66 +11197,9 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
+ 
+     case E_V4SFmode:
+     case E_V2DFmode:
+-      switch (cond)
+-	{
+-	case UNORDERED:
+-	case ORDERED:
+-	case EQ:
+-	case NE:
+-	case UNEQ:
+-	case UNLE:
+-	case UNLT:
+-	  break;
+-	case LTGT: cond = NE; break;
+-	case UNGE: cond = UNLE; std::swap (op0, op1); break;
+-	case UNGT: cond = UNLT; std::swap (op0, op1); break;
+-	case LE: unspec = UNSPEC_LSX_VFCMP_SLE; break;
+-	case LT: unspec = UNSPEC_LSX_VFCMP_SLT; break;
+-	case GE: unspec = UNSPEC_LSX_VFCMP_SLE; std::swap (op0, op1); break;
+-	case GT: unspec = UNSPEC_LSX_VFCMP_SLT; std::swap (op0, op1); break;
+-	default:
+-	  gcc_unreachable ();
+-	}
+-      if (unspec < 0)
+-	loongarch_emit_binary (cond, dest, op0, op1);
+-      else
+-	{
+-	  rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
+-				  gen_rtvec (2, op0, op1), unspec);
+-	  emit_insn (gen_rtx_SET (dest, x));
+-	}
+-      break;
+-
+     case E_V8SFmode:
+     case E_V4DFmode:
+-      switch (cond)
+-	{
+-	case UNORDERED:
+-	case ORDERED:
+-	case EQ:
+-	case NE:
+-	case UNEQ:
+-	case UNLE:
+-	case UNLT:
+-	  break;
+-	case LTGT: cond = NE; break;
+-	case UNGE: cond = UNLE; std::swap (op0, op1); break;
+-	case UNGT: cond = UNLT; std::swap (op0, op1); break;
+-	case LE: unspec = UNSPEC_LASX_XVFCMP_SLE; break;
+-	case LT: unspec = UNSPEC_LASX_XVFCMP_SLT; break;
+-	case GE: unspec = UNSPEC_LASX_XVFCMP_SLE; std::swap (op0, op1); break;
+-	case GT: unspec = UNSPEC_LASX_XVFCMP_SLT; std::swap (op0, op1); break;
+-	default:
+-	  gcc_unreachable ();
+-	}
+-      if (unspec < 0)
+-	loongarch_emit_binary (cond, dest, op0, op1);
+-      else
+-	{
+-	  rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
+-				  gen_rtvec (2, op0, op1), unspec);
+-	  emit_insn (gen_rtx_SET (dest, x));
+-	}
++      loongarch_emit_binary (cond, dest, op0, op1);
+       break;
+ 
+     default:
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index dbdb42301..57e0ee3d4 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -34,9 +34,7 @@
+   UNSPEC_LSX_VBITSETI
+   UNSPEC_LSX_BRANCH_V
+   UNSPEC_LSX_BRANCH
+-  UNSPEC_LSX_VFCMP_CAF
+   UNSPEC_LSX_VFCLASS
+-  UNSPEC_LSX_VFCMP_CUNE
+   UNSPEC_LSX_VFCVT
+   UNSPEC_LSX_VFCVTH
+   UNSPEC_LSX_VFCVTL
+@@ -46,17 +44,6 @@
+   UNSPEC_LSX_VFRINT
+   UNSPEC_LSX_VFRSQRT
+   UNSPEC_LSX_VFRSQRTE
+-  UNSPEC_LSX_VFCMP_SAF
+-  UNSPEC_LSX_VFCMP_SEQ
+-  UNSPEC_LSX_VFCMP_SLE
+-  UNSPEC_LSX_VFCMP_SLT
+-  UNSPEC_LSX_VFCMP_SNE
+-  UNSPEC_LSX_VFCMP_SOR
+-  UNSPEC_LSX_VFCMP_SUEQ
+-  UNSPEC_LSX_VFCMP_SULE
+-  UNSPEC_LSX_VFCMP_SULT
+-  UNSPEC_LSX_VFCMP_SUN
+-  UNSPEC_LSX_VFCMP_SUNE
+   UNSPEC_LSX_VFTINT_U
+   UNSPEC_LSX_VSAT_S
+   UNSPEC_LSX_VSAT_U
+@@ -1377,76 +1364,6 @@
+   [(set_attr "type" "simd_fclass")
+    (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "lsx_vfcmp_caf_<flsxfmt>"
+-  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+-	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
+-			  (match_operand:FLSX 2 "register_operand" "f")]
+-			 UNSPEC_LSX_VFCMP_CAF))]
+-  "ISA_HAS_LSX"
+-  "vfcmp.caf.<flsxfmt>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "lsx_vfcmp_cune_<FLSX:flsxfmt>"
+-  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+-	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
+-			  (match_operand:FLSX 2 "register_operand" "f")]
+-			 UNSPEC_LSX_VFCMP_CUNE))]
+-  "ISA_HAS_LSX"
+-  "vfcmp.cune.<FLSX:flsxfmt>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_code_iterator vfcond [unordered ordered eq ne le lt uneq unle unlt])
+-
+-(define_code_attr fcc
+-  [(unordered "cun")
+-   (ordered "cor")
+-   (eq "ceq")
+-   (ne "cne")
+-   (uneq "cueq")
+-   (unle "cule")
+-   (unlt "cult")
+-   (le "cle")
+-   (lt "clt")])
+-
+-(define_int_iterator FSC_UNS [UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SUN UNSPEC_LSX_VFCMP_SOR
+-			      UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SNE UNSPEC_LSX_VFCMP_SUEQ
+-			      UNSPEC_LSX_VFCMP_SUNE UNSPEC_LSX_VFCMP_SULE UNSPEC_LSX_VFCMP_SULT
+-			      UNSPEC_LSX_VFCMP_SLE UNSPEC_LSX_VFCMP_SLT])
+-
+-(define_int_attr fsc
+-  [(UNSPEC_LSX_VFCMP_SAF "saf")
+-   (UNSPEC_LSX_VFCMP_SUN "sun")
+-   (UNSPEC_LSX_VFCMP_SOR "sor")
+-   (UNSPEC_LSX_VFCMP_SEQ "seq")
+-   (UNSPEC_LSX_VFCMP_SNE "sne")
+-   (UNSPEC_LSX_VFCMP_SUEQ "sueq")
+-   (UNSPEC_LSX_VFCMP_SUNE "sune")
+-   (UNSPEC_LSX_VFCMP_SULE "sule")
+-   (UNSPEC_LSX_VFCMP_SULT "sult")
+-   (UNSPEC_LSX_VFCMP_SLE "sle")
+-   (UNSPEC_LSX_VFCMP_SLT "slt")])
+-
+-(define_insn "lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>"
+-  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+-	(vfcond:<VIMODE> (match_operand:FLSX 1 "register_operand" "f")
+-			 (match_operand:FLSX 2 "register_operand" "f")))]
+-  "ISA_HAS_LSX"
+-  "vfcmp.<vfcond:fcc>.<FLSX:flsxfmt>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "lsx_vfcmp_<fsc>_<FLSX:flsxfmt>"
+-  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+-	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
+-			  (match_operand:FLSX 2 "register_operand" "f")]
+-			 FSC_UNS))]
+-  "ISA_HAS_LSX"
+-  "vfcmp.<fsc>.<FLSX:flsxfmt>\t%w0,%w1,%w2"
+-  [(set_attr "type" "simd_fcmp")
+-   (set_attr "mode" "<MODE>")])
+-
+ (define_mode_attr fint
+   [(V4SF "v4si")
+    (V2DF "v2di")])
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 843b1a41f..13202f79b 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -279,6 +279,124 @@
+   [(set_attr "type" "simd_int_arith")
+    (set_attr "mode" "<MODE>")])
+ 
++;; <x>vfcmp.*.{s/d} with defined RTX code
++;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} menmonics in GAS, so we have
++;; to reverse the operands ourselves :(.
++(define_code_iterator fcond_simd [unordered uneq unlt unle eq lt le
++				  ordered ltgt ne])
++(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
++  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
++	(fcond_simd:<VIMODE>
++	  (match_operand:FVEC 1 "register_operand" "f")
++	  (match_operand:FVEC 2 "register_operand" "f")))]
++  ""
++  "<x>vfcmp.<fcond>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
++  [(set_attr "type" "simd_fcmp")
++   (set_attr "mode" "<MODE>")])
++
++;; There are no fcmp.{sge/sgt/cuge/cugt}.{s/d} menmonics in GAS, so we have
++;; to reverse the operands ourselves.
++(define_code_iterator fcond_simd_rev [ge gt unge ungt])
++
++(define_code_attr fcond_rev_asm
++  [(ge "sle")
++   (gt "slt")
++   (unge "cule")
++   (ungt "cult")])
++
++(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
++  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
++	(fcond_simd_rev:<VIMODE>
++	  (match_operand:FVEC 1 "register_operand" "f")
++	  (match_operand:FVEC 2 "register_operand" "f")))]
++  ""
++  "<x>vfcmp.<fcond_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1";
++  [(set_attr "type" "simd_fcmp")
++   (set_attr "mode" "<MODE>")])
++
++;; <x>vfcmp.*.{s/d} without defined RTX code, but with defined RTX code for
++;; its inverse.  Again, there are no fcmp.{sugt/suge/cgt/cge}.{s/d}
++;; menmonics in GAS, so we have to reverse the operands ourselves.
++(define_code_iterator fcond_inv [ge gt unge ungt])
++(define_code_iterator fcond_inv_rev [le lt unle unlt])
++(define_code_attr fcond_inv
++  [(ge "sult")
++   (gt "sule")
++   (unge "clt")
++   (ungt "cle")
++   (le "sugt")
++   (lt "suge")
++   (unle "cgt")
++   (unlt "cge")])
++(define_code_attr fcond_inv_rev_asm
++  [(le "sult")
++   (lt "sule")
++   (unle "clt")
++   (unlt "cle")])
++
++(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
++  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
++	(not:<VIMODE>
++	  (fcond_inv:<VIMODE>
++	    (match_operand:FVEC 1 "register_operand" "f")
++	    (match_operand:FVEC 2 "register_operand" "f"))))]
++  ""
++  "<x>vfcmp.<fcond_inv>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
++  [(set_attr "type" "simd_fcmp")
++   (set_attr "mode" "<MODE>")])
++
++(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
++  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
++	(not:<VIMODE>
++	  (fcond_inv_rev:<VIMODE>
++	    (match_operand:FVEC 1 "register_operand" "f")
++	    (match_operand:FVEC 2 "register_operand" "f"))))]
++  ""
++  "<x>vfcmp.<fcond_inv_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1"
++  [(set_attr "type" "simd_fcmp")
++   (set_attr "mode" "<MODE>")])
++
++;; <x>vfcmp.*.{s/d} instructions only as instrinsics
++(define_c_enum "unspec"
++  [UNSPEC_SIMD_FCMP_CAF
++   UNSPEC_SIMD_FCMP_SAF
++   UNSPEC_SIMD_FCMP_SEQ
++   UNSPEC_SIMD_FCMP_SUN
++   UNSPEC_SIMD_FCMP_SUEQ
++   UNSPEC_SIMD_FCMP_CNE
++   UNSPEC_SIMD_FCMP_SOR
++   UNSPEC_SIMD_FCMP_SUNE])
++
++(define_int_iterator SIMD_FCMP
++  [UNSPEC_SIMD_FCMP_CAF
++   UNSPEC_SIMD_FCMP_SAF
++   UNSPEC_SIMD_FCMP_SEQ
++   UNSPEC_SIMD_FCMP_SUN
++   UNSPEC_SIMD_FCMP_SUEQ
++   UNSPEC_SIMD_FCMP_CNE
++   UNSPEC_SIMD_FCMP_SOR
++   UNSPEC_SIMD_FCMP_SUNE])
++
++(define_int_attr fcond_unspec
++  [(UNSPEC_SIMD_FCMP_CAF "caf")
++   (UNSPEC_SIMD_FCMP_SAF "saf")
++   (UNSPEC_SIMD_FCMP_SEQ "seq")
++   (UNSPEC_SIMD_FCMP_SUN "sun")
++   (UNSPEC_SIMD_FCMP_SUEQ "sueq")
++   (UNSPEC_SIMD_FCMP_CNE "cne")
++   (UNSPEC_SIMD_FCMP_SOR "sor")
++   (UNSPEC_SIMD_FCMP_SUNE "sune")])
++
++(define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
++  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
++	(unspec:<VIMODE> [(match_operand:FVEC 1 "register_operand" "f")
++			  (match_operand:FVEC 2 "register_operand" "f")]
++			 SIMD_FCMP))]
++  ""
"<x>vfcmp.<fcond_unspec>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2" ++ (set_attr "type" "simd_fcmp") ++ (set_attr "mode" "<MODE>")) ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +index 55d5a084c..f2f523622 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +@@ -69,8 +69,8 @@ TEST_CMP (nugt) + + /* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.d} 3 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +index 2214afd0a..486bedba4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -69,8 +69,8 @@ TEST_CMP (nugt) + + /* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.d} 3 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +new file mode 100644 +index 000000000..8b870ef38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++ ++#define F double ++#define I long long ++ ++#include "vfcmp-f.c" ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { scan-assembler 
"compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c +new file mode 100644 +index 000000000..b9110b90c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c +@@ -0,0 +1,178 @@ ++/* Test mapping IEC 60559 operations to SIMD instructions. ++ For details read C23 Annex F.3 and LoongArch Vol. 1 section 3.2.2.1. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++ ++#ifndef F ++#define F float ++#endif ++ ++#ifndef I ++#define I int ++#endif ++ ++#ifndef VL ++#define VL 16 ++#endif ++ ++typedef F VF __attribute__ ((vector_size (VL))); ++typedef I VI __attribute__ ((vector_size (VL))); ++ ++register VF a asm ("f0"); ++register VF b asm ("f1"); ++register VI c asm ("f2"); ++ ++void ++compare_quiet_equal (void) ++{ ++ c = (a == b); ++} ++ ++void ++compare_quiet_not_equal (void) ++{ ++ c = (a != b); ++} ++ ++void ++compare_signaling_greater (void) ++{ ++ c = (a > b); ++} ++ ++void ++compare_signaling_greater_equal (void) ++{ ++ c = (a >= b); ++} ++ ++void ++compare_signaling_less (void) ++{ ++ c = (a < b); ++} ++ ++void ++compare_signaling_less_equal (void) ++{ ++ c = (a <= b); ++} ++ ++void ++compare_signaling_not_greater (void) ++{ ++ c = ~(a > b); ++} ++ ++void ++compare_signaling_less_unordered (void) ++{ ++ c = ~(a >= b); ++} ++ ++void ++compare_signaling_not_less (void) ++{ ++ c = ~(a < b); ++} ++ ++void ++compare_signaling_greater_unordered (void) ++{ ++ c = ~(a <= b); ++} ++ ++void ++compare_quiet_less (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c0); i++) ++ ci = __builtin_isless (ai, bi) ? 
++}
++
++void
++compare_quiet_less_equal (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_islessequal (a[i], b[i]) ? -1 : 0;
++}
++
++void
++compare_quiet_greater (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isgreater (a[i], b[i]) ? -1 : 0;
++}
++
++void
++compare_quiet_greater_equal (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isgreaterequal (a[i], b[i]) ? -1 : 0;
++}
++
++void
++compare_quiet_not_less (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isless (a[i], b[i]) ? 0 : -1;
++}
++
++void
++compare_quiet_greater_unordered (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_islessequal (a[i], b[i]) ? 0 : -1;
++}
++
++void
++compare_quiet_not_greater (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isgreater (a[i], b[i]) ? 0 : -1;
++}
++
++void
++compare_quiet_less_unordered (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isgreaterequal (a[i], b[i]) ? 0 : -1;
++}
++
++void
++compare_quiet_unordered (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isunordered (a[i], b[i]) ? -1 : 0;
++}
++
++void
++compare_quiet_ordered (void)
++{
++  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
++    c[i] = __builtin_isunordered (a[i], b[i]) ? 0 : -1;
++}
++
++/* The "-<function_name>" matches the .size directive after the function
++   body, so we can ensure the instruction is in the correct function.  */
++
++/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
++/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
++/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
++/* { dg-final { scan-assembler 
"compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c +new file mode 100644 +index 000000000..d8017caaa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++ ++#define F double ++#define I long long ++#define VL 32 ++ ++#include "vfcmp-f.c" ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler 
"compare_quiet_not_less:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c +new file mode 100644 +index 000000000..b54556475 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2" } */ ++ ++#define VL 32 ++ ++#include "vfcmp-f.c" ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } 
} */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */ +-- +2.43.0 +
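Editor's note: the quiet/signaling split these tests encode is visible from plain C. The relational operators are the signaling comparisons (they raise the INVALID exception on any NaN operand), while the __builtin_is* predicates are the quiet ones; with the patch, each now maps to a single vfcmp.s*/vfcmp.c* instruction when vectorized. A minimal scalar illustration:

#include <stdio.h>
#include <math.h>

int main (void)
{
  double a = NAN, b = 1.0;

  /* Quiet (compareQuiet*): false on NaN, no INVALID exception.  */
  printf ("isless (NaN, 1.0)      = %d\n", __builtin_isless (a, b));      /* 0 */
  printf ("isunordered (NaN, 1.0) = %d\n", __builtin_isunordered (a, b)); /* 1 */

  /* Signaling (compareSignaling*): also false, but raises INVALID.  */
  printf ("NaN < 1.0              = %d\n", a < b);                        /* 0 */
  return 0;
}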
_service:tar_scm:0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
Added
@@ -0,0 +1,190 @@
+From be149d7f6527df6b16f3f9f8aec1e488466a71f1 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 19 Dec 2023 04:48:03 +0800
+Subject: [PATCH 076/188] LoongArch: Use force_reg instead of gen_reg_rtx +
+ emit_move_insn in vec_init expander [PR113033]
+
+Jakub says:
+
+  Then that seems like a bug in the loongarch vec_init pattern(s).
+  Those really don't have a predicate in any of the backends on the
+  input operand, so they need to force_reg it if it is something it
+  can't handle.  I've looked e.g. at i386 vec_init and that is exactly
+  what it does, see the various tests + force_reg calls in
+  ix86_expand_vector_init*.
+
+So replace gen_reg_rtx + emit_move_insn with force_reg to fix PR 113033.
+
+gcc/ChangeLog:
+
+	PR target/113033
+	* config/loongarch/loongarch.cc
+	(loongarch_expand_vector_init_same): Replace gen_reg_rtx +
+	emit_move_insn with force_reg.
+	(loongarch_expand_vector_init): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/113033
+	* gcc.target/loongarch/pr113033.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             | 38 ++++++-------------
+ gcc/testsuite/gcc.target/loongarch/pr113033.c | 23 +++++++++++
+ 2 files changed, 35 insertions(+), 26 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113033.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index a22601d88..000d2d623 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10745,7 +10745,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
+ 	  gcc_unreachable ();
+ 	}
+     }
+-  temp = gen_reg_rtx (imode);
++
+   if (imode == GET_MODE (same))
+     temp2 = same;
+   else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
+@@ -10770,7 +10770,8 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
+       else
+	 temp2 = lowpart_subreg (imode, same, GET_MODE (same));
+     }
+-  emit_move_insn (temp, temp2);
++
++  temp = force_reg (imode, temp2);
+ 
+   switch (vmode)
+     {
+@@ -10992,35 +10993,29 @@ loongarch_expand_vector_init (rtx target, rtx vals)
+		      to reduce the number of instructions.  */
+		   if (i == 1)
+		     {
+-		      op0 = gen_reg_rtx (imode);
+-		      emit_move_insn (op0, val_hi[0]);
+-		      op1 = gen_reg_rtx (imode);
+-		      emit_move_insn (op1, val_hi[1]);
++		      op0 = force_reg (imode, val_hi[0]);
++		      op1 = force_reg (imode, val_hi[1]);
+		       emit_insn (
+			 loongarch_vec_repl2_256 (target_hi, op0, op1));
+		     }
+		   else if (i > 1)
+		     {
+-		      op0 = gen_reg_rtx (imode);
+-		      emit_move_insn (op0, val_hi[i]);
++		      op0 = force_reg (imode, val_hi[i]);
+		       emit_insn (
+			 loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
+		     }
+		 }
+	       else
+		 {
++		  op0 = force_reg (imode, val_hi[i]);
+		   /* Assign the lowest element of val_hi to all elements
+		      of target_hi.  */
+		   if (i == 0)
+		     {
+-		      op0 = gen_reg_rtx (imode);
+-		      emit_move_insn (op0, val_hi[0]);
+		       emit_insn (loongarch_vec_repl1_256 (target_hi, op0));
+		     }
+		   else if (!rtx_equal_p (val_hi[i], val_hi[0]))
+		     {
+-		      op0 = gen_reg_rtx (imode);
+-		      emit_move_insn (op0, val_hi[i]);
+		       emit_insn (
+			 loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
+		     }
+		 }
+@@ -11028,18 +11023,15 @@ loongarch_expand_vector_init (rtx target, rtx vals)
+	     }
+	   if (!lo_same && !half_same)
+	     {
++	      op0 = force_reg (imode, val_lo[i]);
+	       /* Assign the lowest element of val_lo to all elements
+		  of target_lo.  */
+	       if (i == 0)
+		 {
+-		  op0 = gen_reg_rtx (imode);
+-		  emit_move_insn (op0, val_lo[0]);
+		   emit_insn (loongarch_vec_repl1_128 (target_lo, op0));
+		 }
+	       else if (!rtx_equal_p (val_lo[i], val_lo[0]))
+		 {
+-		  op0 = gen_reg_rtx (imode);
+-		  emit_move_insn (op0, val_lo[i]);
+		   emit_insn (
+		     loongarch_vec_set128 (target_lo, op0, GEN_INT (i)));
+		 }
+@@ -11071,16 +11063,13 @@ loongarch_expand_vector_init (rtx target, rtx vals)
+		  reduce the number of instructions.  */
+	       if (i == 1)
+		 {
+-		  op0 = gen_reg_rtx (imode);
+-		  emit_move_insn (op0, val[0]);
+-		  op1 = gen_reg_rtx (imode);
+-		  emit_move_insn (op1, val[1]);
++		  op0 = force_reg (imode, val[0]);
++		  op1 = force_reg (imode, val[1]);
+		   emit_insn (loongarch_vec_repl2_128 (target, op0, op1));
+		 }
+	       else if (i > 1)
+		 {
+-		  op0 = gen_reg_rtx (imode);
+-		  emit_move_insn (op0, val[i]);
++		  op0 = force_reg (imode, val[i]);
+		   emit_insn (
+		     loongarch_vec_set128 (target, op0, GEN_INT (i)));
+		 }
+@@ -11093,18 +11082,15 @@ loongarch_expand_vector_init (rtx target, rtx vals)
+		     loongarch_vec_mirror (target, target, const0_rtx));
+		   return;
+		 }
++	      op0 = force_reg (imode, val[i]);
+	       /* Assign the lowest element of val to all elements of
+		  target.  */
+	       if (i == 0)
+		 {
+-		  op0 = gen_reg_rtx (imode);
+-		  emit_move_insn (op0, val[0]);
+		   emit_insn (loongarch_vec_repl1_128 (target, op0));
+		 }
+	       else if (!rtx_equal_p (val[i], val[0]))
+		 {
+-		  op0 = gen_reg_rtx (imode);
+-		  emit_move_insn (op0, val[i]);
+		   emit_insn (
+		     loongarch_vec_set128 (target, op0, GEN_INT (i)));
+		 }
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr113033.c b/gcc/testsuite/gcc.target/loongarch/pr113033.c
+new file mode 100644
+index 000000000..4ccd037d8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr113033.c
+@@ -0,0 +1,23 @@
++/* PR target/113033: ICE with vector left rotate */
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx" } */
++
++typedef unsigned __attribute__ ((vector_size (16))) v4si;
++typedef unsigned __attribute__ ((vector_size (32))) v8si;
++typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
++typedef unsigned long long __attribute__ ((vector_size (32))) v4di;
++
++#define TEST(tp) \
++extern tp data_##tp; \
++tp \
++test_##tp (int x) \
++{ \
++  const int bit = sizeof (data_##tp[0]) * __CHAR_BIT__; \
++  data_##tp = data_##tp << (x & (bit - 1)) \
++	      | data_##tp >> (bit - x & (bit - 1)); \
++}
++
++TEST (v4si)
++TEST (v8si)
++TEST (v2di)
++TEST (v4di)
+--
+2.43.0
+
_service:tar_scm:0077-LoongArch-Clean-up-vec_init-expander.patch
Added
@@ -0,0 +1,83 @@ +From 38438021c770f077b78092299f22712fdd734814 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 19 Dec 2023 05:02:42 +0800 +Subject: PATCH 077/188 LoongArch: Clean up vec_init expander + +Non functional change, clean up the code. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Remove "temp2" and reuse + "temp" instead. + (loongarch_expand_vector_init): Use gcc_unreachable () instead + of gcc_assert (0), and fix the comment for it. +--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 000d2d623..3aeafeafd 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10723,7 +10723,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); + rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; ++ rtx temp; + + if (CONST_INT_P (same) && nvar == 0 + && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +@@ -10747,17 +10747,17 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + } + + if (imode == GET_MODE (same)) +- temp2 = same; ++ temp = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { + if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); ++ temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); + } + else +- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); ++ temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + } + else + { +@@ -10765,13 +10765,13 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); + } + else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); ++ temp = lowpart_subreg (imode, same, GET_MODE (same)); + } + +- temp = force_reg (imode, temp2); ++ temp = force_reg (imode, temp); + + switch (vmode) + { +@@ -11117,8 +11117,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + return; + } + +- /* Loongson is the only cpu with vectors with more elements. */ +- gcc_assert (0); ++ /* No LoongArch CPU supports vectors with more elements as at now. */ ++ gcc_unreachable (); + } + + /* Implement HARD_REGNO_CALLER_SAVE_MODE. */ +-- +2.43.0 +
_service:tar_scm:0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch
Added
@@ -0,0 +1,78 @@ +From e5c0e4b416b8628585e27b524ba524261cacf713 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Thu, 14 Dec 2023 20:49:04 +0800 +Subject: PATCH 078/188 LoongArch: Fix incorrect code generation for sad + pattern + +When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c +and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad +code. This patch to fixed them, for sad patterns, use zero expansion instead of sign +expansion for reduction. + +Currently, we are fixing failed vectorized tests, and in the future, we will +enable more tests of "vect" for LoongArch. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use zero expansion instruction. + * config/loongarch/lsx.md: Ditto. +--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/lsx.md | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 921ce0eeb..9ca3f9278 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -5021,8 +5021,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_u_bu (t1, operands1, operands2)); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands0, t3, operands3)); + DONE; + }) +@@ -5038,8 +5038,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_s_b (t1, operands1, operands2)); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands0, t3, operands3)); + DONE; + }) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 57e0ee3d4..7f5fff40a 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -3385,8 +3385,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_u_bu (t1, operands1, operands2)); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands0, t3, operands3)); + DONE; + }) +@@ -3402,8 +3402,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_s_b (t1, operands1, operands2)); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands0, t3, operands3)); + DONE; + }) +-- +2.43.0 +
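For context, a sketch of the kind of reduction these sad patterns implement (compare the slp-reduc-sad.c and vect-reduc-sad*.c tests named above). The absolute byte differences produced by vabsd.bu lie in 0..255, so widening them with the signed xvhaddw.h.b would make values >= 128 go negative; the unsigned xvhaddw.hu.bu / xvhaddw.wu.hu variants keep the partial sums correct:

  /* A loop the vectorizer maps onto the usad pattern (illustrative).  */
  int
  sad (const unsigned char *a, const unsigned char *b, int n)
  {
    int sum = 0;
    for (int i = 0; i < n; i++)
      sum += __builtin_abs (a[i] - b[i]);
    return sum;
  }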
_service:tar_scm:0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch
Added
@@ -0,0 +1,68 @@ +From bedb0338fadc373eeafc418a7bf6395d37eec78c Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 13 Dec 2023 09:31:07 +0800 +Subject: PATCH 079/188 LoongArch: Modify the check type of the vector + builtin function. + +On LoongArch architecture, using the latest gcc14 in regression test, +it is found that the vector test cases in vector directory appear FAIL +entries with unmatched pointer types. In order to solve this kind of +problem, the type of the variable in the check result is modified with +the parameter type defined in the vector builtin function. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/simd_correctness_check.h:The variable + types in the check results are modified in conjunction with the + parameter types defined in the vector builtin function. +--- + .../loongarch/vector/simd_correctness_check.h | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +index eb7fbd59c..551340bd5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h ++++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +@@ -8,11 +8,12 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res0); ++i) \ + { \ +- long *temp_ref = &refi, *temp_res = &resi; \ ++ long long *temp_ref = (long long *)&refi, \ ++ *temp_res = (long long *)&resi; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "%ld:0x%lx, got: 0x%lx\n", \ ++ "%ld:0x%016lx, got: 0x%016lx\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -28,11 +29,11 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res0); ++i) \ + { \ +- int *temp_ref = &refi, *temp_res = &resi; \ ++ int *temp_ref = (int *)&refi, *temp_res = (int *)&resi; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "%ld:0x%x, got: 0x%x\n", \ ++ "%ld:0x%08x, got: 0x%08x\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -47,8 +48,8 @@ + { \ + if (ref != res) \ + { \ +- printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \ +- line, ref, res); \ ++ printf (" error: %s at line %ld , expected 0x:%016x", \ ++ "got 0x:%016x\n", __FILE__, line, ref, res); \ + } \ + } \ + while (0) +-- +2.43.0 +
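The macros above compare raw bit patterns of vector elements whose declared type is often float or double, which is why the element address must be cast explicitly and why the 64-bit comparison now spells out long long. A standalone sketch of the same type-punning the harness performs (hypothetical variable names):

  #include <stdio.h>

  int
  main (void)
  {
    double ref = 1.0, res = 1.0;
    /* Reinterpret the element storage as a 64-bit integer, exactly as
       the checker macros do after this patch.  */
    long long *pr = (long long *) &ref;
    long long *ps = (long long *) &res;
    if (*pr != *ps)
      printf ("expected 0x%016llx, got 0x%016llx\n", *pr, *ps);
    return 0;
  }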
_service:tar_scm:0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
Added
@@ -0,0 +1,250 @@ +From 2e0092b20b845e0e301b1dab177b338e35981f10 Mon Sep 17 00:00:00 2001 +From: Jiajie Chen <c@jia.je> +Date: Wed, 13 Dec 2023 23:26:01 +0800 +Subject: PATCH 080/188 LoongArch: extend.texi: Fix typos in LSX intrinsics + +Several typos have been found and fixed: missing semicolons, using +variable name instead of type, duplicate functions and wrong types. + +gcc/ChangeLog: + + * doc/extend.texi(__lsx_vabsd_di): remove extra `i' in name. + (__lsx_vfrintrm_d, __lsx_vfrintrm_s, __lsx_vfrintrne_d, + __lsx_vfrintrne_s, __lsx_vfrintrp_d, __lsx_vfrintrp_s, __lsx_vfrintrz_d, + __lsx_vfrintrz_s): fix return types. + (__lsx_vld, __lsx_vldi, __lsx_vldrepl_b, __lsx_vldrepl_d, + __lsx_vldrepl_h, __lsx_vldrepl_w, __lsx_vmaxi_b, __lsx_vmaxi_d, + __lsx_vmaxi_h, __lsx_vmaxi_w, __lsx_vmini_b, __lsx_vmini_d, + __lsx_vmini_h, __lsx_vmini_w, __lsx_vsrani_d_q, __lsx_vsrarni_d_q, + __lsx_vsrlni_d_q, __lsx_vsrlrni_d_q, __lsx_vssrani_d_q, + __lsx_vssrarni_d_q, __lsx_vssrarni_du_q, __lsx_vssrlni_d_q, + __lsx_vssrlrni_du_q, __lsx_vst, __lsx_vstx, __lsx_vssrani_du_q, + __lsx_vssrlni_du_q, __lsx_vssrlrni_d_q): add missing semicolon. + (__lsx_vpickve2gr_bu, __lsx_vpickve2gr_hu): fix typo in return + type. + (__lsx_vstelm_b, __lsx_vstelm_d, __lsx_vstelm_h, + __lsx_vstelm_w): use imm type for the last argument. + (__lsx_vsigncov_b, __lsx_vsigncov_h, __lsx_vsigncov_w, + __lsx_vsigncov_d): remove duplicate definitions. +--- + gcc/doc/extend.texi | 90 ++++++++++++++++++++++----------------------- + 1 file changed, 43 insertions(+), 47 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index bb042ae78..ac8da4e80 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -16392,7 +16392,7 @@ int __lsx_bz_v (__m128i); + int __lsx_bz_w (__m128i); + __m128i __lsx_vabsd_b (__m128i, __m128i); + __m128i __lsx_vabsd_bu (__m128i, __m128i); +-__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_d (__m128i, __m128i); + __m128i __lsx_vabsd_du (__m128i, __m128i); + __m128i __lsx_vabsd_h (__m128i, __m128i); + __m128i __lsx_vabsd_hu (__m128i, __m128i); +@@ -16598,14 +16598,14 @@ __m128 __lsx_vfnmsub_s (__m128, __m128, __m128); + __m128d __lsx_vfrecip_d (__m128d); + __m128 __lsx_vfrecip_s (__m128); + __m128d __lsx_vfrint_d (__m128d); +-__m128i __lsx_vfrintrm_d (__m128d); +-__m128i __lsx_vfrintrm_s (__m128); +-__m128i __lsx_vfrintrne_d (__m128d); +-__m128i __lsx_vfrintrne_s (__m128); +-__m128i __lsx_vfrintrp_d (__m128d); +-__m128i __lsx_vfrintrp_s (__m128); +-__m128i __lsx_vfrintrz_d (__m128d); +-__m128i __lsx_vfrintrz_s (__m128); ++__m128d __lsx_vfrintrm_d (__m128d); ++__m128 __lsx_vfrintrm_s (__m128); ++__m128d __lsx_vfrintrne_d (__m128d); ++__m128 __lsx_vfrintrne_s (__m128); ++__m128d __lsx_vfrintrp_d (__m128d); ++__m128 __lsx_vfrintrp_s (__m128); ++__m128d __lsx_vfrintrz_d (__m128d); ++__m128 __lsx_vfrintrz_s (__m128); + __m128 __lsx_vfrint_s (__m128); + __m128d __lsx_vfrsqrt_d (__m128d); + __m128 __lsx_vfrsqrt_s (__m128); +@@ -16674,12 +16674,12 @@ __m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15); + __m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1); + __m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7); + __m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3); +-__m128i __lsx_vld (void *, imm_n2048_2047) +-__m128i __lsx_vldi (imm_n1024_1023) +-__m128i __lsx_vldrepl_b (void *, imm_n2048_2047) +-__m128i __lsx_vldrepl_d (void *, imm_n256_255) +-__m128i __lsx_vldrepl_h (void *, imm_n1024_1023) +-__m128i __lsx_vldrepl_w (void *, imm_n512_511) ++__m128i __lsx_vld (void *, imm_n2048_2047); 
++__m128i __lsx_vldi (imm_n1024_1023); ++__m128i __lsx_vldrepl_b (void *, imm_n2048_2047); ++__m128i __lsx_vldrepl_d (void *, imm_n256_255); ++__m128i __lsx_vldrepl_h (void *, imm_n1024_1023); ++__m128i __lsx_vldrepl_w (void *, imm_n512_511); + __m128i __lsx_vldx (void *, long int); + __m128i __lsx_vmadd_b (__m128i, __m128i, __m128i); + __m128i __lsx_vmadd_d (__m128i, __m128i, __m128i); +@@ -16715,13 +16715,13 @@ __m128i __lsx_vmax_d (__m128i, __m128i); + __m128i __lsx_vmax_du (__m128i, __m128i); + __m128i __lsx_vmax_h (__m128i, __m128i); + __m128i __lsx_vmax_hu (__m128i, __m128i); +-__m128i __lsx_vmaxi_b (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_b (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_bu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_d (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_d (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_du (__m128i, imm0_31); +-__m128i __lsx_vmaxi_h (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_h (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_hu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_w (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_w (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_wu (__m128i, imm0_31); + __m128i __lsx_vmax_w (__m128i, __m128i); + __m128i __lsx_vmax_wu (__m128i, __m128i); +@@ -16731,13 +16731,13 @@ __m128i __lsx_vmin_d (__m128i, __m128i); + __m128i __lsx_vmin_du (__m128i, __m128i); + __m128i __lsx_vmin_h (__m128i, __m128i); + __m128i __lsx_vmin_hu (__m128i, __m128i); +-__m128i __lsx_vmini_b (__m128i, imm_n16_15) ++__m128i __lsx_vmini_b (__m128i, imm_n16_15); + __m128i __lsx_vmini_bu (__m128i, imm0_31); +-__m128i __lsx_vmini_d (__m128i, imm_n16_15) ++__m128i __lsx_vmini_d (__m128i, imm_n16_15); + __m128i __lsx_vmini_du (__m128i, imm0_31); +-__m128i __lsx_vmini_h (__m128i, imm_n16_15) ++__m128i __lsx_vmini_h (__m128i, imm_n16_15); + __m128i __lsx_vmini_hu (__m128i, imm0_31); +-__m128i __lsx_vmini_w (__m128i, imm_n16_15) ++__m128i __lsx_vmini_w (__m128i, imm_n16_15); + __m128i __lsx_vmini_wu (__m128i, imm0_31); + __m128i __lsx_vmin_w (__m128i, __m128i); + __m128i __lsx_vmin_wu (__m128i, __m128i); +@@ -16826,11 +16826,11 @@ __m128i __lsx_vpickod_d (__m128i, __m128i); + __m128i __lsx_vpickod_h (__m128i, __m128i); + __m128i __lsx_vpickod_w (__m128i, __m128i); + int __lsx_vpickve2gr_b (__m128i, imm0_15); +-unsinged int __lsx_vpickve2gr_bu (__m128i, imm0_15); ++unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15); + long int __lsx_vpickve2gr_d (__m128i, imm0_1); + unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1); + int __lsx_vpickve2gr_h (__m128i, imm0_7); +-unsinged int __lsx_vpickve2gr_hu (__m128i, imm0_7); ++unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7); + int __lsx_vpickve2gr_w (__m128i, imm0_3); + unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3); + __m128i __lsx_vreplgr2vr_b (int); +@@ -16893,10 +16893,6 @@ __m128i __lsx_vsigncov_b (__m128i, __m128i); + __m128i __lsx_vsigncov_d (__m128i, __m128i); + __m128i __lsx_vsigncov_h (__m128i, __m128i); + __m128i __lsx_vsigncov_w (__m128i, __m128i); +-__m128i __lsx_vsigncov_b (__m128i, __m128i); +-__m128i __lsx_vsigncov_d (__m128i, __m128i); +-__m128i __lsx_vsigncov_h (__m128i, __m128i); +-__m128i __lsx_vsigncov_w (__m128i, __m128i); + __m128i __lsx_vsle_b (__m128i, __m128i); + __m128i __lsx_vsle_bu (__m128i, __m128i); + __m128i __lsx_vsle_d (__m128i, __m128i); +@@ -16953,7 +16949,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31); + __m128i __lsx_vsran_b_h (__m128i, __m128i); + __m128i __lsx_vsran_h_w (__m128i, __m128i); + __m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrani_d_q 
(__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsran_w_d (__m128i, __m128i); +@@ -16967,7 +16963,7 @@ __m128i __lsx_vsrari_w (__m128i, imm0_31); + __m128i __lsx_vsrarn_b_h (__m128i, __m128i); + __m128i __lsx_vsrarn_h_w (__m128i, __m128i); + __m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrarn_w_d (__m128i, __m128i); +@@ -16983,7 +16979,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31); + __m128i __lsx_vsrln_b_h (__m128i, __m128i); + __m128i __lsx_vsrln_h_w (__m128i, __m128i); + __m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrln_w_d (__m128i, __m128i); +@@ -16997,7 +16993,7 @@ __m128i __lsx_vsrlri_w (__m128i, imm0_31); + __m128i __lsx_vsrlrn_b_h (__m128i, __m128i); + __m128i __lsx_vsrlrn_h_w (__m128i, __m128i); + __m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrlrn_w_d (__m128i, __m128i); +@@ -17009,8 +17005,8 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i); + __m128i __lsx_vssran_h_w (__m128i, __m128i); + __m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63); +@@ -17023,8 +17019,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i); + __m128i __lsx_vssrarn_h_w (__m128i, __m128i); + __m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63); +@@ -17037,8 +17033,8 @@ __m128i __lsx_vssrln_hu_w (__m128i, __m128i); + __m128i __lsx_vssrln_h_w (__m128i, __m128i); + __m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31); + __m128i 
__lsx_vssrlni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlni_w_d (__m128i, __m128i, imm0_63); +@@ -17051,8 +17047,8 @@ __m128i __lsx_vssrlrn_hu_w (__m128i, __m128i); + __m128i __lsx_vssrlrn_h_w (__m128i, __m128i); + __m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63); +@@ -17067,12 +17063,12 @@ __m128i __lsx_vssub_h (__m128i, __m128i); + __m128i __lsx_vssub_hu (__m128i, __m128i); + __m128i __lsx_vssub_w (__m128i, __m128i); + __m128i __lsx_vssub_wu (__m128i, __m128i); +-void __lsx_vst (__m128i, void *, imm_n2048_2047) +-void __lsx_vstelm_b (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstelm_d (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstelm_h (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstelm_w (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstx (__m128i, void *, long int) ++void __lsx_vst (__m128i, void *, imm_n2048_2047); ++void __lsx_vstelm_b (__m128i, void *, imm_n128_127, imm0_15); ++void __lsx_vstelm_d (__m128i, void *, imm_n128_127, imm0_1); ++void __lsx_vstelm_h (__m128i, void *, imm_n128_127, imm0_7); ++void __lsx_vstelm_w (__m128i, void *, imm_n128_127, imm0_3); ++void __lsx_vstx (__m128i, void *, long int); + __m128i __lsx_vsub_b (__m128i, __m128i); + __m128i __lsx_vsub_d (__m128i, __m128i); + __m128i __lsx_vsub_h (__m128i, __m128i); +-- +2.43.0 +
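One of the fixes above replaces the undefined "idx" placeholder in the vstelm prototypes with concrete immediate ranges, which follow from the element count: a 128-bit vector holds four 32-bit words, so the index is imm0_3 (likewise imm0_15 / imm0_7 / imm0_1 for the b/h/d variants). A usage sketch assuming -mlsx (hypothetical function name):

  #include <lsxintrin.h>

  void
  store_third_word (__m128i v, int *p)
  {
    /* Byte offset 0, element index 3; both arguments must be
       compile-time immediates per the corrected prototype.  */
    __lsx_vstelm_w (v, p, 0, 3);
  }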
_service:tar_scm:0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch
Added
@@ -0,0 +1,60 @@ +From d9965ed8d9f4244ac1948c6fb92c7c0f7d80b3a4 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 19 Dec 2023 16:43:17 +0800 +Subject: PATCH 081/188 LoongArch: Fix builtin function prototypes for LASX + in doc. + +gcc/ChangeLog: + + * doc/extend.texi:According to the documents submitted earlier, + Two problems with function return types and using the actual types + of parameters instead of variable names were found and fixed. +--- + gcc/doc/extend.texi | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index ac8da4e80..c793c9c5d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -17438,14 +17438,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256); + __m256d __lasx_xvfrecip_d (__m256d); + __m256 __lasx_xvfrecip_s (__m256); + __m256d __lasx_xvfrint_d (__m256d); +-__m256i __lasx_xvfrintrm_d (__m256d); +-__m256i __lasx_xvfrintrm_s (__m256); +-__m256i __lasx_xvfrintrne_d (__m256d); +-__m256i __lasx_xvfrintrne_s (__m256); +-__m256i __lasx_xvfrintrp_d (__m256d); +-__m256i __lasx_xvfrintrp_s (__m256); +-__m256i __lasx_xvfrintrz_d (__m256d); +-__m256i __lasx_xvfrintrz_s (__m256); ++__m256d __lasx_xvfrintrm_d (__m256d); ++__m256 __lasx_xvfrintrm_s (__m256); ++__m256d __lasx_xvfrintrne_d (__m256d); ++__m256 __lasx_xvfrintrne_s (__m256); ++__m256d __lasx_xvfrintrp_d (__m256d); ++__m256 __lasx_xvfrintrp_s (__m256); ++__m256d __lasx_xvfrintrz_d (__m256d); ++__m256 __lasx_xvfrintrz_s (__m256); + __m256 __lasx_xvfrint_s (__m256); + __m256d __lasx_xvfrsqrt_d (__m256d); + __m256 __lasx_xvfrsqrt_s (__m256); +@@ -17912,10 +17912,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i); + __m256i __lasx_xvssub_w (__m256i, __m256i); + __m256i __lasx_xvssub_wu (__m256i, __m256i); + void __lasx_xvst (__m256i, void *, imm_n2048_2047); +-void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_31); ++void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_3); ++void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_15); ++void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_7); + void __lasx_xvstx (__m256i, void *, long int); + __m256i __lasx_xvsub_b (__m256i, __m256i); + __m256i __lasx_xvsub_d (__m256i, __m256i); +-- +2.43.0 +
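The return-type corrections matter to users of the rounding intrinsics: xvfrintrm.s rounds each float element toward minus infinity but the elements stay floats, so the result is __m256 rather than __m256i. A sketch assuming -mlasx (hypothetical function name):

  #include <lasxintrin.h>

  __m256
  floor_elems (__m256 x)
  {
    /* Per the corrected prototype this type-checks as a
       float-vector to float-vector operation.  */
    return __lasx_xvfrintrm_s (x);
  }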
_service:tar_scm:0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch
Added
@@ -0,0 +1,92 @@ +From 48f0d47eb6dc2c799c845a25cfabd586bd176378 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Tue, 5 Dec 2023 14:44:35 +0800 +Subject: PATCH 082/188 LoongArch: Add asm modifiers to the LSX and LASX + directives in the doc. + +gcc/ChangeLog: + + * doc/extend.texi:Add modifiers to the vector of asm in the doc. + * doc/md.texi:Refine the description of the modifier 'f' in the doc. +--- + gcc/doc/extend.texi | 46 +++++++++++++++++++++++++++++++++++++++++++++ + gcc/doc/md.texi | 2 +- + 2 files changed, 47 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index c793c9c5d..bcb9329c2 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -11424,10 +11424,56 @@ The list below describes the supported modifiers and their effects for LoongArch + @item @code{d} @tab Same as @code{c}. + @item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register. + @item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}. ++@item @code{u} @tab Print a LASX register. ++@item @code{w} @tab Print a LSX register. + @item @code{X} @tab Print a constant integer operand in hexadecimal. + @item @code{z} @tab Print the operand in its unmodified form, followed by a comma. + @end multitable + ++References to input and output operands in the assembler template of extended ++asm statements can use modifiers to affect the way the operands are formatted ++in the code output to the assembler. For example, the following code uses the ++'w' modifier for LoongArch: ++ ++@example ++test-asm.c: ++ ++#include <lsxintrin.h> ++ ++__m128i foo (void) ++@{ ++__m128i a,b,c; ++__asm__ ("vadd.d %w0,%w1,%w2\n\t" ++ :"=f" (c) ++ :"f" (a),"f" (b)); ++ ++return c; ++@} ++ ++@end example ++ ++@noindent ++The compile command for the test case is as follows: ++ ++@example ++gcc test-asm.c -mlsx -S -o test-asm.s ++@end example ++ ++@noindent ++The assembly statement produces the following assembly code: ++ ++@example ++vadd.d $vr0,$vr0,$vr1 ++@end example ++ ++This is a 128-bit vector addition instruction, @code{c} (referred to in the ++template string as %0) is the output, and @code{a} (%1) and @code{b} (%2) are ++the inputs. @code{__m128i} is a vector data type defined in the file ++@code{lsxintrin.h} (@xref{LoongArch SX Vector Intrinsics}). The symbol '=f' ++represents a constraint using a floating-point register as an output type, and ++the 'f' in the input operand represents a constraint using a floating-point ++register operand, which can refer to the definition of a constraint ++(@xref{Constraints}) in gcc. + + @lowersections + @include md.texi +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index b58da0787..a2e839073 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -2750,7 +2750,7 @@ $r1h + @item LoongArch---@file{config/loongarch/constraints.md} + @table @code + @item f +-A floating-point register (if available). ++A floating-point or vector register (if available). + @item k + A memory operand whose address is formed by a base register and + (optionally scaled) index register. +-- +2.43.0 +
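The patch documents 'u' (LASX) alongside 'w' (LSX) and walks through an LSX example; an analogous LASX sketch using the 'u' modifier could look like the following, assuming -mlasx (xvadd.d is the 256-bit integer add; the "f" constraint again covers the shared FP/vector register file, printed as an $xr register via %u):

  #include <lasxintrin.h>

  __m256i
  foo256 (__m256i a, __m256i b)
  {
    __m256i c;
    __asm__ ("xvadd.d %u0,%u1,%u2\n\t"
             : "=f" (c)
             : "f" (a), "f" (b));
    return c;
  }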
_service:tar_scm:0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
Added
@@ -0,0 +1,392 @@ +From b199de440fc877efdd1dde90b5c1c5111e060c1b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 15 Dec 2023 01:49:40 +0800 +Subject: PATCH 083/188 LoongArch: Implement FCCmode reload and + cstore<ANYF:mode>4 + +We used a branch to load floating-point comparison results into GPR. +This is very slow when the branch is not predictable. + +Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM. +Then implement cstore<ANYF:mode>4. + +gcc/ChangeLog: + + * config/loongarch/loongarch-tune.h + (loongarch_rtx_cost_data::movcf2gr): New field. + (loongarch_rtx_cost_data::movcf2gr_): New method. + (loongarch_rtx_cost_data::use_movcf2gr): New method. + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr + to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based + on timing on LA464. + (loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) for LA664. + (loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) + 1. + * config/loongarch/predicates.md (loongarch_fcmp_operator): New + predicate. + * config/loongarch/loongarch.md (movfcc): Change to + define_expand. + (movfcc_internal): New define_insn. + (fcc_to_<X:mode>): New define_insn. + (cstore<ANYF:mode>4): New define_expand. + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs + and GPRs. + (loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR. + (loongarch_emit_float_compare): Call gen_reg_rtx instead of + loongarch_allocate_fcc. + (loongarch_allocate_fcc): Remove. + (loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS. + (loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS. + (loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS, + FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/movcf2gr.c: New test. + * gcc.target/loongarch/movcf2gr-via-fr.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 13 +++- + gcc/config/loongarch/loongarch-tune.h | 15 +++- + gcc/config/loongarch/loongarch.cc | 70 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 69 ++++++++++++++++-- + gcc/config/loongarch/predicates.md | 4 ++ + .../gcc.target/loongarch/movcf2gr-via-fr.c | 10 +++ + gcc/testsuite/gcc.target/loongarch/movcf2gr.c | 9 +++ + 7 files changed, 157 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 4a8885e83..843be78e4 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + int_mult_di (COSTS_N_INSNS (4)), + int_div_si (COSTS_N_INSNS (5)), + int_div_di (COSTS_N_INSNS (5)), ++ movcf2gr (COSTS_N_INSNS (7)), ++ movgr2cf (COSTS_N_INSNS (15)), + branch_cost (6), + memory_latency (4) {} + + /* The following properties cannot be looked up directly using "cpucfg". + So it is necessary to provide a default value for "unknown native" + tune targets (i.e. -mtune=native while PRID does not correspond to +- any known "-mtune" type). Currently all numbers are default. */ ++ any known "-mtune" type). 
*/ + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = +- array_tune<loongarch_rtx_cost_data> (); ++ array_tune<loongarch_rtx_cost_data> () ++ .set (CPU_LA664, ++ loongarch_rtx_cost_data () ++ .movcf2gr_ (COSTS_N_INSNS (1)) ++ .movgr2cf_ (COSTS_N_INSNS (1))); + + /* RTX costs to use when optimizing for size. + We use a value slightly larger than COSTS_N_INSNS (1) for all of them +@@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + .int_mult_si_ (COST_COMPLEX_INSN) + .int_mult_di_ (COST_COMPLEX_INSN) + .int_div_si_ (COST_COMPLEX_INSN) +- .int_div_di_ (COST_COMPLEX_INSN); ++ .int_div_di_ (COST_COMPLEX_INSN) ++ .movcf2gr_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index 616b94e87..26f163f0a 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data + unsigned short int_mult_di; + unsigned short int_div_si; + unsigned short int_div_di; ++ unsigned short movcf2gr; ++ unsigned short movgr2cf; + unsigned short branch_cost; + unsigned short memory_latency; + +@@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data + return *this; + } + ++ loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr) ++ { ++ movcf2gr = _movcf2gr; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf) ++ { ++ movgr2cf = _movgr2cf; ++ return *this; ++ } ++ + loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) + { + branch_cost = _branch_cost; +@@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data + memory_latency = _memory_latency; + return *this; + } +- + }; + + /* Costs to use when optimizing for size. */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3aeafeafd..56f631b1a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5119,29 +5119,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) + OPTAB_DIRECT); + } + +-/* Allocate a floating-point condition-code register of mode MODE. */ +- +-static rtx +-loongarch_allocate_fcc (machine_mode mode) +-{ +- unsigned int regno, count; +- +- gcc_assert (TARGET_HARD_FLOAT); +- +- if (mode == FCCmode) +- count = 1; +- else +- gcc_unreachable (); +- +- cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); +- if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) +- cfun->machine->next_fcc = 0; +- +- regno = FCC_REG_FIRST + cfun->machine->next_fcc; +- cfun->machine->next_fcc += count; +- return gen_rtx_REG (mode, regno); +-} +- + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ + + static void +@@ -5256,7 +5233,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + operands for FCMP.cond.fmt, instead a reversed condition code is + required and a test for false. 
*/ + *code = NE; +- *op0 = loongarch_allocate_fcc (FCCmode); ++ *op0 = gen_reg_rtx (FCCmode); + + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); +@@ -6626,7 +6603,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + enum mode_class mclass; + + if (mode == FCCmode) +- return FCC_REG_P (regno); ++ return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno); + + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); +@@ -6841,6 +6818,9 @@ loongarch_move_to_gpr_cost (reg_class_t from) + /* MOVFR2GR, etc. */ + return 4; + ++ case FCC_REGS: ++ return loongarch_cost->movcf2gr; ++ + default: + return 0; + } +@@ -6863,6 +6843,9 @@ loongarch_move_from_gpr_cost (reg_class_t to) + /* MOVGR2FR, etc. */ + return 4; + ++ case FCC_REGS: ++ return loongarch_cost->movgr2cf; ++ + default: + return 0; + } +@@ -6897,6 +6880,10 @@ loongarch_register_move_cost (machine_mode mode, reg_class_t from, + if (to == dregs) + return loongarch_move_to_gpr_cost (from); + ++ /* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */ ++ if (from == FCC_REGS || to == FCC_REGS) ++ return COSTS_N_INSNS (from == to ? 2 : 1); ++ + /* Handles cases that require a GPR temporary. */ + cost1 = loongarch_move_to_gpr_cost (from); + if (cost1 != 0) +@@ -6933,6 +6920,39 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + + regno = true_regnum (x); + ++ if (mode == FCCmode) ++ { ++ if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno)) ++ { ++ if (FCC_REG_P (regno)) ++ return FP_REGS; ++ ++ auto fn = in_p ? loongarch_move_from_gpr_cost ++ : loongarch_move_to_gpr_cost; ++ ++ if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1)) ++ return FP_REGS; ++ ++ return GP_REG_P (regno) ? NO_REGS : GR_REGS; ++ } ++ ++ if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno)) ++ { ++ auto fn = in_p ? 
loongarch_move_to_gpr_cost ++ : loongarch_move_from_gpr_cost; ++ ++ if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1)) ++ return FP_REGS; ++ ++ return NO_REGS; ++ } ++ ++ if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x)) ++ return GR_REGS; ++ ++ return NO_REGS; ++ } ++ + if (reg_class_subset_p (rclass, FP_REGS)) + { + if (regno < 0 +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23368008e..6cf71d9e4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2283,11 +2283,72 @@ + + ;; Clear one FCC register + +-(define_insn "movfcc" +- (set (match_operand:FCC 0 "register_operand" "=z") +- (const_int 0)) ++(define_expand "movfcc" ++ (set (match_operand:FCC 0 "") ++ (match_operand:FCC 1 "")) ++ "TARGET_HARD_FLOAT" ++{ ++ if (memory_operand (operands0, FCCmode) ++ && memory_operand (operands1, FCCmode)) ++ operands1 = force_reg (FCCmode, operands1); ++}) ++ ++(define_insn "movfcc_internal" ++ (set (match_operand:FCC 0 "nonimmediate_operand" ++ "=z,z,*f,*f,*r,*r,*m,*f,*r,z,*r") ++ (match_operand:FCC 1 "reg_or_0_operand" ++ "J,*f,z,*f,J*r,*m,J*r,J*r,*f,*r,z")) ++ "TARGET_HARD_FLOAT" ++ "@ ++ fcmp.caf.s\t%0,$f0,$f0 ++ movfr2cf\t%0,%1 ++ movcf2fr\t%0,%1 ++ fmov.s\t%0,%1 ++ or\t%0,%z1,$r0 ++ ld.b\t%0,%1 ++ st.b\t%z1,%0 ++ movgr2fr.w\t%0,%1 ++ movfr2gr.s\t%0,%1 ++ movgr2cf\t%0,%1 ++ movcf2gr\t%0,%1" ++ (set_attr "type" "move") ++ (set_attr "mode" "FCC")) ++ ++(define_insn "fcc_to_<X:mode>" ++ (set (match_operand:X 0 "register_operand" "=r") ++ (if_then_else:X (ne (match_operand:FCC 1 "register_operand" "0") ++ (const_int 0)) ++ (const_int 1) ++ (const_int 0))) ++ "TARGET_HARD_FLOAT" + "" +- "fcmp.caf.s\t%0,$f0,$f0") ++ (set_attr "length" "0") ++ (set_attr "type" "ghost")) ++ ++(define_expand "cstore<ANYF:mode>4" ++ (set (match_operand:SI 0 "register_operand") ++ (match_operator:SI 1 "loongarch_fcmp_operator" ++ (match_operand:ANYF 2 "register_operand") ++ (match_operand:ANYF 3 "register_operand"))) ++ "" ++ { ++ rtx fcc = gen_reg_rtx (FCCmode); ++ rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands1), FCCmode, ++ operands2, operands3); ++ ++ emit_insn (gen_rtx_SET (fcc, cmp)); ++ if (TARGET_64BIT) ++ { ++ rtx gpr = gen_reg_rtx (DImode); ++ emit_insn (gen_fcc_to_di (gpr, fcc)); ++ emit_insn (gen_rtx_SET (operands0, ++ lowpart_subreg (SImode, gpr, DImode))); ++ } ++ else ++ emit_insn (gen_fcc_to_si (operands0, fcc)); ++ ++ DONE; ++ }) + + ;; Conditional move instructions. 
+ +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 88e54c915..58f9a7826 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -590,6 +590,10 @@ + (define_predicate "loongarch_cstore_operator" + (match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu")) + ++(define_predicate "loongarch_fcmp_operator" ++ (match_code ++ "unordered,uneq,unlt,unle,eq,lt,le,ordered,ltgt,ne,ge,gt,unge,ungt")) ++ + (define_predicate "small_data_pattern" + (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") + (match_test "loongarch_small_data_pattern_p (op)"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c +new file mode 100644 +index 000000000..23334a3a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mtune=la464 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "movcf2fr\t\\\$f\0-9\+,\\\$fcc" } } */ ++/* { dg-final { scan-assembler "movfr2gr\\.s\t\\\$r4" } } */ ++ ++int ++t (float a, float b) ++{ ++ return a > b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c +new file mode 100644 +index 000000000..d27c393b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mtune=la664 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "movcf2gr\t\\\$r4,\\\$fcc" } } */ ++ ++int ++t (float a, float b) ++{ ++ return a > b; ++} +-- +2.43.0 +
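The practical win from cstore<ANYF:mode>4 shows up when a floating-point comparison result is consumed as a value rather than as a branch target: each result can then come from fcmp plus a movcf2gr (or the FPR/GPR reload path costed above for LA464) instead of a data-dependent branch. A sketch of such a workload (hypothetical function name):

  /* Each iteration consumes the comparison as an integer, so the
     branchless fcmp + movcf2gr sequence applies.  */
  int
  count_above (const float *x, int n, float t)
  {
    int k = 0;
    for (int i = 0; i < n; i++)
      k += x[i] > t;
    return k;
  }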
_service:tar_scm:0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch
Added
@@ -0,0 +1,69 @@ +From 8da6a317bc3ad64da8590649b83a841391f20438 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 17 Dec 2023 04:26:23 +0800 +Subject: PATCH 084/188 LoongArch: Add sign_extend pattern for 32-bit rotate + shift + +Remove a redundant sign extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotrsi3_extend): New + define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotrw.c: New test. +--- + gcc/config/loongarch/loongarch.md | 10 ++++++++++ + gcc/testsuite/gcc.target/loongarch/rotrw.c | 17 +++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotrw.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6cf71d9e4..44e8d336a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2893,6 +2893,16 @@ + (set_attr "type" "shift,shift") + (set_attr "mode" "<MODE>")) + ++(define_insn "rotrsi3_extend" ++ (set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") ++ (match_operand:SI 2 "arith_operand" "r,I")))) ++ "TARGET_64BIT" ++ "rotr%i2.w\t%0,%1,%2" ++ (set_attr "type" "shift,shift") ++ (set_attr "mode" "SI")) ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. + ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/testsuite/gcc.target/loongarch/rotrw.c b/gcc/testsuite/gcc.target/loongarch/rotrw.c +new file mode 100644 +index 000000000..6ed45e8b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotrw.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w\t\\\$r4,\\\$r4,\\\$r5" } } */ ++/* { dg-final { scan-assembler "rotri\\.w\t\\\$r4,\\\$r4,5" } } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++unsigned ++rotr (unsigned a, unsigned b) ++{ ++ return a >> b | a << 32 - b; ++} ++ ++unsigned ++rotri (unsigned a) ++{ ++ return a >> 5 | a << 27; ++} +-- +2.43.0 +
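The redundant extension removed here appears when a 32-bit rotate result is widened: like other 32-bit LoongArch operations, rotr.w already leaves a sign-extended 64-bit value in the register, and the new rotrsi3_extend pattern tells the compiler so. A sketch compiled at -O2 (hypothetical function name):

  long
  rotr_widen (unsigned a, unsigned b)
  {
    unsigned r = a >> b | a << (32 - b);
    /* This sign-extending use previously emitted a separate extension
       instruction; it now folds into rotr.w itself.  */
    return (int) r;
  }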
_service:tar_scm:0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch
Added
@@ -0,0 +1,37 @@ +From e56d6d9526e1565fffeb320e15796385eb1732b8 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Mon, 25 Dec 2023 11:20:23 +0800 +Subject: PATCH 085/188 LoongArch: Fixed bug in *bstrins_<mode>_for_ior_mask + template. + +We found that using the latest compiled gcc will cause a miscompare error +when running spec2006 400.perlbench test with -flto turned on. After testing, +it was found that only the LoongArch architecture will report errors. +The first error commit was located through the git bisect command as +r14-3773-g5b857e87201335. Through debugging, it was found that the problem +was that the split condition of the *bstrins_<mode>_for_ior_mask template was +empty, which should actually be consistent with the insn condition. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md: Adjust. +--- + gcc/config/loongarch/loongarch.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 44e8d336a..3d5b75825 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1489,7 +1489,7 @@ + "loongarch_pre_reload_split () && \ + loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" +- "" ++ "&& true" + (set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4)) + (match_dup 3)) +-- +2.43.0 +
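For reference, a sketch of the kind of expression the template is understood to match (an assumption drawn from the pattern's name, not from this patch): a bitfield merge under complementary masks, which should collapse into a single bstrins instruction. The empty split condition let the split run even when the insn condition no longer held, producing the miscompare:

  unsigned long
  merge_low12 (unsigned long dst, unsigned long src)
  {
    /* Insert the low 12 bits of src into dst: ideally one bstrins.d.  */
    return (dst & ~0xfffUL) | (src & 0xfffUL);
  }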
_service:tar_scm:0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
Added
@@ -0,0 +1,132 @@ +From b1947829a5949a37db09bc23681e44c8479bd404 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 22 Dec 2023 16:22:03 +0800 +Subject: PATCH 086/188 LoongArch: Fix insn output of vec_concat templates + for LASX. + +When investigaing failure of gcc.dg/vect/slp-reduc-sad.c, following +instruction block are being generated by vec_concatv32qi (which is +generated by vec_initv32qiv16qi) at entrance of foo() function: + + vldx $vr3,$r5,$r6 + vld $vr2,$r5,0 + xvpermi.q $xr2,$xr3,0x20 + +causes the reversion of vec_initv32qiv16qi operation's high and +low 128-bit part. + +According to other target's similar impl and LSX impl for following +RTL representation, current definition in lasx.md of "vec_concat<mode>" +are wrong: + + (set (op0) (vec_concat (op1) (op2))) + +For correct behavior, the last argument of xvpermi.q should be 0x02 +instead of 0x20. This patch fixes this issue and cleanup the vec_concat +template impl. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_concatv4di): Delete. + (vec_concatv8si): Delete. + (vec_concatv16hi): Delete. + (vec_concatv32qi): Delete. + (vec_concatv4df): Delete. + (vec_concatv8sf): Delete. + (vec_concat<mode>): New template with insn output fixed. +--- + gcc/config/loongarch/lasx.md | 74 ++++-------------------------------- + 1 file changed, 7 insertions(+), 67 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 9ca3f9278..46150f2fb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -577,77 +577,17 @@ + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_insn "vec_concatv4di" +- (set (match_operand:V4DI 0 "register_operand" "=f") +- (vec_concat:V4DI +- (match_operand:V2DI 1 "register_operand" "0") +- (match_operand:V2DI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv8si" +- (set (match_operand:V8SI 0 "register_operand" "=f") +- (vec_concat:V8SI +- (match_operand:V4SI 1 "register_operand" "0") +- (match_operand:V4SI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv16hi" +- (set (match_operand:V16HI 0 "register_operand" "=f") +- (vec_concat:V16HI +- (match_operand:V8HI 1 "register_operand" "0") +- (match_operand:V8HI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv32qi" +- (set (match_operand:V32QI 0 "register_operand" "=f") +- (vec_concat:V32QI +- (match_operand:V16QI 1 "register_operand" "0") +- (match_operand:V16QI 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) +- +-(define_insn "vec_concatv4df" +- (set (match_operand:V4DF 0 "register_operand" "=f") +- (vec_concat:V4DF +- (match_operand:V2DF 1 "register_operand" "0") +- (match_operand:V2DF 2 "register_operand" "f"))) +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DF")) +- +-(define_insn "vec_concatv8sf" +- (set (match_operand:V8SF 0 "register_operand" "=f") +- (vec_concat:V8SF +- (match_operand:V4SF 1 "register_operand" "0") +- (match_operand:V4SF 2 "register_operand" "f"))) 
++(define_insn "vec_concat<mode>" ++ (set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_concat:LASX ++ (match_operand:<VHMODE256_ALL> 1 "register_operand" "0") ++ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f"))) + "ISA_HAS_LASX" + { +- return "xvpermi.q\t%u0,%u2,0x20"; ++ return "xvpermi.q\t%u0,%u2,0x02"; + } + (set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")) ++ (set_attr "mode" "<MODE>")) + + ;; xshuf.w + (define_insn "lasx_xvperm_<lasxfmt_f_wd>" +-- +2.43.0 +
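To see why 0x02 is the right immediate here: operand 1 (the low half of the concatenation) is tied to xd by the "0" constraint, operand 2 arrives in xj, and each selector nibble of the xvpermi.q immediate picks one 128-bit lane out of the xj:xd pool. A hypothetical scalar model of the selection, written from the ISA manual's description rather than taken from GCC sources:

  typedef unsigned __int128 u128;

  /* Lanes 0-1 are xj's low/high halves, lanes 2-3 the old xd's; imm
     bits [1:0] pick the new low lane, bits [5:4] the new high lane.
     imm = 0x02 yields { lo = old xd lane 0, hi = xj lane 0 }, i.e.
     vec_concat (op1, op2); imm = 0x20 swapped the two halves.  */
  static void
  xvpermi_q (u128 xd[2], const u128 xj[2], unsigned imm)
  {
    u128 lanes[4] = { xj[0], xj[1], xd[0], xd[1] };
    u128 lo = lanes[imm & 3];
    u128 hi = lanes[(imm >> 4) & 3];
    xd[0] = lo;
    xd[1] = hi;
  }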
_service:tar_scm:0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
Added
@@ -0,0 +1,232 @@ +From 1096571509762846e2222f575bc981385b4e9fb7 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 22 Dec 2023 16:18:44 +0800 +Subject: PATCH 087/188 LoongArch: Fix ICE when passing two same vector + argument consecutively + +Following code will cause ICE on LoongArch target: + + #include <lsxintrin.h> + + extern void bar (__m128i, __m128i); + + __m128i a; + + void + foo () + { + bar (a, a); + } + +It is caused by missing constraint definition in mov<mode>_lsx. This +patch fixes the template and remove the unnecessary processing from +loongarch_split_move () function. + +This patch also cleanup the redundant definition from +loongarch_split_move () and loongarch_split_move_p (). + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + * config/loongarch/loongarch-protos.h + (loongarch_split_move): Remove unnecessary argument. + (loongarch_split_move_insn_p): Delete. + (loongarch_split_move_insn): Delete. + * config/loongarch/loongarch.cc + (loongarch_split_move_insn_p): Delete. + (loongarch_load_store_insns): Use loongarch_split_move_p + directly. + (loongarch_split_move): remove the unnecessary processing. + (loongarch_split_move_insn): Delete. + * config/loongarch/lsx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-mov-1.c: New test. +--- + gcc/config/loongarch/lasx.md | 4 +- + gcc/config/loongarch/loongarch-protos.h | 4 +- + gcc/config/loongarch/loongarch.cc | 49 +------------------ + gcc/config/loongarch/lsx.md | 10 ++-- + .../loongarch/vector/lsx/lsx-mov-1.c | 14 ++++++ + 5 files changed, 24 insertions(+), 57 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 46150f2fb..dbbf5a136 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -839,10 +839,10 @@ + (set (match_operand:LASX 0 "nonimmediate_operand") + (match_operand:LASX 1 "move_operand")) + "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_insn_p (operands0, operands1)" ++ && loongarch_split_move_p (operands0, operands1)" + (const_int 0) + { +- loongarch_split_move_insn (operands0, operands1, curr_insn); ++ loongarch_split_move (operands0, operands1); + DONE; + }) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index e5fcf3111..2067e50c3 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -82,11 +82,9 @@ extern rtx loongarch_legitimize_call_address (rtx); + + extern rtx loongarch_subword (rtx, bool); + extern bool loongarch_split_move_p (rtx, rtx); +-extern void loongarch_split_move (rtx, rtx, rtx); ++extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern bool loongarch_split_move_insn_p (rtx, rtx); +-extern void loongarch_split_move_insn (rtx, rtx, rtx); + extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 56f631b1a..5c278386a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2558,7 
+2558,6 @@ loongarch_split_const_insns (rtx x) + return low + high; + } + +-bool loongarch_split_move_insn_p (rtx dest, rtx src); + /* Return one word of 128-bit value OP, taking into account the fixed + endianness of certain registers. BYTE selects from the byte address. */ + +@@ -2598,7 +2597,7 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn) + { + set = single_set (insn); + if (set +- && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) ++ && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set))) + might_split_p = false; + } + +@@ -4216,7 +4215,7 @@ loongarch_split_move_p (rtx dest, rtx src) + SPLIT_TYPE describes the split condition. */ + + void +-loongarch_split_move (rtx dest, rtx src, rtx insn_) ++loongarch_split_move (rtx dest, rtx src) + { + rtx low_dest; + +@@ -4254,33 +4253,6 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_) + loongarch_subword (src, true)); + } + } +- +- /* This is a hack. See if the next insn uses DEST and if so, see if we +- can forward SRC for DEST. This is most useful if the next insn is a +- simple store. */ +- rtx_insn *insn = (rtx_insn *) insn_; +- struct loongarch_address_info addr = {}; +- if (insn) +- { +- rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); +- if (next) +- { +- rtx set = single_set (next); +- if (set && SET_SRC (set) == dest) +- { +- if (MEM_P (src)) +- { +- rtx tmp = XEXP (src, 0); +- loongarch_classify_address (&addr, tmp, GET_MODE (tmp), +- true); +- if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) +- validate_change (next, &SET_SRC (set), src, false); +- } +- else +- validate_change (next, &SET_SRC (set), src, false); +- } +- } +- } + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -4327,23 +4299,6 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + op2 = gen_int_mode (v, mode); + } + +-/* Return true if a move from SRC to DEST in INSN should be split. */ +- +-bool +-loongarch_split_move_insn_p (rtx dest, rtx src) +-{ +- return loongarch_split_move_p (dest, src); +-} +- +-/* Split a move from SRC to DEST in INSN, given that +- loongarch_split_move_insn_p holds. */ +- +-void +-loongarch_split_move_insn (rtx dest, rtx src, rtx insn) +-{ +- loongarch_split_move (dest, src, insn); +-} +- + /* Implement TARGET_CONSTANT_ALIGNMENT. 
*/ + + static HOST_WIDE_INT +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 7f5fff40a..3e3248ef4 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -794,21 +794,21 @@ + }) + + (define_insn "mov<mode>_lsx" +- (set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f") +- (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r")) ++ (set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") ++ (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r")) + "ISA_HAS_LSX" + { return loongarch_output_move (operands0, operands1); } +- (set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") ++ (set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") + (set_attr "mode" "<MODE>")) + + (define_split + (set (match_operand:LSX 0 "nonimmediate_operand") + (match_operand:LSX 1 "move_operand")) + "reload_completed && ISA_HAS_LSX +- && loongarch_split_move_insn_p (operands0, operands1)" ++ && loongarch_split_move_p (operands0, operands1)" + (const_int 0) + { +- loongarch_split_move_insn (operands0, operands1, curr_insn); ++ loongarch_split_move (operands0, operands1); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c +new file mode 100644 +index 000000000..7f9d792eb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -O2" } */ ++ ++#include <lsxintrin.h> ++ ++extern void bar (__m128i, __m128i); ++ ++__m128i a; ++ ++void ++foo () ++{ ++ bar (a, a); ++} +-- +2.43.0 +
_service:tar_scm:0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
Added
@@ -0,0 +1,253 @@
+From a2cc86c9b5e44c3dcdb8c52d6ae5f535442ec1d4 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 17 Dec 2023 05:38:20 +0800
+Subject: [PATCH 088/188] LoongArch: Expand left rotate to right rotate with
+ negated amount
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (rotl<mode>3):
+	New define_expand.
+	* config/loongarch/simd.md (vrotl<mode>3): Likewise.
+	(rotl<mode>3): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/rotl-with-rotr.c: New test.
+	* gcc.target/loongarch/rotl-with-vrotr-b.c: New test.
+	* gcc.target/loongarch/rotl-with-vrotr-h.c: New test.
+	* gcc.target/loongarch/rotl-with-vrotr-w.c: New test.
+	* gcc.target/loongarch/rotl-with-vrotr-d.c: New test.
+	* gcc.target/loongarch/rotl-with-xvrotr-b.c: New test.
+	* gcc.target/loongarch/rotl-with-xvrotr-h.c: New test.
+	* gcc.target/loongarch/rotl-with-xvrotr-w.c: New test.
+	* gcc.target/loongarch/rotl-with-xvrotr-d.c: New test.
+---
+ gcc/config/loongarch/loongarch.md             | 12 ++++++++
+ gcc/config/loongarch/simd.md                  | 29 +++++++++++++++++++
+ .../gcc.target/loongarch/rotl-with-rotr.c     |  9 ++++++
+ .../gcc.target/loongarch/rotl-with-vrotr-b.c  |  7 +++++
+ .../gcc.target/loongarch/rotl-with-vrotr-d.c  |  7 +++++
+ .../gcc.target/loongarch/rotl-with-vrotr-h.c  |  7 +++++
+ .../gcc.target/loongarch/rotl-with-vrotr-w.c  | 28 ++++++++++++++++++
+ .../gcc.target/loongarch/rotl-with-xvrotr-b.c |  7 +++++
+ .../gcc.target/loongarch/rotl-with-xvrotr-d.c |  7 +++++
+ .../gcc.target/loongarch/rotl-with-xvrotr-h.c |  7 +++++
+ .../gcc.target/loongarch/rotl-with-xvrotr-w.c |  7 +++++
+ 11 files changed, 127 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 3d5b75825..ed4d4b906 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -2903,6 +2903,18 @@
+   [(set_attr "type" "shift,shift")
+    (set_attr "mode" "SI")])
+ 
++;; Expand left rotate to right rotate.
++(define_expand "rotl<mode>3"
++  [(set (match_dup 3)
++	(neg:SI (match_operand:SI 2 "register_operand")))
++   (set (match_operand:GPR 0 "register_operand")
++	(rotatert:GPR (match_operand:GPR 1 "register_operand")
++		      (match_dup 3)))]
++  ""
++  {
++    operands[3] = gen_reg_rtx (SImode);
++  });
++
+ ;; The following templates were added to generate "bstrpick.d + alsl.d"
+ ;; instruction pairs.
+ ;; It is required that the values of const_immalsl_operand and
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 13202f79b..93fb39abc 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -268,6 +268,35 @@
+   [(set_attr "type" "simd_int_arith")
+    (set_attr "mode" "<MODE>")])
+ 
++;; Expand left rotate to right rotate.
++(define_expand "vrotl<mode>3"
++  [(set (match_dup 3)
++	(neg:IVEC (match_operand:IVEC 2 "register_operand")))
++   (set (match_operand:IVEC 0 "register_operand")
++	(rotatert:IVEC (match_operand:IVEC 1 "register_operand")
++		       (match_dup 3)))]
++  ""
++  {
++    operands[3] = gen_reg_rtx (<MODE>mode);
++  });
++
++;; Expand left rotate with a scalar amount to right rotate: negate the
++;; scalar before broadcasting it because scalar negation is cheaper than
++;; vector negation.
++(define_expand "rotl<mode>3"
++  [(set (match_dup 3)
++	(neg:SI (match_operand:SI 2 "register_operand")))
++   (set (match_dup 4)
++	(vec_duplicate:IVEC (subreg:<IVEC:UNITMODE> (match_dup 3) 0)))
++   (set (match_operand:IVEC 0 "register_operand")
++	(rotatert:IVEC (match_operand:IVEC 1 "register_operand")
++		       (match_dup 4)))]
++  ""
++  {
++    operands[3] = gen_reg_rtx (SImode);
++    operands[4] = gen_reg_rtx (<MODE>mode);
++  });
++
+ ;; <x>vrotri.{b/h/w/d}
+ 
+ (define_insn "rotr<mode>3"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
+new file mode 100644
+index 000000000..84cc53cec
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++/* { dg-final { scan-assembler "rotr\\.w" } } */
++
++unsigned
++t (unsigned a, unsigned b)
++{
++  return a << b | a >> (32 - b);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
+new file mode 100644
+index 000000000..14298bf9e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "vrotr\\.b" 2 } } */
++/* { dg-final { scan-assembler-times "vneg\\.b" 1 } } */
++
++#define TYPE char
++#include "rotl-with-vrotr-w.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
+new file mode 100644
+index 000000000..0e971b323
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "vrotr\\.d" 2 } } */
++/* { dg-final { scan-assembler-times "vneg\\.d" 1 } } */
++
++#define TYPE long long
++#include "rotl-with-vrotr-w.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
+new file mode 100644
+index 000000000..93216ebc2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "vrotr\\.h" 2 } } */
++/* { dg-final { scan-assembler-times "vneg\\.h" 1 } } */
++
++#define TYPE short
++#include "rotl-with-vrotr-w.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
+new file mode 100644
+index 000000000..d05b86f47
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "vrotr\\.w" 2 } } */
++/* { dg-final { scan-assembler-times "vneg\\.w" 1 } } */
++
++#ifndef VLEN
++#define VLEN 16
++#endif
++
++#ifndef TYPE
++#define TYPE int
++#endif
++
++typedef unsigned TYPE V __attribute__ ((vector_size (VLEN)));
++V a, b, c;
++
++void
++test (int x)
++{
++  b = a << x | a >> ((int)sizeof (TYPE) * __CHAR_BIT__ - x);
++}
++
++void
++test2 (void)
++{
++  for (int i = 0; i < VLEN / sizeof (TYPE); i++)
++    c[i] = a[i] << b[i] | a[i] >> ((int)sizeof (TYPE) * __CHAR_BIT__ - b[i]);
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
+new file mode 100644
+index 000000000..2674b1b61
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "xvrotr\\.b" 2 } } */
++/* { dg-final { scan-assembler-times "xvneg\\.b" 1 } } */
++
++#define VLEN 32
++#include "rotl-with-vrotr-b.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
+new file mode 100644
+index 000000000..e94403315
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "xvrotr\\.d" 2 } } */
++/* { dg-final { scan-assembler-times "xvneg\\.d" 1 } } */
++
++#define VLEN 32
++#include "rotl-with-vrotr-d.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
+new file mode 100644
+index 000000000..3d998941f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "xvrotr\\.h" 2 } } */
++/* { dg-final { scan-assembler-times "xvneg\\.h" 1 } } */
++
++#define VLEN 32
++#include "rotl-with-vrotr-h.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
+new file mode 100644
+index 000000000..ca6aa7bae
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { scan-assembler-times "xvrotr\\.w" 2 } } */
++/* { dg-final { scan-assembler-times "xvneg\\.w" 1 } } */
++
++#define VLEN 32
++#include "rotl-with-vrotr-w.c"
+-- 
+2.43.0
+
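For illustration only (not part of the patch): the expanders above rely on the identity that a left rotate by n equals a right rotate by the negated amount, taken modulo the bit width, so one neg (or vneg in the vector case) plus a rotr instruction implements rotl. A minimal self-contained C sketch of that identity, with hypothetical helper names:

#include <assert.h>
#include <stdint.h>

/* Right rotate, amount reduced modulo the 32-bit width.  */
static uint32_t
rotr32 (uint32_t x, unsigned n)
{
  n &= 31;
  return n ? (x >> n) | (x << (32 - n)) : x;
}

/* Left rotate expressed through right rotate with a negated amount,
   mirroring what the rotl<mode>3 expander emits.  */
static uint32_t
rotl32 (uint32_t x, unsigned n)
{
  return rotr32 (x, (unsigned) -(int) n & 31);
}

int
main (void)
{
  for (unsigned n = 0; n < 64; n++)
    {
      unsigned k = n & 31;
      uint32_t ref = k ? (0x12345678u << k) | (0x12345678u >> (32 - k))
		       : 0x12345678u;
      assert (rotl32 (0x12345678u, n) == ref);
    }
  return 0;
}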
View file
_service:tar_scm:0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
Added
@@ -0,0 +1,104 @@
+From 1e389ec3bad94888fadd153f191fe8862448f258 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 27 Dec 2023 04:28:56 +0800
+Subject: [PATCH 089/188] LoongArch: Fix infinite secondary reloading of
+ FCCmode [PR113148]
+
+The GCC internal doc says:
+
+     X might be a pseudo-register or a 'subreg' of a pseudo-register,
+     which could either be in a hard register or in memory.  Use
+     'true_regnum' to find out; it will return -1 if the pseudo is in
+     memory and the hard register number if it is in a register.
+
+So "MEM_P (x)" is not enough for checking if we are reloading from/to
+the memory.  This bug has caused reload pass to stall and finally ICE
+complaining with "maximum number of generated reload insns per insn
+achieved", since r14-6814.
+
+Check if "true_regnum (x)" is -1 besides "MEM_P (x)" to fix the issue.
+
+gcc/ChangeLog:
+
+	PR target/113148
+	* config/loongarch/loongarch.cc (loongarch_secondary_reload):
+	Check if regno == -1 besides MEM_P (x) for reloading FCCmode
+	from/to FPR to/from memory.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/113148
+	* gcc.target/loongarch/pr113148.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc             |  3 +-
+ gcc/testsuite/gcc.target/loongarch/pr113148.c | 44 +++++++++++++++++++
+ 2 files changed, 46 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113148.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 5c278386a..2e305f940 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6902,7 +6902,8 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+       return NO_REGS;
+     }
+ 
+-  if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x))
++  if (reg_class_subset_p (rclass, FP_REGS)
++      && (regno == -1 || MEM_P (x)))
+     return GR_REGS;
+ 
+   return NO_REGS;
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr113148.c b/gcc/testsuite/gcc.target/loongarch/pr113148.c
+new file mode 100644
+index 000000000..cf48e5520
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/pr113148.c
+@@ -0,0 +1,44 @@
++/* PR 113148: ICE caused by infinite reloading */
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=la464 -mfpu=64 -mabi=lp64d" } */
++
++struct bound
++{
++  double max;
++} drawQuadrant_bound;
++double w4, innerXfromXY_y, computeBound_right_0;
++struct arc_def
++{
++  double w, h;
++  double a0, a1;
++};
++static void drawQuadrant (struct arc_def *);
++static void
++computeBound (struct arc_def *def, struct bound *bound)
++{
++  double ellipsex_1, ellipsex_0;
++  bound->max = def->a1 ?: __builtin_sin (w4) * def->h;
++  if (def->a0 == 5 && def->w == def->h)
++    ;
++  else
++    ellipsex_0 = def->a0 == 0.0 ?: __builtin_cos (w4);
++  if (def->a1 == 5 && def->w == def->h)
++    ellipsex_1 = bound->max;
++  __builtin_sqrt (ellipsex_1 * innerXfromXY_y * innerXfromXY_y * w4);
++  computeBound_right_0 = ellipsex_0;
++}
++void
++drawArc ()
++{
++  struct arc_def foo;
++  for (;;)
++    drawQuadrant (&foo);
++}
++void
++drawQuadrant (struct arc_def *def)
++{
++  int y, miny;
++  computeBound (def, &drawQuadrant_bound);
++  while (y >= miny)
++    ;
++}
+-- 
+2.43.0
+
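A toy model may make the fix easier to follow; every name below is illustrative, not a GCC internal, and the real context is the loongarch_secondary_reload hunk above. The point is that a spilled pseudo is memory-resident even though its rtx is not a MEM, and only a true_regnum () result of -1 reveals that:

#include <stdbool.h>
#include <stdio.h>

enum reg_class_model { NO_REGS_M, GR_REGS_M };

/* Sketch: decide whether an FPR<->FCCmode reload needs a GPR step.
   `trn' stands in for true_regnum (x): -1 means x is a pseudo that
   reload has assigned to a stack slot.  */
static enum reg_class_model
secondary_reload_model (bool rclass_in_fp_regs, bool x_is_mem, int trn)
{
  if (rclass_in_fp_regs && (trn == -1 || x_is_mem))
    return GR_REGS_M;	/* bounce the value through a general register */
  return NO_REGS_M;	/* a direct move is fine */
}

int
main (void)
{
  /* The old check (x_is_mem alone) missed this case, so reload kept
     generating reloads for the same operand until it gave up.  */
  printf ("%d\n", secondary_reload_model (true, false, -1));  /* 1 */
  return 0;
}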
View file
_service:tar_scm:0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
Added
@@ -0,0 +1,305 @@
+From 294893b352898328d804f2d07981f6bf1e54f8b6 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 12 Dec 2023 04:54:21 +0800
+Subject: [PATCH 090/188] LoongArch: Replace -mexplicit-relocs=auto simple-used
+ address peephole2 with combine
+
+The problem with peephole2 is that it uses a naive sliding-window
+algorithm and misses many cases.  For example:
+
+    float a[10000];
+    float t() { return a[0] + a[8000]; }
+
+is compiled to:
+
+    la.local    $r13,a
+    la.local    $r12,a+32768
+    fld.s       $f1,$r13,0
+    fld.s       $f0,$r12,-768
+    fadd.s      $f0,$f1,$f0
+
+by trunk.  But as we've explained in r14-4851, the following would be
+better with -mexplicit-relocs=auto:
+
+    pcalau12i   $r13,%pc_hi20(a)
+    pcalau12i   $r12,%pc_hi20(a+32000)
+    fld.s       $f1,$r13,%pc_lo12(a)
+    fld.s       $f0,$r12,%pc_lo12(a+32000)
+    fadd.s      $f0,$f1,$f0
+
+However the sliding-window algorithm just won't detect the pcalau12i/fld
+pair to be optimized.  Using a define_insn_and_rewrite in the combine
+pass works around the issue.
+
+gcc/ChangeLog:
+
+	* config/loongarch/predicates.md
+	(symbolic_pcrel_offset_operand): New define_predicate.
+	(mem_simple_ldst_operand): Likewise.
+	* config/loongarch/loongarch-protos.h
+	(loongarch_rewrite_mem_for_simple_ldst): Declare.
+	* config/loongarch/loongarch.cc
+	(loongarch_rewrite_mem_for_simple_ldst): Implement.
+	* config/loongarch/loongarch.md (simple_load<mode>): New
+	define_insn_and_rewrite.
+	(simple_load_<su>ext<SUBDI:mode><GPR:mode>): Likewise.
+	(simple_store<mode>): Likewise.
+	(define_peephole2): Remove la.local/fld peepholes.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c:
+	New test.
+	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c:
+	New test.
+---
+ gcc/config/loongarch/loongarch-protos.h       |   1 +
+ gcc/config/loongarch/loongarch.cc             |  16 +++
+ gcc/config/loongarch/loongarch.md             | 114 +++++-------------
+ gcc/config/loongarch/predicates.md            |  13 ++
+ ...explicit-relocs-auto-single-load-store-2.c |  11 ++
+ ...explicit-relocs-auto-single-load-store-3.c |  18 +++
+ 6 files changed, 86 insertions(+), 87 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
+
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index 2067e50c3..5060efbb6 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
++++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -163,6 +163,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+ extern bool loongarch_check_zero_div_p (void);
+ extern bool loongarch_pre_reload_split (void);
+ extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
++extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx);
+ 
+ union loongarch_gen_fn_ptrs
+ {
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 2e305f940..c6318bee9 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5713,6 +5713,22 @@ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
+   return 0;
+ }
+ 
++/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
++   -mcmodel={normal/medium}.
*/ ++rtx ++loongarch_rewrite_mem_for_simple_ldst (rtx mem) ++{ ++ rtx addr = XEXP (mem, 0); ++ rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ++ UNSPEC_PCALAU12I_GR); ++ rtx new_mem; ++ ++ addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr); ++ new_mem = gen_rtx_MEM (GET_MODE (mem), addr); ++ MEM_COPY_ATTRIBUTES (new_mem, mem); ++ return new_mem; ++} ++ + /* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed4d4b906..3c61a0cf4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4135,101 +4135,41 @@ + ;; + ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with + ;; 3 instructions). +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (match_dup 0))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1)))) +- { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) +- (match_operand 3 "const_int_operand")))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1)))) +- { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") +- (any_extend:GPR (mem:SUBDI (match_dup 0)))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1))))) ++(define_insn_and_rewrite "simple_load<mode>" ++ (set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") ++ (match_operand:LD_AT_LEAST_32_BIT 1 "mem_simple_ldst_operand" "")) ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ operands1 = loongarch_rewrite_mem_for_simple_ldst (operands1); + }) + +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") ++(define_insn_and_rewrite "simple_load_<su>ext<SUBDI:mode><GPR:mode>" ++ (set (match_operand:GPR 0 "register_operand" "=r") + (any_extend:GPR +- (mem:SUBDI (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))))) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && 
(TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0) \ +- || REGNO (operands0) == REGNO (operands2))" +- (set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1))))) +- { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); +- }) +- +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:ST_ANY (match_dup 0)) +- (match_operand:ST_ANY 2 "register_operand")) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0)) \ +- && REGNO (operands0) != REGNO (operands2)" +- (set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2)) ++ (match_operand:SUBDI 1 "mem_simple_ldst_operand" ""))) ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ operands1 = loongarch_rewrite_mem_for_simple_ldst (operands1); + }) + +-(define_peephole2 +- (set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:ST_ANY (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))) +- (match_operand:ST_ANY 2 "register_operand")) +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands0)) \ +- && REGNO (operands0) != REGNO (operands2)" +- (set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2)) ++(define_insn_and_rewrite "simple_store<mode>" ++ (set (match_operand:ST_ANY 0 "mem_simple_ldst_operand" "") ++ (match_operand:ST_ANY 1 "reg_or_0_operand" "r,f")) ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- operands1 = plus_constant (Pmode, operands1, INTVAL (operands3)); +- emit_insn (gen_pcalau12i_gr<P:mode> (operands0, operands1)); ++ operands0 = loongarch_rewrite_mem_for_simple_ldst (operands0); + }) + + ;; Synchronization instructions. 
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 58f9a7826..3698b9103 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -579,6 +579,19 @@ + return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL; + }) + ++(define_predicate "symbolic_pcrel_offset_operand" ++ (and (match_code "plus") ++ (match_operand 0 "symbolic_pcrel_operand") ++ (match_operand 1 "const_int_operand"))) ++ ++(define_predicate "mem_simple_ldst_operand" ++ (match_code "mem") ++{ ++ op = XEXP (op, 0); ++ return (symbolic_pcrel_operand (op, Pmode) ++ || symbolic_pcrel_offset_operand (op, Pmode)); ++}) ++ + (define_predicate "equality_operator" + (match_code "eq,ne")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c +new file mode 100644 +index 000000000..42cb966d1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */ ++ ++float a8001; ++float ++t (void) ++{ ++ return a0 + a8000; ++} ++ ++/* { dg-final { scan-assembler-not "la.local" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c +new file mode 100644 +index 000000000..32aa5383d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mexplicit-relocs=auto -fdump-rtl-final" } */ ++/* { dg-final { scan-rtl-dump-times "mem/v/c" 2 "final" } } */ ++/* { dg-final { scan-assembler-not "la\\.local" } } */ ++ ++volatile unsigned long counter; ++ ++unsigned long ++read (void) ++{ ++ return counter; ++} ++ ++void ++clear (void) ++{ ++ counter = 0; ++} +-- +2.43.0 +
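The reason the pcalau12i/%pc_lo12 pair in the commit message tolerates offsets like a+32000 is that the hi20 part compensates for the sign-extended lo12 part, so the split address always recombines exactly. A standalone sketch of that arithmetic, under the assumption that the 12-bit load/store offset field is sign-extended (helper names are made up):

#include <assert.h>
#include <stdint.h>

/* Sign-extend the low 12 bits, as a 12-bit load/store offset would be.  */
static int64_t
sext12 (int64_t v)
{
  v &= 0xfff;
  return (v & 0x800) ? v - 0x1000 : v;
}

/* Model of splitting sym into %pc_hi20(sym) + %pc_lo12(sym).  */
static int64_t
split_address (int64_t sym)
{
  int64_t lo = sext12 (sym);	/* rides on the fld/fst itself	 */
  int64_t hi = sym - lo;	/* a multiple of 4096 by design	 */
  return hi + lo;		/* recombines to sym exactly	 */
}

int
main (void)
{
  for (int64_t s = -70000; s <= 70000; s += 7)
    assert (split_address (s) == s);
  return 0;
}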
View file
_service:tar_scm:0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch
Added
@@ -0,0 +1,33 @@
+From 4d569c5fde85ca426eecf57119048ec25f048758 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 29 Dec 2023 20:04:34 +0800
+Subject: [PATCH 091/188] LoongArch: Fix the format of
+ bstrins_<mode>_for_ior_mask condition (NFC)
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (bstrins_<mode>_for_ior_mask):
+	For the condition, remove unneeded trailing "\" and move "&&" to
+	follow GNU coding style.  NFC.
+---
+ gcc/config/loongarch/loongarch.md | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 3c61a0cf4..996df66e8 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1486,8 +1486,8 @@
+ 		    (match_operand:GPR 2 "const_int_operand"))
+ 	   (and:GPR (match_operand:GPR 3 "register_operand")
+ 		    (match_operand:GPR 4 "const_int_operand"))))]
+-  "loongarch_pre_reload_split () && \
+-   loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
++  "loongarch_pre_reload_split ()
++   && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
+   "#"
+   "&& true"
+   [(set (match_dup 0) (match_dup 1))
+-- 
+2.43.0
+
View file
_service:tar_scm:0092-LoongArch-Added-TLS-Le-Relax-support.patch
Added
@@ -0,0 +1,280 @@ +From 58d41ffad306a359ecd2902ec19d582506f14b10 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 12 Dec 2023 16:32:31 +0800 +Subject: PATCH 092/188 LoongArch: Added TLS Le Relax support. + +Check whether the assembler supports tls le relax. If it supports it, the assembly +instruction sequence of tls le relax will be generated by default. + +The original way to obtain the tls le symbol address: + lu12i.w $rd, %le_hi20(sym) + ori $rd, $rd, %le_lo12(sym) + add.{w/d} $rd, $rd, $tp + +If the assembler supports tls le relax, the following sequence is generated: + + lu12i.w $rd, %le_hi20_r(sym) + add.{w/d} $rd,$rd,$tp,%le_add_r(sym) + addi.{w/d} $rd,$rd,%le_lo12_r(sym) + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_TLS_LE_RELAXATION): Define. + * config/loongarch/loongarch.cc (loongarch_legitimize_tls_address): + Added TLS Le Relax support. + (loongarch_print_operand_reloc): Add the output string of TLS Le Relax. + * config/loongarch/loongarch.md (@add_tls_le_relax<mode>): New template. + * configure: Regenerate. + * configure.ac: Check if binutils supports TLS le relax. + +gcc/testsuite/ChangeLog: + + * lib/target-supports.exp: Add a function to check whether binutil supports + TLS Le Relax. + * gcc.target/loongarch/tls-le-relax.c: New test. +--- + gcc/config.in | 6 +++ + gcc/config/loongarch/loongarch-opts.h | 4 ++ + gcc/config/loongarch/loongarch.cc | 46 +++++++++++++++++-- + gcc/config/loongarch/loongarch.md | 12 +++++ + gcc/configure | 31 +++++++++++++ + gcc/configure.ac | 5 ++ + .../gcc.target/loongarch/tls-le-relax.c | 12 +++++ + gcc/testsuite/lib/target-supports.exp | 12 +++++ + 8 files changed, 125 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-le-relax.c + +diff --git a/gcc/config.in b/gcc/config.in +index 033cfb98b..7220b2b2b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -771,6 +771,12 @@ + #endif + + ++/* Define if your assembler supports tls le relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_TLS_LE_RELAXATION ++#endif ++ ++ + /* Define if your assembler supports vl/vst/vlm/vstm with an optional + alignment hint argument. */ + #ifndef USED_FOR_TARGET +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 639ed50bd..8491bee0d 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -114,4 +114,8 @@ struct loongarch_flags { + #define HAVE_AS_TLS 0 + #endif + ++#ifndef HAVE_AS_TLS_LE_RELAXATION ++#define HAVE_AS_TLS_LE_RELAXATION 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c6318bee9..d1b1950dc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2993,7 +2993,29 @@ loongarch_legitimize_tls_address (rtx loc) + + case TLS_MODEL_LOCAL_EXEC: + { +- /* la.tls.le; tp-relative add. */ ++ /* la.tls.le; tp-relative add. 
++ ++ normal: ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ add.{w/d} $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ tls le relax: ++ lu12i.w $rd, %le_hi20_r(sym) ++ add.{w/d} $rd,$rd,$tp ++ addi.{w/d} $rd,$rd,%le_lo12_r(sym) ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ extreme (When the code model is set to extreme, the TLS le Relax ++ instruction sequence is not generated): ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ lu32i.d $rd, %le64_lo20(sym) ++ lu52i.d $rd, $rd, %le64_hi12(sym) ++ add.d $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) */ ++ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +@@ -3004,7 +3026,20 @@ loongarch_legitimize_tls_address (rtx loc) + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +- emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); ++ ++ /* The assembler does not implement tls le relax support when the ++ code model is extreme, so when the code model is extreme, the ++ old symbol address acquisition method is still used. */ ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ { ++ emit_insn (gen_add_tls_le_relax (Pmode, dest, high, ++ tp, loc)); ++ loongarch_emit_move (dest, ++ gen_rtx_LO_SUM (Pmode, dest, tmp2)); ++ return dest; ++ } ++ else ++ emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); + + if (TARGET_CMODEL_EXTREME) + { +@@ -5936,7 +5971,12 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + gcc_unreachable (); + } + else +- reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ { ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r"; ++ else ++ reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ } + break; + + case SYMBOL_TLSGD: +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 996df66e8..02c537d4c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -73,6 +73,7 @@ + UNSPEC_LOAD_FROM_GOT + UNSPEC_PCALAU12I + UNSPEC_PCALAU12I_GR ++ UNSPEC_ADD_TLS_LE_RELAX + UNSPEC_ORI_L_LO12 + UNSPEC_LUI_L_HI20 + UNSPEC_LUI_H_LO20 +@@ -2503,6 +2504,17 @@ + "pcalau12i\t%0,%%pc_hi20(%1)" + (set_attr "type" "move")) + ++(define_insn "@add_tls_le_relax<mode>" ++ (set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P (match_operand:P 1 "register_operand" "r") ++ (match_operand:P 2 "register_operand" "r") ++ (match_operand:P 3 "symbolic_operand") ++ UNSPEC_ADD_TLS_LE_RELAX)) ++ "HAVE_AS_TLS_LE_RELAXATION" ++ "add.<d>\t%0,%1,%2,%%le_add_r(%3)" ++ (set_attr "type" "move") ++) ++ + (define_insn "@ori_l_lo12<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P (match_operand:P 1 "register_operand" "r") +diff --git a/gcc/configure b/gcc/configure +index 5842e7a18..eecfe60d6 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28968,6 +28968,37 @@ if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then + + $as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for tls le relaxation support" >&5 ++$as_echo_n "checking assembler for tls le relaxation support... 
" >&6; } ++if ${gcc_cv_as_loongarch_tls_le_relaxation_support+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_tls_le_relaxation_support=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'lu12i.w $t0,%le_hi20_r(a)' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_tls_le_relaxation_support=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_tls_le_relaxation_support" >&5 ++$as_echo "$gcc_cv_as_loongarch_tls_le_relaxation_support" >&6; } ++if test $gcc_cv_as_loongarch_tls_le_relaxation_support = yes; then ++ ++$as_echo "#define HAVE_AS_TLS_LE_RELAXATION 1" >>confdefs.h ++ + fi + + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 9c3fd3ad6..d1032440d 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5357,6 +5357,11 @@ x: + beq $a0,$a1,a,, + AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1, + Define if your assembler supports conditional branch relaxation.)) ++ gcc_GAS_CHECK_FEATURE(tls le relaxation support, ++ gcc_cv_as_loongarch_tls_le_relaxation_support,, ++ lu12i.w $t0,%le_hi20_r(a),, ++ AC_DEFINE(HAVE_AS_TLS_LE_RELAXATION, 1, ++ Define if your assembler supports tls le relocation.)) + ;; + s390*-*-*) + gcc_GAS_CHECK_FEATURE(.gnu_attribute support, +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c +new file mode 100644 +index 000000000..a9a404fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs" } */ ++/* { dg-final { scan-assembler "%le_add_r" { target tls_le_relax } } } */ ++ ++__attribute__ ((tls_model ("local-exec"))) __thread int a; ++ ++void ++test (void) ++{ ++ a = 10; ++} ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index b8bff1a31..20fbd43ee 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10582,6 +10582,18 @@ proc check_effective_target_loongarch_call36_support { } { + } "" + } + ++# Returns 1 if binutils supports TLS le Relax, 0 otherwise. ++proc check_effective_target_tls_le_relax { } { ++ if check_effective_target_tls_native { ++ return check_no_compiler_messages loongarch_tls_le_relax object { ++ /* Assembly code */ ++ lu12i.w $r12, %le_hi20_r(a) ++ } ++ } ++ ++ return 0; ++} ++ + # Return 1 if the target does *not* require strict alignment. + + proc check_effective_target_non_strict_align {} { +-- +2.43.0 +
View file
_service:tar_scm:0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
Added
@@ -0,0 +1,112 @@
+From 97081ba053424e35b1869a00d6ac0e84362d09ea Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 30 Dec 2023 21:40:11 +0800
+Subject: [PATCH 093/188] LoongArch: Provide fmin/fmax RTL pattern for vectors
+
+We already had smin/smax RTL pattern using vfmin/vfmax instructions.
+But for smin/smax, it's unspecified what will happen if either operand
+contains any NaN operands.  So we would not vectorize the loop with
+-fno-finite-math-only (the default for all optimization levels except
+-Ofast).
+
+But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we
+can also use them and vectorize the loop.
+
+gcc/ChangeLog:
+
+	* config/loongarch/simd.md (fmax<mode>3): New define_insn.
+	(fmin<mode>3): Likewise.
+	(reduc_fmax_scal_<mode>3): New define_expand.
+	(reduc_fmin_scal_<mode>3): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vfmax-vfmin.c: New test.
+---
+ gcc/config/loongarch/simd.md                 | 31 +++++++++++++++++++
+ .../gcc.target/loongarch/vfmax-vfmin.c       | 31 +++++++++++++++++++
+ 2 files changed, 62 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
+
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 93fb39abc..8ac1d75a8 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -426,6 +426,37 @@
+   [(set_attr "type" "simd_fcmp")
+    (set_attr "mode" "<MODE>")])
+ 
++; xvf{min/max} instructions are IEEE-754-2008 conforming, use them for
++; the corresponding IEEE-754-2008 operations.  We must use UNSPEC instead
++; of smin/smax though, see PR105414 and PR107013.
++
++(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
++(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
++
++(define_insn "<fmaxmin><mode>3"
++  [(set (match_operand:FVEC 0 "register_operand" "=f")
++	(unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
++		      (match_operand:FVEC 2 "register_operand" "f")]
++		     UNSPEC_FMAXMIN))]
++  ""
++  "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
++  [(set_attr "type" "simd_fminmax")
++   (set_attr "mode" "<MODE>")])
++
++;; ... and also reduc operations.
++(define_expand "reduc_<fmaxmin>_scal_<mode>"
++  [(match_operand:<UNITMODE> 0 "register_operand")
++   (match_operand:FVEC 1 "register_operand")
++   (const_int UNSPEC_FMAXMIN)]
++  ""
++{
++  rtx tmp = gen_reg_rtx (<MODE>mode);
++  loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
++  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
++					      const0_rtx));
++  DONE;
++})
++
+ ; The LoongArch SX Instructions.
+ (include "lsx.md")
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
+new file mode 100644
+index 000000000..811fee361
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mtune=la464 -mlasx" } */
++/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
++/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
++/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
++/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
++/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
++/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
++/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
++/* { dg-final { scan-assembler "\txvfmax\\.s" } } */
++
++#define T(OP) __typeof__ (__builtin_##OP (0, 0))
++
++#define TEST(OP, LEN) \
++void \
++test_##OP##LEN (T (OP) *restrict dest, \
++		const T (OP) *restrict src1, \
++		const T (OP) *restrict src2) \
++{ \
++  for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
++    dest[i] = __builtin_##OP (src1[i], src2[i]); \
++}
++
++TEST(fmin, 16)
++TEST(fmax, 16)
++TEST(fmin, 32)
++TEST(fmax, 32)
++TEST(fminf, 16)
++TEST(fmaxf, 16)
++TEST(fminf, 32)
++TEST(fmaxf, 32)
+-- 
+2.43.0
+
View file
_service:tar_scm:0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
Added
@@ -0,0 +1,1484 @@ +From 06a6a571fd557b53f805d990dd1a40a2ab7c1e5c Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 28 Dec 2023 20:26:46 +0800 +Subject: PATCH 094/188 LoongArch: Merge constant vector permuatation + implementations. + +There are currently two versions of the implementations of constant +vector permutation: loongarch_expand_vec_perm_const_1 and +loongarch_expand_vec_perm_const_2. The implementations of the two +versions are different. Currently, only the implementation of +loongarch_expand_vec_perm_const_1 is used for 256-bit vectors. We +hope to streamline the code as much as possible while retaining the +better-performing implementation of the two. By repeatedly testing +spec2006 and spec2017, we got the following Merged version. +Compared with the pre-merger version, the number of lines of code +in loongarch.cc has been reduced by 888 lines. At the same time, +the performance of SPECint2006 under Ofast has been improved by 0.97%, +and the performance of SPEC2017 fprate has been improved by 0.27%. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_is_odd_extraction): + Remove useless forward declaration. + (loongarch_is_even_extraction): Remove useless forward declaration. + (loongarch_try_expand_lsx_vshuf_const): Removed. + (loongarch_expand_vec_perm_const_1): Merged. + (loongarch_is_double_duplicate): Removed. + (loongarch_is_center_extraction): Ditto. + (loongarch_is_reversing_permutation): Ditto. + (loongarch_is_di_misalign_extract): Ditto. + (loongarch_is_si_misalign_extract): Ditto. + (loongarch_is_lasx_lowpart_extract): Ditto. + (loongarch_is_op_reverse_perm): Ditto. + (loongarch_is_single_op_perm): Ditto. + (loongarch_is_divisible_perm): Ditto. + (loongarch_is_triple_stride_extract): Ditto. + (loongarch_expand_vec_perm_const_2): Merged. + (loongarch_expand_vec_perm_const): New. + (loongarch_vectorize_vec_perm_const): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 1308 +++++------------------------ + 1 file changed, 210 insertions(+), 1098 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d1b1950dc..9d2374a46 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8823,143 +8823,6 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + } + } + +-static bool +-loongarch_is_odd_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_is_even_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) +-{ +- int i; +- rtx target, op0, op1, sel, tmp; +- rtx rpermMAX_VECT_LEN; +- +- if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode +- || d->vmode == E_V4SImode || d->vmode == E_V4SFmode +- || d->vmode == E_V8HImode || d->vmode == E_V16QImode) +- { +- target = d->target; +- op0 = d->op0; +- op1 = d->one_vector_p ? d->op0 : d->op1; +- +- if (GET_MODE (op0) != GET_MODE (op1) +- || GET_MODE (op0) != GET_MODE (target)) +- return false; +- +- if (d->testing_p) +- return true; +- +- /* If match extract-even and extract-odd permutations pattern, use +- * vselect much better than vshuf. 
*/ +- if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, d->nelt)) +- return true; +- +- unsigned char perm2MAX_VECT_LEN; +- for (i = 0; i < d->nelt; ++i) +- perm2i = (d->permi + d->nelt) & (2 * d->nelt - 1); +- +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, +- perm2, d->nelt)) +- return true; +- } +- +- for (i = 0; i < d->nelt; i += 1) +- { +- rpermi = GEN_INT (d->permi); +- } +- +- if (d->vmode == E_V2DFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else if (d->vmode == E_V4SFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else +- { +- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); +- emit_move_insn (d->target, sel); +- } +- +- switch (d->vmode) +- { +- case E_V2DFmode: +- emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); +- break; +- case E_V2DImode: +- emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); +- break; +- case E_V4SFmode: +- emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); +- break; +- case E_V4SImode: +- emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); +- break; +- case E_V8HImode: +- emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); +- break; +- case E_V16QImode: +- emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); +- break; +- default: +- break; +- } +- +- return true; +- } +- return false; +-} +- +-static bool +-loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +-{ +- unsigned int i, nelt = d->nelt; +- unsigned char perm2MAX_VECT_LEN; +- +- if (d->one_vector_p) +- { +- /* Try interleave with alternating operands. */ +- memcpy (perm2, d->perm, sizeof (perm2)); +- for (i = 1; i < nelt; i += 2) +- perm2i += nelt; +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, +- nelt)) +- return true; +- } +- else +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, nelt)) +- return true; +- +- /* Try again with swapped operands. */ +- for (i = 0; i < nelt; ++i) +- perm2i = (d->permi + nelt) & (2 * nelt - 1); +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, +- nelt)) +- return true; +- } +- +- if (loongarch_expand_lsx_shuffle (d)) +- return true; +- if (loongarch_expand_vec_perm_even_odd (d)) +- return true; +- if (loongarch_expand_vec_perm_interleave (d)) +- return true; +- return false; +-} +- + /* Following are the assist function for const vector permutation support. 
*/ + static bool + loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) +@@ -8991,36 +8854,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_double_duplicate (struct expand_vec_perm_d *d) +-{ +- if (!d->one_vector_p) +- return false; +- +- if (d->nelt < 8) +- return false; +- +- bool result = true; +- unsigned char buf = d->perm0; +- +- for (int i = 1; i < d->nelt; i += 2) +- { +- if (d->permi != buf) +- { +- result = false; +- break; +- } +- if (d->permi - 1 != d->permi) +- { +- result = false; +- break; +- } +- buf += d->nelt / 4; +- } +- +- return result; +-} +- + static bool + loongarch_is_odd_extraction (struct expand_vec_perm_d *d) + { +@@ -9081,110 +8914,6 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_center_extraction (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned buf = d->nelt / 2; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->permi) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- +- return result; +-} +- +-static bool +-loongarch_is_reversing_permutation (struct expand_vec_perm_d *d) +-{ +- if (!d->one_vector_p) +- return false; +- +- bool result = true; +- unsigned char buf = d->nelt - 1; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (d->permi != buf) +- { +- result = false; +- break; +- } +- +- buf -= 1; +- } +- +- return result; +-} +- +-static bool +-loongarch_is_di_misalign_extract (struct expand_vec_perm_d *d) +-{ +- if (d->nelt != 4 && d->nelt != 8) +- return false; +- +- bool result = true; +- unsigned char buf; +- +- if (d->nelt == 4) +- { +- buf = 1; +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->permi) +- { +- result = false; +- break; +- } +- +- buf += 1; +- } +- } +- else if (d->nelt == 8) +- { +- buf = 2; +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->permi) +- { +- result = false; +- break; +- } +- +- buf += 1; +- } +- } +- +- return result; +-} +- +-static bool +-loongarch_is_si_misalign_extract (struct expand_vec_perm_d *d) +-{ +- if (d->vmode != E_V8SImode && d->vmode != E_V8SFmode) +- return false; +- bool result = true; +- unsigned char buf = 1; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->permi) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- +- return result; +-} +- + static bool + loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d) + { +@@ -9247,39 +8976,6 @@ loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_lasx_lowpart_extract (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned char buf = 0; +- +- for (int i = 0; i < d->nelt / 2; i += 1) +- { +- if (buf != d->permi) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- +- if (result) +- { +- buf = d->nelt; +- for (int i = d->nelt / 2; i < d->nelt; i += 1) +- { +- if (buf != d->permi) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- } +- +- return result; +-} +- + static bool + loongarch_is_lasx_highpart_interleave (expand_vec_perm_d *d) + { +@@ -9361,538 +9057,195 @@ loongarch_is_elem_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-inline bool +-loongarch_is_op_reverse_perm (struct expand_vec_perm_d *d) +-{ +- return (d->vmode == E_V4DFmode) +- && d->perm0 == 2 && d->perm1 == 3 +- && d->perm2 == 0 && d->perm3 == 1; +-} ++/* In LASX, some permutation insn does not have the behavior that gcc expects ++ when 
compiler wants to emit a vector permutation. ++ ++ 1. What GCC provides via vectorize_vec_perm_const ()'s paramater: ++ When GCC wants to performs a vector permutation, it provides two op ++ reigster, one target register, and a selector. ++ In const vector permutation case, GCC provides selector as a char array ++ that contains original value; in variable vector permuatation ++ (performs via vec_perm<mode> insn template), it provides a vector register. ++ We assume that nelt is the elements numbers inside single vector in current ++ 256bit vector mode. ++ ++ 2. What GCC expects to perform: ++ Two op registers (op0, op1) will "combine" into a 512bit temp vector storage ++ that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit ++ is op1, then the elements are indexed as below: ++ 0 ~ nelt - 1 nelt ~ 2 * nelt - 1 ++ |-------------------------|-------------------------| ++ Low 256bit (op0) High 256bit (op1) ++ For example, the second element in op1 (V8SImode) will be indexed with 9. ++ Selector is a vector that has the same mode and number of elements with ++ op0,op1 and target, it's look like this: ++ 0 ~ nelt - 1 ++ |-------------------------| ++ 256bit (selector) ++ It describes which element from 512bit temp vector storage will fit into ++ target's every element slot. ++ GCC expects that every element in selector can be ANY indices of 512bit ++ vector storage (Selector can pick literally any element from op0 and op1, and ++ then fits into any place of target register). This is also what LSX 128bit ++ vshuf.* instruction do similarly, so we can handle 128bit vector permutation ++ by single instruction easily. ++ ++ 3. What LASX permutation instruction does: ++ In short, it just execute two independent 128bit vector permuatation, and ++ it's the reason that we need to do the jobs below. We will explain it. ++ op0, op1, target, and selector will be separate into high 128bit and low ++ 128bit, and do permutation as the description below: ++ ++ a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp ++ vector storage (TVS1), elements are indexed as below: ++ 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ |---------------------|---------------------| TVS1 ++ op0's low 128bit op1's low 128bit ++ op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the ++ same way. ++ 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ |---------------------|---------------------| TVS2 ++ op0's high 128bit op1's high 128bit ++ b) Selector's low 128bit describes which elements from TVS1 will fit into ++ target vector's low 128bit. No TVS2 elements are allowed. ++ c) Selector's high 128bit describes which elements from TVS2 will fit into ++ target vector's high 128bit. No TVS1 elements are allowed. ++ ++ As we can see, if we want to handle vector permutation correctly, we can ++ achieve it in three ways: ++ a) Modify selector's elements, to make sure that every elements can inform ++ correct value that will put into target vector. ++ b) Generate extra instruction before/after permutation instruction, for ++ adjusting op vector or target vector, to make sure target vector's value is ++ what GCC expects. ++ c) Use other instructions to process op and put correct result into target. ++ */ ++ ++/* Implementation of constant vector permuatation. This function identifies ++ recognized pattern of permuation selector argument, and use one or more ++ instruction (s) to finish the permutation job correctly. For unsupported ++ patterns, it will return false. 
*/ + + static bool +-loongarch_is_single_op_perm (struct expand_vec_perm_d *d) ++loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) + { +- bool result = true; ++ bool flag = false; ++ unsigned int i; ++ unsigned char idx; ++ rtx target, op0, op1, sel, tmp; ++ rtx rpermMAX_VECT_LEN; ++ unsigned int remappedMAX_VECT_LEN; ++ unsigned char perm2MAX_VECT_LEN; + +- for (int i = 0; i < d->nelt; i += 1) ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ return loongarch_expand_lsx_shuffle (d); ++ else + { +- if (d->permi >= d->nelt) ++ if (d->one_vector_p) + { +- result = false; +- break; ++ /* Try interleave with alternating operands. */ ++ memcpy (perm2, d->perm, sizeof (perm2)); ++ for (i = 1; i < d->nelt; i += 2) ++ perm2i += d->nelt; ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ perm2, d->nelt)) ++ return true; + } +- } +- +- return result; +-} +- +-static bool +-loongarch_is_divisible_perm (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- +- for (int i = 0; i < d->nelt / 2; i += 1) +- { +- if (d->permi >= d->nelt) ++ else + { +- result = false; +- break; +- } +- } +- +- if (result) +- { +- for (int i = d->nelt / 2; i < d->nelt; i += 1) +- { +- if (d->permi < d->nelt) +- { +- result = false; +- break; +- } +- } +- } +- +- return result; +-} +- +-inline bool +-loongarch_is_triple_stride_extract (struct expand_vec_perm_d *d) +-{ +- return (d->vmode == E_V4DImode || d->vmode == E_V4DFmode) +- && d->perm0 == 1 && d->perm1 == 4 +- && d->perm2 == 7 && d->perm3 == 0; +-} +- +-/* In LASX, some permutation insn does not have the behavior that gcc expects +- * when compiler wants to emit a vector permutation. +- * +- * 1. What GCC provides via vectorize_vec_perm_const ()'s paramater: +- * When GCC wants to performs a vector permutation, it provides two op +- * reigster, one target register, and a selector. +- * In const vector permutation case, GCC provides selector as a char array +- * that contains original value; in variable vector permuatation +- * (performs via vec_perm<mode> insn template), it provides a vector register. +- * We assume that nelt is the elements numbers inside single vector in current +- * 256bit vector mode. +- * +- * 2. What GCC expects to perform: +- * Two op registers (op0, op1) will "combine" into a 512bit temp vector storage +- * that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit +- * is op1, then the elements are indexed as below: +- * 0 ~ nelt - 1 nelt ~ 2 * nelt - 1 +- * |-------------------------|-------------------------| +- * Low 256bit (op0) High 256bit (op1) +- * For example, the second element in op1 (V8SImode) will be indexed with 9. +- * Selector is a vector that has the same mode and number of elements with +- * op0,op1 and target, it's look like this: +- * 0 ~ nelt - 1 +- * |-------------------------| +- * 256bit (selector) +- * It describes which element from 512bit temp vector storage will fit into +- * target's every element slot. +- * GCC expects that every element in selector can be ANY indices of 512bit +- * vector storage (Selector can pick literally any element from op0 and op1, and +- * then fits into any place of target register). This is also what LSX 128bit +- * vshuf.* instruction do similarly, so we can handle 128bit vector permutation +- * by single instruction easily. +- * +- * 3. What LASX permutation instruction does: +- * In short, it just execute two independent 128bit vector permuatation, and +- * it's the reason that we need to do the jobs below. We will explain it. 
+- * op0, op1, target, and selector will be separate into high 128bit and low
+- * 128bit, and do permutation as the description below:
+- *
+- * a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp
+- * vector storage (TVS1), elements are indexed as below:
+- *       0 ~ nelt / 2 - 1        nelt / 2 ~ nelt - 1
+- *   |---------------------|---------------------|  TVS1
+- *       op0's low 128bit        op1's low 128bit
+- * op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the
+- * same way.
+- *       0 ~ nelt / 2 - 1        nelt / 2 ~ nelt - 1
+- *   |---------------------|---------------------|  TVS2
+- *       op0's high 128bit       op1's high 128bit
+- * b) Selector's low 128bit describes which elements from TVS1 will fit into
+- * target vector's low 128bit.  No TVS2 elements are allowed.
+- * c) Selector's high 128bit describes which elements from TVS2 will fit into
+- * target vector's high 128bit.  No TVS1 elements are allowed.
+- *
+- * As we can see, if we want to handle vector permutation correctly, we can
+- * achieve it in three ways:
+- * a) Modify selector's elements, to make sure that every elements can inform
+- * correct value that will put into target vector.
+- b) Generate extra instruction before/after permutation instruction, for
+- adjusting op vector or target vector, to make sure target vector's value is
+- what GCC expects.
+- c) Use other instructions to process op and put correct result into target.
+- */
+-
+-/* Implementation of constant vector permuatation.  This function identifies
+- * recognized pattern of permuation selector argument, and use one or more
+- * instruction(s) to finish the permutation job correctly.  For unsupported
+- * patterns, it will return false.  */
+-
+-static bool
+-loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+-{
+-  /* Although we have the LSX vec_perm<mode> template, there's still some
+-     128bit vector permuatation operations send to vectorize_vec_perm_const.
+-     In this case, we just simpliy wrap them by single vshuf.* instruction,
+-     because LSX vshuf.* instruction just have the same behavior that GCC
+-     expects.  */
+-  if (GET_MODE_SIZE (d->vmode) == 16)
+-    return loongarch_try_expand_lsx_vshuf_const (d);
+-  else
+-    return false;
+-
+-  bool ok = false, reverse_hi_lo = false, extract_ev_od = false,
+-       use_alt_op = false;
+-  unsigned char idx;
+-  int i;
+-  rtx target, op0, op1, sel, tmp;
+-  rtx op0_alt = NULL_RTX, op1_alt = NULL_RTX;
+-  rtx rperm[MAX_VECT_LEN];
+-  unsigned int remapped[MAX_VECT_LEN];
+-
+-  /* Try to figure out whether is a recognized permutation selector pattern, if
+-     yes, we will reassign some elements with new value in selector argument,
+-     and in some cases we will generate some assist insn to complete the
+-     permutation.  (Even in some cases, we use other insn to impl permutation
+-     instead of xvshuf!)
++  if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
++					d->perm, d->nelt))
++    return true;
+
+-     Make sure to check d->testing_p is false everytime if you want to emit new
+-     insn, unless you want to crash into ICE directly.  */
+-  if (loongarch_is_quad_duplicate (d))
+-    {
+-      /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 }
+-	 copy first elem from original selector to all elem in new selector.  */
+-      idx = d->perm[0];
+-      for (i = 0; i < d->nelt; i += 1)
+-	{
+-	  remapped[i] = idx;
+-	}
+-      /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }.  */
+-    }
+-  else if (loongarch_is_double_duplicate (d))
+-    {
+-      /* Selector example: E_V8SImode, { 1, 1, 3, 3, 5, 5, 7, 7 }
+-	 one_vector_p == true.  */
+-      for (i = 0; i < d->nelt / 2; i += 1)
+-	{
+-	  idx = d->perm[i];
+-	  remapped[i] = idx;
+-	  remapped[i + d->nelt / 2] = idx;
++  /* Try again with swapped operands.  */
++  for (i = 0; i < d->nelt; ++i)
++    perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
++  if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
++					perm2, d->nelt))
++    return true;
+ 	}
+-      /* Selector after: { 1, 1, 3, 3, 1, 1, 3, 3 }.  */
+-    }
+-  else if (loongarch_is_odd_extraction (d)
+-	   || loongarch_is_even_extraction (d))
+-    {
+-      /* Odd extraction selector sample: E_V4DImode, { 1, 3, 5, 7 }
+-	 Selector after: { 1, 3, 1, 3 }.
+-	 Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
+-	 Selector after: { 0, 2, 0, 2 }.  */
+
+-      /* Better implement of extract-even and extract-odd permutations.  */
+-      if (loongarch_expand_vec_perm_even_odd (d))
++  if (loongarch_expand_lsx_shuffle (d))
+     return true;
+
+-      for (i = 0; i < d->nelt / 2; i += 1)
+-	{
+-	  idx = d->perm[i];
+-	  remapped[i] = idx;
+-	  remapped[i + d->nelt / 2] = idx;
+-	}
+-      /* Additional insn is required for correct result.  See codes below.  */
+-      extract_ev_od = true;
+-    }
+-  else if (loongarch_is_extraction_permutation (d))
+-    {
+-      /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }.  */
+-      if (d->perm[0] == 0)
++  if (loongarch_is_odd_extraction (d)
++      || loongarch_is_even_extraction (d))
+     {
+-	  for (i = 0; i < d->nelt / 2; i += 1)
+-	    {
+-	      remapped[i] = i;
+-	      remapped[i + d->nelt / 2] = i;
+-	    }
++      if (loongarch_expand_vec_perm_even_odd (d))
++	return true;
+     }
+-      else
++
++  if (loongarch_is_lasx_lowpart_interleave (d)
++      || loongarch_is_lasx_lowpart_interleave_2 (d)
++      || loongarch_is_lasx_highpart_interleave (d)
++      || loongarch_is_lasx_highpart_interleave_2 (d))
+     {
+-	  /* { 8, 9, 10, 11, 12, 13, 14, 15 }.  */
+-	  for (i = 0; i < d->nelt / 2; i += 1)
+-	    {
+-	      idx = i + d->nelt / 2;
+-	      remapped[i] = idx;
+-	      remapped[i + d->nelt / 2] = idx;
+-	    }
++      if (loongarch_expand_vec_perm_interleave (d))
++	return true;
+     }
+-      /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 }
+-	 { 8, 9, 10, 11, 8, 9, 10, 11 }  */
+-    }
+-  else if (loongarch_is_center_extraction (d))
+-    {
+-      /* sample: E_V4DImode, { 2, 3, 4, 5 }
+-	 In this condition, we can just copy high 128bit of op0 and low 128bit
+-	 of op1 to the target register by using xvpermi.q insn.  */
+-      if (!d->testing_p)
++
++  if (loongarch_is_quad_duplicate (d))
+     {
+-	  emit_move_insn (d->target, d->op1);
+-	  switch (d->vmode)
++      if (d->testing_p)
++	return true;
++      /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 }.  */
++      for (i = 0; i < d->nelt; i += 1)
+ 	{
+-	    case E_V4DImode:
+-	      emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target,
+-						  d->op0, GEN_INT (0x21)));
+-	      break;
+-	    case E_V4DFmode:
+-	      emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target,
+-						  d->op0, GEN_INT (0x21)));
+-	      break;
+-	    case E_V8SImode:
+-	      emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target,
+-						  d->op0, GEN_INT (0x21)));
+-	      break;
+-	    case E_V8SFmode:
+-	      emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target,
+-						  d->op0, GEN_INT (0x21)));
+-	      break;
+-	    case E_V16HImode:
+-	      emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target,
+-						   d->op0, GEN_INT (0x21)));
+-	      break;
+-	    case E_V32QImode:
+-	      emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target,
+-						   d->op0, GEN_INT (0x21)));
+-	      break;
+-	    default:
+-	      break;
++	  rperm[i] = GEN_INT (d->perm[0]);
+ 	}
++      /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }.  */
++      flag = true;
++      goto expand_perm_const_end;
+     }
+-      ok = true;
+-      /* Finish the funtion directly.  */
+-      goto expand_perm_const_2_end;
+-    }
+-  else if (loongarch_is_reversing_permutation (d))
+-    {
+-      /* Selector sample: E_V8SImode, { 7, 6, 5, 4, 3, 2, 1, 0 }
+-	 one_vector_p == true  */
+-      idx = d->nelt / 2 - 1;
+-      for (i = 0; i < d->nelt / 2; i += 1)
+-	{
+-	  remapped[i] = idx;
+-	  remapped[i + d->nelt / 2] = idx;
+-	  idx -= 1;
+-	}
+-      /* Selector after: { 3, 2, 1, 0, 3, 2, 1, 0 }
+-	 Additional insn will be generated to swap hi and lo 128bit of target
+-	 register.  */
+-      reverse_hi_lo = true;
+-    }
+-  else if (loongarch_is_di_misalign_extract (d)
+-	   || loongarch_is_si_misalign_extract (d))
+-    {
+-      /* Selector Sample:
+-	 DI misalign: E_V4DImode, { 1, 2, 3, 4 }
+-	 SI misalign: E_V8SImode, { 1, 2, 3, 4, 5, 6, 7, 8 }  */
+-      if (!d->testing_p)
+-	{
+-	  /* Copy original op0/op1 value to new temp register.
+-	     In some cases, operand register may be used in multiple place, so
+-	     we need new regiter instead modify original one, to avoid runtime
+-	     crashing or wrong value after execution.  */
+-	  use_alt_op = true;
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-	  emit_move_insn (op1_alt, d->op1);
+-
+-	  /* Adjust op1 for selecting correct value in high 128bit of target
+-	     register.
+-	     op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }.  */
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+-					      conv_op0, GEN_INT (0x21)));
+
+-	  for (i = 0; i < d->nelt / 2; i += 1)
+-	    {
+-	      remapped[i] = d->perm[i];
+-	      remapped[i + d->nelt / 2] = d->perm[i];
+-	    }
+-	  /* Selector after:
+-	     DI misalign: { 1, 2, 1, 2 }
+-	     SI misalign: { 1, 2, 3, 4, 1, 2, 3, 4 }  */
+-	}
+-    }
+-  else if (loongarch_is_lasx_lowpart_interleave (d))
+-    {
+-      /* Elements from op0's low 18bit and op1's 128bit are inserted into
+-	 target register alternately.
+-	 sample: E_V4DImode, { 0, 4, 1, 5 }  */
+-      if (!d->testing_p)
+-	{
+-	  /* Prepare temp register instead of modify original op.  */
+-	  use_alt_op = true;
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-	  op0_alt = gen_reg_rtx (d->vmode);
+-	  emit_move_insn (op1_alt, d->op1);
+-	  emit_move_insn (op0_alt, d->op0);
+-
+-	  /* Generate subreg for fitting into insn gen function.  */
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
+-
+-	  /* Adjust op value in temp register.
+-	     op0 = {0,1,2,3}, op1 = {4,5,0,1}  */
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+-					      conv_op0, GEN_INT (0x02)));
+-	  /* op0 = {0,1,4,5}, op1 = {4,5,0,1}  */
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+-					      conv_op1, GEN_INT (0x01)));
+-
+-	  /* Remap indices in selector based on the location of index inside
+-	     selector, and vector element numbers in current vector mode.  */
+-
+-	  /* Filling low 128bit of new selector.  */
+-	  for (i = 0; i < d->nelt / 2; i += 1)
+-	    {
+-	      /* value in odd-indexed slot of low 128bit part of selector
+-		 vector.  */
+-	      remapped[i] = i % 2 != 0 ? d->perm[i] - d->nelt / 2 : d->perm[i];
+-	    }
+-	  /* Then filling the high 128bit.  */
+-	  for (i = d->nelt / 2; i < d->nelt; i += 1)
++  if (loongarch_is_extraction_permutation (d))
++    {
++      if (d->testing_p)
++	return true;
++      /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }.  */
++      if (d->perm[0] == 0)
+ 	{
+-	      /* value in even-indexed slot of high 128bit part of
+-		 selector vector.  */
+-	      remapped[i] = i % 2 == 0
+-		? d->perm[i] + (d->nelt / 2) * 3 : d->perm[i];
++	  for (i = 0; i < d->nelt / 2; i += 1)
++	    {
++	      remapped[i] = i;
++	      remapped[i + d->nelt / 2] = i;
++	    }
+ 	}
+-	}
+-    }
+-  else if (loongarch_is_lasx_lowpart_interleave_2 (d))
+-    {
+-      /* Special lowpart interleave case in V32QI vector mode.  It does the same
+-	 thing as we can see in if branch that above this line.
+-	 Selector sample: E_V32QImode,
+-	 {0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8,
+-	  9, 10, 11, 12, 13, 14, 15, 40, 41, 42, 43, 44, 45, 46, 47}  */
+-      if (!d->testing_p)
+-	{
+-	  /* Solution for this case in very simple - covert op into V4DI mode,
+-	     and do same thing as previous if branch.  */
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-	  op0_alt = gen_reg_rtx (d->vmode);
+-	  emit_move_insn (op1_alt, d->op1);
+-	  emit_move_insn (op0_alt, d->op0);
+-
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
+-	  rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
+-						 d->vmode, 0);
+-
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+-					      conv_op0, GEN_INT (0x02)));
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+-					      conv_op1, GEN_INT (0x01)));
+-	  remapped[0] = 0;
+-	  remapped[1] = 4;
+-	  remapped[2] = 1;
+-	  remapped[3] = 5;
+-
+-	  for (i = 0; i < d->nelt; i += 1)
++      else
+ 	{
+-	      rperm[i] = GEN_INT (remapped[i]);
++	  /* { 8, 9, 10, 11, 12, 13, 14, 15 }.  */
++	  for (i = 0; i < d->nelt / 2; i += 1)
++	    {
++	      idx = i + d->nelt / 2;
++	      remapped[i] = idx;
++	      remapped[i + d->nelt / 2] = idx;
++	    }
+ 	}
++      /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 }
++	 { 8, 9, 10, 11, 8, 9, 10, 11 }  */
+
+-	  sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (4, rperm));
+-	  sel = force_reg (E_V4DImode, sel);
+-	  emit_insn (gen_lasx_xvshuf_d (conv_target, sel,
+-					conv_op1, conv_op0));
+-	}
+-
+-      ok = true;
+-      goto expand_perm_const_2_end;
+-    }
+-  else if (loongarch_is_lasx_lowpart_extract (d))
+-    {
+-      /* Copy op0's low 128bit to target's low 128bit, and copy op1's low
+-	 128bit to target's high 128bit.
+-	 Selector sample: E_V4DImode, { 0, 1, 4 ,5 }  */
+-      if (!d->testing_p)
+-	{
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
+-	  rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
+-						 d->vmode, 0);
+-
+-	  /* We can achieve the expectation by using sinple xvpermi.q insn.  */
+-	  emit_move_insn (conv_target, conv_op1);
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_target, conv_target,
+-					      conv_op0, GEN_INT (0x20)));
+-	}
+-
+-      ok = true;
+-      goto expand_perm_const_2_end;
+-    }
+-  else if (loongarch_is_lasx_highpart_interleave (d))
+-    {
+-      /* Similar to lowpart interleave, elements from op0's high 128bit and
+-	 op1's high 128bit are inserted into target regiter alternately.
+-	 Selector sample: E_V8SImode, { 4, 12, 5, 13, 6, 14, 7, 15 }  */
+-      if (!d->testing_p)
+-	{
+-	  /* Prepare temp op register.  */
+-	  use_alt_op = true;
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-	  op0_alt = gen_reg_rtx (d->vmode);
+-	  emit_move_insn (op1_alt, d->op1);
+-	  emit_move_insn (op0_alt, d->op0);
+-
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
+-	  /* Adjust op value in temp regiter.
+-	     op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 }  */
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+-					      conv_op0, GEN_INT (0x13)));
+-	  /* op0 = { 2, 3, 6, 7 }, op1 = { 6, 7, 2, 3 }  */
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+-					      conv_op1, GEN_INT (0x01)));
+-	  /* Remap indices in selector based on the location of index inside
+-	     selector, and vector element numbers in current vector mode.  */
+-
+-	  /* Filling low 128bit of new selector.  */
+-	  for (i = 0; i < d->nelt / 2; i += 1)
+-	    {
+-	      /* value in even-indexed slot of low 128bit part of selector
+-		 vector.  */
+-	      remapped[i] = i % 2 == 0 ? d->perm[i] - d->nelt / 2 : d->perm[i];
+-	    }
+-	  /* Then filling the high 128bit.  */
+-	  for (i = d->nelt / 2; i < d->nelt; i += 1)
+-	    {
+-	      /* value in odd-indexed slot of high 128bit part of selector
+-		 vector.  */
+-	      remapped[i] = i % 2 != 0
+-		? d->perm[i] - (d->nelt / 2) * 3 : d->perm[i];
+-	    }
+-	}
+-    }
+-  else if (loongarch_is_lasx_highpart_interleave_2 (d))
+-    {
+-      /* Special highpart interleave case in V32QI vector mode.  It does the
+-	 same thing as the normal version above.
+-	 Selector sample: E_V32QImode,
+-	 {16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51, 52, 53, 54, 55,
+-	  24, 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63}
+-      */
+-      if (!d->testing_p)
+-	{
+-	  /* Convert op into V4DImode and do the things.  */
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-	  op0_alt = gen_reg_rtx (d->vmode);
+-	  emit_move_insn (op1_alt, d->op1);
+-	  emit_move_insn (op0_alt, d->op0);
+-
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
+-	  rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
+-						 d->vmode, 0);
+-
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
+-					      conv_op0, GEN_INT (0x13)));
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0,
+-					      conv_op1, GEN_INT (0x01)));
+-	  remapped[0] = 2;
+-	  remapped[1] = 6;
+-	  remapped[2] = 3;
+-	  remapped[3] = 7;
+-
++      /* Convert remapped selector array to RTL array.  */
+       for (i = 0; i < d->nelt; i += 1)
+ 	{
+ 	  rperm[i] = GEN_INT (remapped[i]);
+ 	}
+
+-	  sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (4, rperm));
+-	  sel = force_reg (E_V4DImode, sel);
+-	  emit_insn (gen_lasx_xvshuf_d (conv_target, sel,
+-					conv_op1, conv_op0));
++      flag = true;
++      goto expand_perm_const_end;
+     }
+
+-      ok = true;
+-      goto expand_perm_const_2_end;
+-    }
+-  else if (loongarch_is_elem_duplicate (d))
+-    {
+-      /* Brocast single element (from op0 or op1) to all slot of target
+-	 register.
+-	 Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 }  */
+-      if (!d->testing_p)
++  if (loongarch_is_elem_duplicate (d))
+     {
++      if (d->testing_p)
++	return true;
++      /* Brocast single element (from op0 or op1) to all slot of target
++	 register.
++	 Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 }  */
+       rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
+       rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
+       rtx temp_reg = gen_reg_rtx (d->vmode);
+       rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg,
+					    d->vmode, 0);
+-
+       emit_move_insn (temp_reg, d->op0);
+
+       idx = d->perm[0];
+@@ -9901,7 +9254,7 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+ 	 value that we need to broardcast, because xvrepl128vei does the
+ 	 broardcast job from every 128bit of source register to
+ 	 corresponded part of target register!  (A deep sigh.)  */
+-      if (/*idx >= 0 &&*/ idx < d->nelt / 2)
++      if (idx < d->nelt / 2)
+ 	{
+ 	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
+ 					      conv_op0, GEN_INT (0x0)));
+@@ -9956,310 +9309,75 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
+ 	  break;
+ 	}
+
+-      /* finish func directly.  */
+-      ok = true;
+-      goto expand_perm_const_2_end;
+-    }
+-    }
+-  else if (loongarch_is_op_reverse_perm (d))
+-    {
+-      /* reverse high 128bit and low 128bit in op0.
+-	 Selector sample: E_V4DFmode, { 2, 3, 0, 1 }
+-	 Use xvpermi.q for doing this job.  */
+-      if (!d->testing_p)
+-	{
+-	  if (d->vmode == E_V4DImode)
+-	    {
+-	      emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0,
+-						  GEN_INT (0x01)));
+-	    }
+-	  else if (d->vmode == E_V4DFmode)
+-	    {
+-	      emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0,
+-						  GEN_INT (0x01)));
+-	    }
+-	  else
+-	    {
+-	      gcc_unreachable ();
+-	    }
+-	}
+-
+-      ok = true;
+-      goto expand_perm_const_2_end;
+-    }
+-  else if (loongarch_is_single_op_perm (d))
+-    {
+-      /* Permutation that only select elements from op0.  */
+-      if (!d->testing_p)
+-	{
+-	  /* Prepare temp register instead of modify original op.  */
+-	  use_alt_op = true;
+-	  op0_alt = gen_reg_rtx (d->vmode);
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-
+-	  emit_move_insn (op0_alt, d->op0);
+-	  emit_move_insn (op1_alt, d->op1);
+-
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
+-	  rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
+-					       d->vmode, 0);
+-	  rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
+-					       d->vmode, 0);
+-
+-	  /* Duplicate op0's low 128bit in op0, then duplicate high 128bit
+-	     in op1.  After this, xvshuf.* insn's selector argument can
+-	     access all elements we need for correct permutation result.  */
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op0,
+-					      GEN_INT (0x00)));
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
+-					      GEN_INT (0x11)));
+-
+-	  /* In this case, there's no need to remap selector's indices.  */
+-	  for (i = 0; i < d->nelt; i += 1)
+-	    {
+-	      remapped[i] = d->perm[i];
+-	    }
++      return true;
+     }
+-    }
+-  else if (loongarch_is_divisible_perm (d))
+-    {
+-      /* Divisible perm:
+-	 Low 128bit of selector only selects elements of op0,
+-	 and high 128bit of selector only selects elements of op1.  */
+
+-      if (!d->testing_p)
++expand_perm_const_end:
++  if (flag)
+     {
+-	  /* Prepare temp register instead of modify original op.  */
+-	  use_alt_op = true;
+-	  op0_alt = gen_reg_rtx (d->vmode);
+-	  op1_alt = gen_reg_rtx (d->vmode);
+-
+-	  emit_move_insn (op0_alt, d->op0);
+-	  emit_move_insn (op1_alt, d->op1);
+-
+-	  rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
+-					       d->vmode, 0);
+-	  rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
+-					       d->vmode, 0);
+-	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
+-	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
+-
+-	  /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
+-	     that selector's low 128bit can access all op0's elements, and
+-	     selector's high 128bit can access all op1's elements.  */
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op1,
+-					      GEN_INT (0x02)));
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0,
+-					      GEN_INT (0x31)));
+-
+-	  /* No need to modify indices.  */
+-	  for (i = 0; i < d->nelt;i += 1)
++      /* Copy selector vector from memory to vector register for later insn
++	 gen function.
++	 If vector's element in floating point value, we cannot fit
++	 selector argument into insn gen function directly, because of the
++	 insn template definition.  As a solution, generate a integral mode
++	 subreg of target, then copy selector vector (that is in integral
++	 mode) to this subreg.  */
++      switch (d->vmode)
+ 	{
+-	      remapped[i] = d->perm[i];
++	case E_V4DFmode:
++	  sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt,
++							       rperm));
++	  tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
++	  emit_move_insn (tmp, sel);
++	  break;
++	case E_V8SFmode:
++	  sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt,
++							       rperm));
++	  tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
++	  emit_move_insn (tmp, sel);
++	  break;
++	default:
++	  sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
++							     rperm));
++	  emit_move_insn (d->target, sel);
++	  break;
+ 	}
+-    }
+-    }
+-  else if (loongarch_is_triple_stride_extract (d))
+-    {
+-      /* Selector sample: E_V4DFmode, { 1, 4, 7, 0 }.  */
+-      if (!d->testing_p)
+-	{
+-	  /* Resolve it with brute force modification.  */
+-	  remapped[0] = 1;
+-	  remapped[1] = 2;
+-	  remapped[2] = 3;
+-	  remapped[3] = 0;
+-	}
+-    }
+-  else
+-    {
+-      /* When all of the detections above are failed, we will try last
+-	 strategy.
+-	 The for loop tries to detect following rules based on indices' value,
+-	 its position inside of selector vector ,and strange behavior of
+-	 xvshuf.* insn; Then we take corresponding action.  (Replace with new
+-	 value, or give up whole permutation expansion.)  */
+-      for (i = 0; i < d->nelt; i += 1)
+-	{
+-	  /* % (2 * d->nelt)  */
+-	  idx = d->perm[i];
+
+-	  /* if index is located in low 128bit of selector vector.  */
+-	  if (i < d->nelt / 2)
+-	    {
+-	      /* Fail case 1: index tries to reach element that located in op0's
+-		 high 128bit.  */
+-	      if (idx >= d->nelt / 2 && idx < d->nelt)
+-		{
+-		  goto expand_perm_const_2_end;
+-		}
+-	      /* Fail case 2: index tries to reach element that located in
+-		 op1's high 128bit.  */
+-	      if (idx >= (d->nelt + d->nelt / 2))
+-		{
+-		  goto expand_perm_const_2_end;
+-		}
++      target = d->target;
++      op0 = d->op0;
++      op1 = d->one_vector_p ? d->op0 : d->op1;
+
+-	      /* Success case: index tries to reach elements that located in
+-		 op1's low 128bit.  Apply - (nelt / 2) offset to original
+-		 value.  */
+-	      if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
+-		{
+-		  idx -= d->nelt / 2;
+-		}
+-	    }
+-	  /* if index is located in high 128bit of selector vector.  */
+-	  else
++      /* We FINALLY can generate xvshuf.* insn.  */
++      switch (d->vmode)
+ 	{
+-	      /* Fail case 1: index tries to reach element that located in
+-		 op1's low 128bit.  */
+-	      if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
+-		{
+-		  goto expand_perm_const_2_end;
+-		}
+-	      /* Fail case 2: index tries to reach element that located in
+-		 op0's low 128bit.  */
+-	      if (idx < (d->nelt / 2))
+-		{
+-		  goto expand_perm_const_2_end;
+-		}
+-	      /* Success case: index tries to reach element that located in
+-		 op0's high 128bit.  */
+-	      if (idx >= d->nelt / 2 && idx < d->nelt)
+-		{
+-		  idx -= d->nelt / 2;
+-		}
++	case E_V4DFmode:
++	  emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
++	  break;
++	case E_V4DImode:
++	  emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
++	  break;
++	case E_V8SFmode:
++	  emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
++	  break;
++	case E_V8SImode:
++	  emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
++	  break;
++	case E_V16HImode:
++	  emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
++	  break;
++	case E_V32QImode:
++	  emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
++	  break;
++	default:
++	  gcc_unreachable ();
++	  break;
+ 	}
+-	  /* No need to process other case that we did not mentioned.  */
+-
+-	  /* Assign with original or processed value.  */
+-	  remapped[i] = idx;
+-	}
+-    }
+-
+-  ok = true;
+-  /* If testing_p is true, compiler is trying to figure out that backend can
+-     handle this permutation, but doesn't want to generate actual insn.  So
+-     if true, exit directly.  */
+-  if (d->testing_p)
+-    {
+-      goto expand_perm_const_2_end;
+-    }
+-
+-  /* Convert remapped selector array to RTL array.  */
+-  for (i = 0; i < d->nelt; i += 1)
+-    {
+-      rperm[i] = GEN_INT (remapped[i]);
+-    }
+-
+-  /* Copy selector vector from memory to vector regiter for later insn gen
+-     function.
+-     If vector's element in floating point value, we cannot fit selector
+-     argument into insn gen function directly, because of the insn template
+-     definition.  As a solution, generate a integral mode subreg of target,
+-     then copy selector vector (that is in integral mode) to this subreg.  */
+-  switch (d->vmode)
+-    {
+-    case E_V4DFmode:
+-      sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm));
+-      tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
+-      emit_move_insn (tmp, sel);
+-      break;
+-    case E_V8SFmode:
+-      sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm));
+-      tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
+-      emit_move_insn (tmp, sel);
+-      break;
+-    default:
+-      sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
+-      emit_move_insn (d->target, sel);
+-      break;
+-    }
+-
+-  target = d->target;
+-  /* If temp op registers are requested in previous if branch, then use temp
+-     register intead of original one.  */
+-  if (use_alt_op)
+-    {
+-      op0 = op0_alt != NULL_RTX ? op0_alt : d->op0;
+-      op1 = op1_alt != NULL_RTX ? op1_alt : d->op1;
+-    }
+-  else
+-    {
+-      op0 = d->op0;
+-      op1 = d->one_vector_p ? d->op0 : d->op1;
+-    }
+-
+-  /* We FINALLY can generate xvshuf.* insn.  */
+-  switch (d->vmode)
+-    {
+-    case E_V4DFmode:
+-      emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
+-      break;
+-    case E_V4DImode:
+-      emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
+-      break;
+-    case E_V8SFmode:
+-      emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
+-      break;
+-    case E_V8SImode:
+-      emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
+-      break;
+-    case E_V16HImode:
+-      emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
+-      break;
+-    case E_V32QImode:
+-      emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
+-      break;
+-    default:
+-      gcc_unreachable ();
+-      break;
+-    }
+
+-  /* Extra insn for swapping the hi/lo 128bit of target vector register.  */
+-  if (reverse_hi_lo)
+-    {
+-      switch (d->vmode)
+-	{
+-	case E_V4DFmode:
+-	  emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target,
+-					      d->target, GEN_INT (0x1)));
+-	  break;
+-	case E_V4DImode:
+-	  emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target,
+-					      d->target, GEN_INT (0x1)));
+-	  break;
+-	case E_V8SFmode:
+-	  emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target,
+-					      d->target, GEN_INT (0x1)));
+-	  break;
+-	case E_V8SImode:
+-	  emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target,
+-					      d->target, GEN_INT (0x1)));
+-	  break;
+-	case E_V16HImode:
+-	  emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target,
+-					       d->target, GEN_INT (0x1)));
+-	  break;
+-	case E_V32QImode:
+-	  emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target,
+-					       d->target, GEN_INT (0x1)));
+-	  break;
+-	default:
+-	  break;
++      return true;
+     }
+ }
+-  /* Extra insn required by odd/even extraction.  Swapping the second and third
+-     64bit in target vector register.  */
+-  else if (extract_ev_od)
+-    {
+-      rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
+-      emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted,
+-					  GEN_INT (0xD8)));
+-    }
+
+-expand_perm_const_2_end:
+-  return ok;
++  return false;
+ }
+
+ /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
+@@ -10340,25 +9458,19 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode,
+   if (!d.one_vector_p)
+     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+
+-  ok = loongarch_expand_vec_perm_const_2 (&d);
+-  if (ok)
+-    return ok;
+-
+   start_sequence ();
+-  ok = loongarch_expand_vec_perm_const_1 (&d);
++  ok = loongarch_expand_vec_perm_const (&d);
+   end_sequence ();
+   return ok;
+ }
+
+-  ok = loongarch_expand_vec_perm_const_2 (&d);
+-  if (!ok)
+-    ok = loongarch_expand_vec_perm_const_1 (&d);
++  ok = loongarch_expand_vec_perm_const (&d);
+
+   /* If we were given a two-vector permutation which just happened to
+      have both input vectors equal, we folded this into a one-vector
+      permutation.  There are several loongson patterns that are matched
+      via direct vec_select+vec_concat expansion, but we do not have
+-     support in loongarch_expand_vec_perm_const_1 to guess the adjustment
++     support in loongarch_expand_vec_perm_const to guess the adjustment
+      that should be made for a single operand.  Just try again with
+      the original permutation.  */
+   if (!ok && which == 3)
+@@ -10367,7 +9479,7 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode,
+     d.op1 = op1;
+     d.one_vector_p = false;
+     memcpy (d.perm, orig_perm, MAX_VECT_LEN);
+-    ok = loongarch_expand_vec_perm_const_1 (&d);
++    ok = loongarch_expand_vec_perm_const (&d);
+   }
+
+   return ok;
+--
+2.43.0
+
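Editor's note on the diff above: the deleted comment block describes the lane-split selection rule of the 256-bit LASX xvshuf.* instructions, which both the old and the new expander have to work around. Below is a minimal C model of that rule for the V8SI case, written only to make the TVS1/TVS2 description concrete. It is a sketch under stated assumptions (independent per-128-bit-lane selection, selector indices reduced modulo 8); the helper name xvshuf_w_model is invented for illustration and is not part of GCC or of any LoongArch toolchain.

/* Reference model of the per-lane xvshuf.w selection described in the
   removed comment.  TVS1 concatenates the low 128-bit halves of op0 and
   op1; TVS2 concatenates the high halves.  Hypothetical helper, not the
   GCC implementation.  */
#include <stdint.h>
#include <stdio.h>

static void
xvshuf_w_model (uint32_t target[8], const uint32_t sel[8],
                const uint32_t op0[8], const uint32_t op1[8])
{
  for (int i = 0; i < 4; i++)
    {
      uint32_t s = sel[i] & 7;          /* low lane selects from TVS1 */
      target[i] = s < 4 ? op0[s] : op1[s - 4];
    }
  for (int i = 4; i < 8; i++)
    {
      uint32_t s = sel[i] & 7;          /* high lane selects from TVS2 */
      target[i] = s < 4 ? op0[s + 4] : op1[s];
    }
}

int
main (void)
{
  uint32_t op0[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  uint32_t op1[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  uint32_t sel[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };  /* remapped selector */
  uint32_t t[8];

  xvshuf_w_model (t, sel, op0, op1);
  for (int i = 0; i < 8; i++)
    printf ("%u ", t[i]);
  printf ("\n");                        /* prints: 0 0 0 0 4 4 4 4 */
  return 0;
}

With the quad-duplicate selector { 0, 0, 0, 0, 4, 4, 4, 4 } remapped to all zeroes, the model prints 0 0 0 0 4 4 4 4: the low lane copies element 0 of TVS1 and the high lane copies element 0 of TVS2, which is why the remapping in the quad-duplicate case can be carried out by a single xvshuf.w.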
View file
_service:tar_scm:0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch
Added
@@ -0,0 +1,34 @@
+From 6263acd411b9685ebc7b16d19b91aad39cb7e184 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Fri, 29 Dec 2023 09:45:15 +0800
+Subject: [PATCH 095/188] LoongArch: testsuite:Fix FAIL in lasx-xvstelm.c file.
+
+After implementing the cost model on the LoongArch architecture, the GCC
+compiler code has this feature turned on by default, which causes the
+lasx-xvstelm.c file test to fail. Through analysis, this test case can
+generate vectorization instructions required for detection only after
+disabling the functionality of the cost model with the "-fno-vect-cost-model"
+compilation option.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c:Add compile
+	  option "-fno-vect-cost-model" to dg-options.
+---
+ gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
+index 1a7b0e86f..4b846204a 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O3 -mlasx" } */
++/* { dg-options "-O3 -mlasx -fno-vect-cost-model" } */
+ /* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
+
+ #define LEN 256
+--
+2.43.0
+
View file
_service:tar_scm:0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch
Added
@@ -0,0 +1,47 @@
+From c21f2c7e6c2385a3783977bbca79ebe178d0d141 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Fri, 5 Jan 2024 11:43:24 +0800
+Subject: [PATCH 096/188] LoongArch: testsuite:Modify the test behavior of the
+ vect-bic-bitmask-{12, 23}.c file.
+
+Before modifying the test behavior of the program, dg-do is set to assemble in
+vect-bic-bitmask-{12,23}.c. However, when the binutils library does not support
+the vector instruction set, it will FAIL to recognize the vector instruction
+and fail item will appear in the assembly stage. So set the program's dg-do to
+compile.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/vect/vect-bic-bitmask-12.c: Change the default
+	  setting of assembly to compile.
+	* gcc.dg/vect/vect-bic-bitmask-23.c: Dito.
+---
+ gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c | 2 +-
+ gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
+index 36ec5a8b1..213e4c2a4 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
+@@ -1,5 +1,5 @@
+ /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
+-/* { dg-do assemble } */
++/* { dg-do compile } */
+ /* { dg-additional-options "-O3 -fdump-tree-dce -w" } */
+
+ #include <stdint.h>
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
+index 5b4c3b6e1..5dceb4bbc 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
+@@ -1,5 +1,5 @@
+ /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
+-/* { dg-do assemble } */
++/* { dg-do compile } */
+ /* { dg-additional-options "-O1 -fdump-tree-dce -w" } */
+
+ #include <stdint.h>
+--
+2.43.0
+
View file
_service:tar_scm:0097-Improve-non-loop-disambiguation.patch
Added
@@ -0,0 +1,101 @@
+From 6de2e0d400cbe46da482a672810c37b1832c408c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Thu, 25 Jul 2024 19:45:43 +0800
+Subject: [PATCH] Improve non-loop disambiguation
+
+This optimization is brought from https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=038b077689bb5310386b04d40a2cea234f01e6aa.
+
+When dr_may_alias_p is called without a loop context, it tries
+to use the tree-affine interface to calculate the difference
+between the two addresses and use that difference to check whether
+the gap between the accesses is known at compile time. However, as the
+example in the PR shows, this doesn't expand SSA_NAMEs and so can easily
+be defeated by things like reassociation.
+
+One fix would have been to use aff_combination_expand to expand the
+SSA_NAMEs, but we'd then need some way of maintaining the associated
+cache. This patch instead reuses the innermost_loop_behavior fields
+(which exist even when no loop context is provided).
+
+It might still be useful to do the aff_combination_expand thing too,
+if an example turns out to need it.
+---
+ gcc/common.opt                              |  4 ++++
+ gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c | 16 +++++++++++++++
+ gcc/tree-data-ref.cc                        | 22 +++++++++++++++++++++
+ 3 files changed, 42 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..75bf9c9c1 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3217,6 +3217,10 @@ ftree-loop-vectorize
+ Common Var(flag_tree_loop_vectorize) Optimization EnabledBy(ftree-vectorize)
+ Enable loop vectorization on trees.
+
++falias-analysis-expand-ssa
++Common Var(flag_alias_analysis_expand_ssa) Init(0)
++Enable expanded SSA name analysis during alias analysis.
++
+ ftree-slp-vectorize
+ Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
+ Enable basic block vectorization (SLP) on trees.
+diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
+new file mode 100644
+index 000000000..5ff8a8a62
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-falias-analysis-expand-ssa" } */
++
++void f(double *p, long i)
++{
++  p[i+0] += 1;
++  p[i+1] += 1;
++}
++void g(double *p, long i)
++{
++  double *q = p + i;
++  q[0] += 1;
++  q[1] += 1;
++}
++
++/* { dg-final { scan-tree-dump-not "can't determine dependence" slp2 } } */
+diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
+index e6ae9e847..a05073c51 100644
+--- a/gcc/tree-data-ref.cc
++++ b/gcc/tree-data-ref.cc
+@@ -2993,6 +2993,28 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
+      disambiguation.  */
+   if (!loop_nest)
+     {
++      if (flag_alias_analysis_expand_ssa)
++	{
++	  tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
++	  tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
++
++	  if (DR_BASE_ADDRESS (a)
++	      && DR_BASE_ADDRESS (b)
++	      && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
++	      && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
++	      && poly_int_tree_p (tree_size_a)
++	      && poly_int_tree_p (tree_size_b)
++	      && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
++					  wi::to_widest (tree_size_a),
++					  wi::to_widest (DR_INIT (b)),
++					  wi::to_widest (tree_size_b)))
++	    {
++	      gcc_assert (integer_zerop (DR_STEP (a))
++			  && integer_zerop (DR_STEP (b)));
++	      return false;
++	    }
++	}
++
+       aff_tree off1, off2;
+       poly_widest_int size1, size2;
+       get_inner_reference_aff (DR_REF (a), &off1, &size1);
+--
+2.33.0
+
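Editor's note: the early-out added above proves independence from the innermost_loop_behavior fields alone. Below is a standalone sketch of the underlying interval test, using plain C integers where GCC uses poly_int and widest_int; ranges_may_overlap is a hypothetical stand-in for the real ranges_maybe_overlap_p.

/* Minimal sketch of the range test the patch adds to dr_may_alias_p:
   with equal base and offset, accesses [init_a, init_a + size_a) and
   [init_b, init_b + size_b) can only alias if the intervals overlap.  */
#include <stdbool.h>
#include <assert.h>

static bool
ranges_may_overlap (long init_a, unsigned long size_a,
                    long init_b, unsigned long size_b)
{
  /* Half-open intervals are disjoint iff one ends before the other starts. */
  return !(init_a + (long) size_a <= init_b
           || init_b + (long) size_b <= init_a);
}

int
main (void)
{
  /* p[i+0] and p[i+1] from the testcase: same base p and offset i*8,
     inits 0 and 8, each 8 bytes wide, so provably disjoint.  */
  assert (!ranges_may_overlap (0, 8, 8, 8));
  assert (ranges_may_overlap (0, 8, 4, 8));   /* partial overlap */
  return 0;
}

In function f of the testcase, both data references share the base address p and the offset expression i * 8; their DR_INITs are 0 and 8 and each access is 8 bytes wide, so the intervals [0, 8) and [8, 16) are disjoint and the dependence test now succeeds even after reassociation has rewritten the address computation.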
View file
_service:tar_scm:0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch
Added
@@ -0,0 +1,31 @@
+From cdee2d1e7391d95bf6fd471fddcb86ee81247929 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Fri, 5 Jan 2024 11:43:27 +0800
+Subject: [PATCH 097/188] LoongArch: testsuite:Delete the default run behavior
+ in pr60510.f.
+
+When binutils does not support vector instruction sets, the test program fails
+because it does not recognize vectorization at the assembly stage. Therefore,
+the default run behavior of the program is deleted, so that the behavior of
+the program depends on whether the software supports vectorization.
+
+gcc/testsuite/ChangeLog:
+
+	* gfortran.dg/vect/pr60510.f: Delete the default behavior of the
+	  program.
+---
+ gcc/testsuite/gfortran.dg/vect/pr60510.f | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/gcc/testsuite/gfortran.dg/vect/pr60510.f b/gcc/testsuite/gfortran.dg/vect/pr60510.f
+index ecd50dd55..c1e11b27d 100644
+--- a/gcc/testsuite/gfortran.dg/vect/pr60510.f
++++ b/gcc/testsuite/gfortran.dg/vect/pr60510.f
+@@ -1,4 +1,3 @@
+-! { dg-do run }
+ ! { dg-require-effective-target vect_double }
+ ! { dg-require-effective-target vect_intdouble_cvt }
+ ! { dg-additional-options "-fno-inline -ffast-math" }
+--
+2.43.0
+
View file
_service:tar_scm:0097-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch
Deleted
@@ -1,378 +0,0 @@
-From 62fbb215cc817e9f2c1ca80282a64f4ee30806bc Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:48 +0100
-Subject: [PATCH] aarch64: Use local frame vars in shrink-wrapping code
-
-aarch64_layout_frame uses a shorthand for referring to
-cfun->machine->frame:
-
-  aarch64_frame &frame = cfun->machine->frame;
-
-This patch does the same for some other heavy users of the structure.
-No functional change intended.
-
-gcc/
-	* config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
-	a local shorthand for cfun->machine->frame.
-	(aarch64_restore_callee_saves, aarch64_get_separate_components):
-	(aarch64_process_components): Likewise.
-	(aarch64_allocate_and_probe_stack_space): Likewise.
-	(aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
-	(aarch64_layout_frame): Use existing shorthand for one more case.
----
- gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++----------------
- 1 file changed, 64 insertions(+), 59 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 226dc9dffd47..ae42ffdedbeb 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8351,7 +8351,7 @@ aarch64_layout_frame (void)
-   frame.is_scs_enabled
-     = (!crtl->calls_eh_return
-        && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
--       && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0));
-+       && known_ge (frame.reg_offset[LR_REGNUM], 0));
-
-   /* When shadow call stack is enabled, the scs_pop in the epilogue will
-      restore x30, and we don't need to pop x30 again in the traditional
-@@ -8763,6 +8763,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- 			   unsigned start, unsigned limit, bool skip_wb,
- 			   bool hard_fp_valid_p)
- {
-+  aarch64_frame &frame = cfun->machine->frame;
-   rtx_insn *insn;
-   unsigned regno;
-   unsigned regno2;
-@@ -8777,8 +8778,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
-       bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
-
-       if (skip_wb
--	  && (regno == cfun->machine->frame.wb_push_candidate1
--	      || regno == cfun->machine->frame.wb_push_candidate2))
-+	  && (regno == frame.wb_push_candidate1
-+	      || regno == frame.wb_push_candidate2))
- 	continue;
-
-       if (cfun->machine->reg_is_wrapped_separately[regno])
- 	continue;
-
-       machine_mode mode = aarch64_reg_save_mode (regno);
-       reg = gen_rtx_REG (mode, regno);
--      offset = start_offset + cfun->machine->frame.reg_offset[regno];
-+      offset = start_offset + frame.reg_offset[regno];
-       rtx base_rtx = stack_pointer_rtx;
-       poly_int64 sp_offset = offset;
-@@ -8799,7 +8800,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- 	{
- 	  gcc_assert (known_eq (start_offset, 0));
- 	  poly_int64 fp_offset
--	    = cfun->machine->frame.below_hard_fp_saved_regs_size;
-+	    = frame.below_hard_fp_saved_regs_size;
- 	  if (hard_fp_valid_p)
- 	    base_rtx = hard_frame_pointer_rtx;
- 	  else
-@@ -8821,8 +8822,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
- 	  && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
- 	  && !cfun->machine->reg_is_wrapped_separately[regno2]
- 	  && known_eq (GET_MODE_SIZE (mode),
--		       cfun->machine->frame.reg_offset[regno2]
--		       - cfun->machine->frame.reg_offset[regno]))
-+		       frame.reg_offset[regno2] - frame.reg_offset[regno]))
- 	{
- 	  rtx reg2 = gen_rtx_REG (mode, regno2);
- 	  rtx mem2;
-@@ -8872,6 +8872,7 @@ static void
- aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
- 			      unsigned limit, bool skip_wb, rtx *cfi_ops)
- {
-+  aarch64_frame &frame = cfun->machine->frame;
-   unsigned regno;
-   unsigned regno2;
-   poly_int64 offset;
-@@ -8888,13 +8889,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
-       rtx reg, mem;
-
-       if (skip_wb
--	  && (regno == cfun->machine->frame.wb_pop_candidate1
--	      || regno == cfun->machine->frame.wb_pop_candidate2))
-+	  && (regno == frame.wb_pop_candidate1
-+	      || regno == frame.wb_pop_candidate2))
- 	continue;
-
-       machine_mode mode = aarch64_reg_save_mode (regno);
-       reg = gen_rtx_REG (mode, regno);
--      offset = start_offset + cfun->machine->frame.reg_offset[regno];
-+      offset = start_offset + frame.reg_offset[regno];
-       rtx base_rtx = stack_pointer_rtx;
-       if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- 	aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
-@@ -8905,8 +8906,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
- 	  && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
- 	  && !cfun->machine->reg_is_wrapped_separately[regno2]
- 	  && known_eq (GET_MODE_SIZE (mode),
--		       cfun->machine->frame.reg_offset[regno2]
--		       - cfun->machine->frame.reg_offset[regno]))
-+		       frame.reg_offset[regno2] - frame.reg_offset[regno]))
- 	{
- 	  rtx reg2 = gen_rtx_REG (mode, regno2);
- 	  rtx mem2;
-@@ -9011,6 +9011,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
- static sbitmap
- aarch64_get_separate_components (void)
- {
-+  aarch64_frame &frame = cfun->machine->frame;
-   sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
-   bitmap_clear (components);
-
-@@ -9027,18 +9028,18 @@ aarch64_get_separate_components (void)
- 	if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
- 	  continue;
-
--	poly_int64 offset = cfun->machine->frame.reg_offset[regno];
-+	poly_int64 offset = frame.reg_offset[regno];
-
- 	/* If the register is saved in the first SVE save slot, we use
- 	   it as a stack probe for -fstack-clash-protection.  */
- 	if (flag_stack_clash_protection
--	    && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
-+	    && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
- 	    && known_eq (offset, 0))
- 	  continue;
-
- 	/* Get the offset relative to the register we'll use.  */
- 	if (frame_pointer_needed)
--	  offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
-+	  offset -= frame.below_hard_fp_saved_regs_size;
- 	else
- 	  offset += crtl->outgoing_args_size;
-
-@@ -9057,11 +9058,11 @@ aarch64_get_separate_components (void)
-   /* If the spare predicate register used by big-endian SVE code
-      is call-preserved, it must be saved in the main prologue
-      before any saves that use it.  */
--  if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
--    bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
-+  if (frame.spare_pred_reg != INVALID_REGNUM)
-+    bitmap_clear_bit (components, frame.spare_pred_reg);
-
--  unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
--  unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
-+  unsigned reg1 = frame.wb_push_candidate1;
-+  unsigned reg2 = frame.wb_push_candidate2;
-   /* If registers have been chosen to be stored/restored with
-      writeback don't interfere with them to avoid having to output explicit
-      stack adjustment instructions.  */
-@@ -9170,6 +9171,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
- static void
- aarch64_process_components (sbitmap components, bool prologue_p)
- {
-+  aarch64_frame &frame = cfun->machine->frame;
-   rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
- 			     ? HARD_FRAME_POINTER_REGNUM
- 			     : STACK_POINTER_REGNUM);
-@@ -9184,9 +9186,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
-       machine_mode mode = aarch64_reg_save_mode (regno);
-
-       rtx reg = gen_rtx_REG (mode, regno);
--      poly_int64 offset = cfun->machine->frame.reg_offset[regno];
-+      poly_int64 offset = frame.reg_offset[regno];
-       if (frame_pointer_needed)
--	offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
-+	offset -= frame.below_hard_fp_saved_regs_size;
-       else
- 	offset += crtl->outgoing_args_size;
-
-@@ -9211,14 +9213,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
- 	  break;
- 	}
-
--      poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
-+      poly_int64 offset2 = frame.reg_offset[regno2];
-       /* The next register is not of the same class or its offset is not
- 	 mergeable with the current one into a pair.  */
-       if (aarch64_sve_mode_p (mode)
- 	  || !satisfies_constraint_Ump (mem)
- 	  || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
- 	  || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
--	  || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
-+	  || maybe_ne ((offset2 - frame.reg_offset[regno]),
- 		       GET_MODE_SIZE (mode)))
- 	{
- 	  insn = emit_insn (set);
-@@ -9240,7 +9242,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
-       /* REGNO2 can be saved/restored in a pair with REGNO.  */
-       rtx reg2 = gen_rtx_REG (mode, regno2);
-       if (frame_pointer_needed)
--	offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
-+	offset2 -= frame.below_hard_fp_saved_regs_size;
-       else
- 	offset2 += crtl->outgoing_args_size;
-       rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
-@@ -9335,6 +9337,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- 					bool frame_related_p,
- 					bool final_adjustment_p)
- {
-+  aarch64_frame &frame = cfun->machine->frame;
-   HOST_WIDE_INT guard_size
-     = 1 << param_stack_clash_protection_guard_size;
-   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
-@@ -9355,25 +9358,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
-      register as a probe.  We can't assume that LR was saved at position 0
-      though, so treat any space below it as unprobed.  */
-   if (final_adjustment_p
--      && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
-+      && known_eq (frame.below_hard_fp_saved_regs_size, 0))
-     {
--      poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
-+      poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
-       if (known_ge (lr_offset, 0))
- 	min_probe_threshold -= lr_offset.to_constant ();
-       else
- 	gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
-     }
-
--  poly_int64 frame_size = cfun->machine->frame.frame_size;
-+  poly_int64 frame_size = frame.frame_size;
-
-   /* We should always have a positive probe threshold.  */
-   gcc_assert (min_probe_threshold > 0);
-
-   if (flag_stack_clash_protection && !final_adjustment_p)
-     {
--      poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
--      poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
--      poly_int64 final_adjust = cfun->machine->frame.final_adjust;
-+      poly_int64 initial_adjust = frame.initial_adjust;
-+      poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-+      poly_int64 final_adjust = frame.final_adjust;
-
-       if (known_eq (frame_size, 0))
- 	{
-@@ -9662,17 +9665,18 @@ aarch64_epilogue_uses (int regno)
- void
- aarch64_expand_prologue (void)
- {
--  poly_int64 frame_size = cfun->machine->frame.frame_size;
--  poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
--  HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
--  poly_int64 final_adjust = cfun->machine->frame.final_adjust;
--  poly_int64 callee_offset = cfun->machine->frame.callee_offset;
--  poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
-+  aarch64_frame &frame = cfun->machine->frame;
-+  poly_int64 frame_size = frame.frame_size;
-+  poly_int64 initial_adjust = frame.initial_adjust;
-+  HOST_WIDE_INT callee_adjust = frame.callee_adjust;
-+  poly_int64 final_adjust = frame.final_adjust;
-+  poly_int64 callee_offset = frame.callee_offset;
-+  poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-   poly_int64 below_hard_fp_saved_regs_size
--    = cfun->machine->frame.below_hard_fp_saved_regs_size;
--  unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
--  unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
--  bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
-+    = frame.below_hard_fp_saved_regs_size;
-+  unsigned reg1 = frame.wb_push_candidate1;
-+  unsigned reg2 = frame.wb_push_candidate2;
-+  bool emit_frame_chain = frame.emit_frame_chain;
-   rtx_insn *insn;
-
-   if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
-@@ -9703,7 +9707,7 @@ aarch64_expand_prologue (void)
-     }
-
-   /* Push return address to shadow call stack.  */
--  if (cfun->machine->frame.is_scs_enabled)
-+  if (frame.is_scs_enabled)
-     emit_insn (gen_scs_push ());
-
-   if (flag_stack_usage_info)
-@@ -9742,7 +9746,7 @@ aarch64_expand_prologue (void)
-
-   /* The offset of the frame chain record (if any) from the current SP.  */
-   poly_int64 chain_offset = (initial_adjust + callee_adjust
--			     - cfun->machine->frame.hard_fp_offset);
-+			     - frame.hard_fp_offset);
-   gcc_assert (known_ge (chain_offset, 0));
-
-   /* The offset of the bottom of the save area from the current SP.  */
-@@ -9845,16 +9849,17 @@ aarch64_use_return_insn_p (void)
- void
- aarch64_expand_epilogue (bool for_sibcall)
- {
--  poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
--  HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
--  poly_int64 final_adjust = cfun->machine->frame.final_adjust;
--  poly_int64 callee_offset = cfun->machine->frame.callee_offset;
--  poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
-+  aarch64_frame &frame = cfun->machine->frame;
-+  poly_int64 initial_adjust = frame.initial_adjust;
-+  HOST_WIDE_INT callee_adjust = frame.callee_adjust;
-+  poly_int64 final_adjust = frame.final_adjust;
-+  poly_int64 callee_offset = frame.callee_offset;
-+  poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-   poly_int64 below_hard_fp_saved_regs_size
--    = cfun->machine->frame.below_hard_fp_saved_regs_size;
--  unsigned reg1 = cfun->machine->frame.wb_pop_candidate1;
--  unsigned reg2 = cfun->machine->frame.wb_pop_candidate2;
--  unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled
-+    = frame.below_hard_fp_saved_regs_size;
-+  unsigned reg1 = frame.wb_pop_candidate1;
-+  unsigned reg2 = frame.wb_pop_candidate2;
-+  unsigned int last_gpr = (frame.is_scs_enabled
- 			   ? R29_REGNUM : R30_REGNUM);
-   rtx cfi_ops = NULL;
-   rtx_insn *insn;
-@@ -9888,7 +9893,7 @@ aarch64_expand_epilogue (bool for_sibcall)
-   /* We need to add memory barrier to prevent read from deallocated stack.  */
-   bool need_barrier_p
-     = maybe_ne (get_frame_size ()
--		+ cfun->machine->frame.saved_varargs_size, 0);
-+		+ frame.saved_varargs_size, 0);
-
-   /* Emit a barrier to prevent loads from a deallocated stack.  */
-   if (maybe_gt (final_adjust, crtl->outgoing_args_size)
-@@ -9969,7 +9974,7 @@ aarch64_expand_epilogue (bool for_sibcall)
-     }
-
-   /* Pop return address from shadow call stack.  */
--  if (cfun->machine->frame.is_scs_enabled)
-+  if (frame.is_scs_enabled)
-     {
-       machine_mode mode = aarch64_reg_save_mode (R30_REGNUM);
-       rtx reg = gen_rtx_REG (mode, R30_REGNUM);
-@@ -12564,24 +12569,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
- poly_int64
- aarch64_initial_elimination_offset (unsigned from, unsigned to)
- {
-+  aarch64_frame &frame = cfun->machine->frame;
-+
-   if (to == HARD_FRAME_POINTER_REGNUM)
-     {
-       if (from == ARG_POINTER_REGNUM)
--	return cfun->machine->frame.hard_fp_offset;
-+	return frame.hard_fp_offset;
-
-       if (from == FRAME_POINTER_REGNUM)
--	return cfun->machine->frame.hard_fp_offset
--	  - cfun->machine->frame.locals_offset;
-+	return frame.hard_fp_offset - frame.locals_offset;
-     }
-
-   if (to == STACK_POINTER_REGNUM)
-     {
-       if (from == FRAME_POINTER_REGNUM)
--	return cfun->machine->frame.frame_size
--	  - cfun->machine->frame.locals_offset;
-+	return frame.frame_size - frame.locals_offset;
-     }
-
--  return cfun->machine->frame.frame_size;
-+  return frame.frame_size;
- }
-
-
---
-2.43.5
-
View file
_service:tar_scm:0098-CHREC-multiplication-and-undefined-overflow.patch
Added
@@ -0,0 +1,261 @@
+From c4e4fef145c1e402f0558cc35f6c1ed0a08beffb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Thu, 25 Jul 2024 20:16:52 +0800
+Subject: [PATCH] CHREC multiplication and undefined overflow
+
+This optimization is brought from https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646531.html
+
+When folding a multiply CHRECs are handled like {a, +, b} * c
+is {a*c, +, b*c} but that isn't generally correct when overflow
+invokes undefined behavior. The following uses unsigned arithmetic
+unless either a is zero or a and b have the same sign.
+
+I've used simple early outs for INTEGER_CSTs and otherwise use
+a range-query since we lack a tree_expr_nonpositive_p and
+get_range_pos_neg isn't a good fit.
+---
+ gcc/common.opt                          |  4 ++
+ gcc/testsuite/gcc.dg/pr68317.c          |  6 +-
+ gcc/testsuite/gcc.dg/torture/pr114074.c | 27 +++++++++
+ gcc/tree-chrec.cc                       | 81 +++++++++++++++++++++----
+ gcc/tree-chrec.h                        |  2 +-
+ gcc/value-range.cc                      | 12 ++++
+ gcc/value-range.h                       |  2 +
+ 7 files changed, 119 insertions(+), 15 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/torture/pr114074.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..d3af3ba39 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1771,6 +1771,10 @@ floop-interchange
+ Common Var(flag_loop_interchange) Optimization
+ Enable loop interchange on trees.
+
++fchrec-mul-fold-strict-overflow
++Common Var(flag_chrec_mul_fold_strict_overflow) Init(0)
++Enable strict overflow handling during constant folding of multiply CHRECs.
++
+ floop-block
+ Common Alias(floop-nest-optimize)
+ Enable loop nest transforms.  Same as -floop-nest-optimize.
+diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c
+index bd053a752..671a67d95 100644
+--- a/gcc/testsuite/gcc.dg/pr68317.c
++++ b/gcc/testsuite/gcc.dg/pr68317.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdisable-tree-ethread" } */
++/* { dg-options "-O2 -fdisable-tree-ethread -fchrec-mul-fold-strict-overflow" } */
+
+ /* Note: Threader will collapse loop.  */
+
+@@ -12,8 +12,8 @@ foo ()
+ {
+   int32_t index = 0;
+
+-  for (index; index <= 10; index--) // expected warning here
++  for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
+     /* Result of the following multiply will overflow
+        when converted to signed int32_t.  */
+-    bar ((0xcafe + index) * 0xdead); /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
++    bar ((0xcafe + index) * 0xdead);
+ }
+diff --git a/gcc/testsuite/gcc.dg/torture/pr114074.c b/gcc/testsuite/gcc.dg/torture/pr114074.c
+new file mode 100644
+index 000000000..9a383d8fc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr114074.c
+@@ -0,0 +1,27 @@
++/* { dg-do run } */
++/* { dg-options "-fchrec-mul-fold-strict-overflow" } */
++int a, b, d;
++
++__attribute__((noipa)) void
++foo (void)
++{
++  ++d;
++}
++
++int
++main ()
++{
++  for (a = 0; a > -3; a -= 2)
++    {
++      int c = a;
++      b = __INT_MAX__ - 3000;
++      a = ~c * b;
++      foo ();
++      if (!a)
++	break;
++      a = c;
++    }
++  if (d != 2)
++    __builtin_abort ();
++  return 0;
++}
+diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc
+index c44cea754..3323901bc 100644
+--- a/gcc/tree-chrec.cc
++++ b/gcc/tree-chrec.cc
+@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "gimple.h"
+ #include "tree-ssa-loop.h"
+ #include "dumpfile.h"
++#include "value-range.h"
++#include "value-query.h"
+ #include "tree-scalar-evolution.h"
+
+ /* Extended folder for chrecs.  */
+@@ -404,6 +406,13 @@ chrec_fold_multiply (tree type,
+       || automatically_generated_chrec_p (op1))
+     return chrec_fold_automatically_generated_operands (op0, op1);
+
++  if (flag_chrec_mul_fold_strict_overflow)
++    {
++      if (TREE_CODE (op0) != POLYNOMIAL_CHREC
++	  && TREE_CODE (op1) == POLYNOMIAL_CHREC)
++	std::swap (op0, op1);
++    }
++
+   switch (TREE_CODE (op0))
+     {
+     case POLYNOMIAL_CHREC:
+@@ -428,10 +437,53 @@ chrec_fold_multiply (tree type,
+       if (integer_zerop (op1))
+ 	return build_int_cst (type, 0);
+
+-      return build_polynomial_chrec
+-	(CHREC_VARIABLE (op0),
+-	 chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
+-	 chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
++      if (flag_chrec_mul_fold_strict_overflow)
++	{
++	  /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the
++	     same sign or CHREC_LEFT is zero then folding the multiply into
++	     the addition does not have the same behavior on overflow.  Use
++	     unsigned arithmetic in that case.  */
++	  value_range rl, rr;
++	  if (!ANY_INTEGRAL_TYPE_P (type)
++	      || TYPE_OVERFLOW_WRAPS (type)
++	      || integer_zerop (CHREC_LEFT (op0))
++	      || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST
++		  && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST
++		  && (tree_int_cst_sgn (CHREC_LEFT (op0))
++		      == tree_int_cst_sgn (CHREC_RIGHT (op0))))
++	      || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0))
++		  && !rl.undefined_p ()
++		  && (rl.nonpositive_p () || rl.nonnegative_p ())
++		  && get_range_query (cfun)->range_of_expr (rr,
++							    CHREC_RIGHT (op0))
++		  && !rr.undefined_p ()
++		  && ((rl.nonpositive_p () && rr.nonpositive_p ())
++		      || (rl.nonnegative_p () && rr.nonnegative_p ()))))
++	    {
++	      tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1);
++	      tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1);
++	      return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right);
++	    }
++	  else
++	    {
++	      tree utype = unsigned_type_for (type);
++	      tree uop1 = chrec_convert_rhs (utype, op1);
++	      tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0));
++	      tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0));
++	      tree left = chrec_fold_multiply (utype, uleft0, uop1);
++	      tree right = chrec_fold_multiply (utype, uright0, uop1);
++	      tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0),
++						 left, right);
++	      return chrec_convert_rhs (type, tem);
++	    }
++	}
++      else
++	{
++	  return build_polynomial_chrec
++	    (CHREC_VARIABLE (op0),
++	     chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
++	     chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
++	}
+     }
+
+   CASE_CONVERT:
+@@ -449,13 +501,20 @@ chrec_fold_multiply (tree type,
+   switch (TREE_CODE (op1))
+     {
+     case POLYNOMIAL_CHREC:
+-      gcc_checking_assert
+-	(!chrec_contains_symbols_defined_in_loop (op1,
+-						  CHREC_VARIABLE (op1)));
+-      return build_polynomial_chrec
+-	(CHREC_VARIABLE (op1),
+-	 chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
+-	 chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
++      if (flag_chrec_mul_fold_strict_overflow)
++	{
++	  gcc_unreachable ();
++	}
++      else
++	{
++	  gcc_checking_assert
++	    (!chrec_contains_symbols_defined_in_loop (op1,
++						      CHREC_VARIABLE (op1)));
++	  return build_polynomial_chrec
++	    (CHREC_VARIABLE (op1),
++	     chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
++	     chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
++	}
+
+   CASE_CONVERT:
+     if (tree_contains_chrecs (op1, NULL))
+diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h
+index fcf41710d..cdc97d5d9 100644
+--- a/gcc/tree-chrec.h
++++ b/gcc/tree-chrec.h
+@@ -63,7 +63,7 @@ extern tree chrec_fold_plus (tree, tree, tree);
+ extern tree chrec_fold_minus (tree, tree, tree);
+ extern tree chrec_fold_multiply (tree, tree, tree);
+ extern tree chrec_convert (tree, tree, gimple *, bool = true, tree = NULL);
+-extern tree chrec_convert_rhs (tree, tree, gimple *);
++extern tree chrec_convert_rhs (tree, tree, gimple * = NULL);
+ extern tree chrec_convert_aggressive (tree, tree, bool *);
+
+ /* Operations.  */
+diff --git a/gcc/value-range.cc b/gcc/value-range.cc
+index 000bbcf89..a1dc10a24 100644
+--- a/gcc/value-range.cc
++++ b/gcc/value-range.cc
+@@ -656,6 +656,18 @@ irange::contains_p (tree cst) const
+
+   return false;
+ }
++bool
++irange::nonnegative_p () const
++{
++  return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
++}
++
++bool
++irange::nonpositive_p () const
++{
++  return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
++}
++
+
+
+ /* Normalize addresses into constants.  */
+diff --git a/gcc/value-range.h b/gcc/value-range.h
+index d4cba22d5..2dc0907de 100644
+--- a/gcc/value-range.h
++++ b/gcc/value-range.h
+@@ -69,6 +69,8 @@ public:
+   bool varying_p () const;
+   bool singleton_p (tree *result = NULL) const;
+   bool contains_p (tree) const;
++  bool nonnegative_p () const;
++  bool nonpositive_p () const;
+
+   // In-place operators.
+   void union_ (const irange &);
+--
+2.33.0
+
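Editor's note: the pr114074.c file above originally carried unresolved git merge-conflict markers around its dg-options line; the resolution shown keeps the well-formed variant and adjusts the hunk and diffstat counts to match. As a small numeric illustration of the hazard the patch guards against, with hand-picked values rather than the pr114074 reproducer: distributing {a, +, b} * c into {a*c, +, b*c} creates the intermediate term i * (b * c), which can overflow signed int even though the undistributed (a + i*b) * c does not. Doing the folded arithmetic in unsigned, as chrec_fold_multiply now does when CHREC_LEFT and CHREC_RIGHT may differ in sign, keeps every step well defined.

/* Why {1, +, -1} * INT_MAX must not be distributed in signed arithmetic:
   at i = 2 the folded form needs 2 * (-1 * INT_MAX), which overflows int,
   while (1 + 2 * -1) * INT_MAX does not.  Computing the folded form in
   unsigned wraps instead of overflowing; converting the result back to
   int assumes two's complement, which GCC documents for its targets.  */
#include <limits.h>
#include <stdio.h>

int
main (void)
{
  int a = 1, b = -1, c = INT_MAX;       /* a and b have opposite signs */
  int i = 2;

  int direct = (a + i * b) * c;         /* (-1) * INT_MAX, no overflow */

  unsigned ua = (unsigned) a * (unsigned) c;    /* a*c */
  unsigned ub = (unsigned) b * (unsigned) c;    /* b*c, wraps harmlessly */
  unsigned folded = ua + (unsigned) i * ub;     /* a*c + i*(b*c) mod 2^32 */

  printf ("direct = %d, folded = %d\n", direct, (int) folded);
  /* Both print -2147483647.  */
  return 0;
}

This mirrors why the fallback branch of the patch converts CHREC_LEFT, CHREC_RIGHT, and the multiplier to the corresponding unsigned type, folds there, and converts the polynomial back at the end.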
View file
_service:tar_scm:0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
Added
@@ -0,0 +1,157 @@
+From c8fa8efa3297ebced55da8a69cf44f314573be7c Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Fri, 5 Jan 2024 11:43:28 +0800
+Subject: [PATCH 098/188] LoongArch: testsuite:Added additional vectorization
+ "-mlasx" compilation option.
+
+In the LoongArch architecture, the reason for not adding the 128-bit
+vector-width-*hi* instruction template in the GCC back end is that it causes
+program performance loss, so we can only add the "-mlasx" compilation option
+to use 256-bit vectorization functions in test files.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/vect/bb-slp-pattern-1.c: If you are testing on the
+	LoongArch architecture, you need to add the "-mlasx" compilation
+	option to generate vectorized code.
+	* gcc.dg/vect/slp-widen-mult-half.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-const-s16.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-const-u16.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-half-u8.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-half.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-u16.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-u8-u32.c: Dito.
+	* gcc.dg/vect/vect-widen-mult-u8.c: Dito.
+---
+ gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c           | 1 +
+ gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c        | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c  | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c  | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c    | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c       | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c        | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c     | 1 +
+ gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c         | 1 +
+ 10 files changed, 10 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
+index 47b1a4366..52ffca82a 100644
+--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
+@@ -1,4 +1,5 @@
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+index e3bfee333..cd44e551f 100644
+--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+@@ -1,6 +1,7 @@
+ /* Disabling epilogues until we find a better way to deal with scans.  */
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
+ 
+ #include "tree-vect.h"
+ 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
+index 4c95dd201..082c758cb 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
+@@ -2,6 +2,7 @@
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-additional-options "-fno-ipa-icf" } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include "tree-vect.h"
+ 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
+index 4075f815c..a95e617ad 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
+@@ -2,6 +2,7 @@
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-additional-options "-fno-ipa-icf" } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include "tree-vect.h"
+ 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
+index c4ac88e18..14d96645a 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
+@@ -2,6 +2,7 @@
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-additional-options "-fno-ipa-icf" } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include "tree-vect.h"
+ 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
+index ebbf4f5e8..7901dae85 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
+@@ -1,6 +1,7 @@
+ /* Disabling epilogues until we find a better way to deal with scans.  */
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include "tree-vect.h"
+ 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
+index 2e28baae0..21b39953e 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
+@@ -1,6 +1,7 @@
+ /* Disabling epilogues until we find a better way to deal with scans.  */
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
+index d277f0b2b..4827e11b2 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
+@@ -1,6 +1,7 @@
+ /* Disabling epilogues until we find a better way to deal with scans.  */
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
+index f50358802..87eb9e0cb 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
+@@ -1,5 +1,6 @@
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
+index 03d137941..507d30c35 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
+@@ -1,5 +1,6 @@
+ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+ /* { dg-require-effective-target vect_int } */
++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+-- 
+2.43.0
+
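The dg-additional-options directives above take effect only for loongarch*-*-* targets, so other architectures keep their existing test flags. A minimal sketch of the kind of widening-multiply loop these tests exercise, with a hypothetical compile line mirroring the testsuite flags (the file name and the exact invocation are illustrative, not from the patch):

/* widen-mult.c: each 16-bit element is widened to 32 bits and multiplied;
   with -mlasx the LoongArch back end can use 256-bit LASX vectors.  */
#define N 64
unsigned short in1[N], in2[N];
unsigned int out[N];

void
widen_mult (void)
{
  for (int i = 0; i < N; i++)
    out[i] = (unsigned int) in1[i] * (unsigned int) in2[i];
}

/* Hypothetical invocation:
   gcc -O2 -ftree-vectorize -mlasx -fdump-tree-vect-details -c widen-mult.c  */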
View file
_service:tar_scm:0098-aarch64-Avoid-a-use-of-callee-offset.patch
Deleted
@@ -1,73 +0,0 @@
-From 12a8889de169f892d2e927584c00d20b8b7e456f Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:49 +0100
-Subject: [PATCH] aarch64: Avoid a use of callee_offset
-
-When we emit the frame chain, i.e. when we reach Here in this statement
-of aarch64_expand_prologue:
-
-  if (emit_frame_chain)
-    {
-      // Here
-      ...
-    }
-
-the stack is in one of two states:
-
-- We've allocated up to the frame chain, but no more.
-
-- We've allocated the whole frame, and the frame chain is within easy
-  reach of the new SP.
-
-The offset of the frame chain from the current SP is available
-in aarch64_frame as callee_offset.  It is also available as the
-chain_offset local variable, where the latter is calculated from other
-data.  (However, chain_offset is not always equal to callee_offset when
-!emit_frame_chain, so chain_offset isn't redundant.)
-
-In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using
-chain_offset for the initialisation of the hard frame pointer:
-
-       aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
--                          stack_pointer_rtx, callee_offset,
-+                          stack_pointer_rtx, chain_offset,
-                           tmp1_rtx, tmp0_rtx, frame_pointer_needed);
-
-But the later REG_CFA_ADJUST_CFA handling still used callee_offset.
-
-I think the difference is harmless, but it's more logical for the
-CFA note to be in sync, and it's more convenient for later patches
-if it uses chain_offset.
-
-gcc/
-	* config/aarch64/aarch64.cc (aarch64_expand_prologue): Use
-	chain_offset rather than callee_offset.
----
- gcc/config/aarch64/aarch64.cc | 4 +---
- 1 file changed, 1 insertion(+), 3 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index ae42ffdedbeb..79253322fd7c 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9670,7 +9670,6 @@ aarch64_expand_prologue (void)
-   poly_int64 initial_adjust = frame.initial_adjust;
-   HOST_WIDE_INT callee_adjust = frame.callee_adjust;
-   poly_int64 final_adjust = frame.final_adjust;
--  poly_int64 callee_offset = frame.callee_offset;
-   poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
-   poly_int64 below_hard_fp_saved_regs_size
-     = frame.below_hard_fp_saved_regs_size;
-@@ -9783,8 +9782,7 @@
- 	 implicit.  */
-       if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
- 	{
--	  rtx src = plus_constant (Pmode, stack_pointer_rtx,
--				   callee_offset);
-+	  rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset);
- 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
- 			gen_rtx_SET (hard_frame_pointer_rtx, src));
- 	}
--- 
-2.43.5
-
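For context on the message above: with a frame pointer, every AArch64 frame stores the caller's x29/x30 pair at chain_offset above the current SP, and x29 points at that record, so the frames form a chain. A simplified, illustrative C model of that record and of walking one link of the chain (not GCC code):

struct frame_record
{
  struct frame_record *next;   /* saved x29: the caller's frame record */
  void *return_addr;           /* saved x30 */
};

/* Walk one level up the frame chain that the prologue establishes with
   "stp x29, x30, [sp, #chain_offset]" followed by "add x29, sp, #chain_offset".
   The REG_CFA_ADJUST_CFA note discussed above records exactly that
   hard_frame_pointer = stack_pointer + chain_offset relation for the unwinder.  */
static inline struct frame_record *
caller_frame (struct frame_record *fp)
{
  return fp->next;
}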
View file
_service:tar_scm:0099-Enable-Transposed-SLP.patch
Added
@@ -0,0 +1,5624 @@
+From 0dd3b8532f35486bd5db2c71342c8dfed4c0893a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
+Date: Thu, 25 Jul 2024 17:25:23 +0800
+Subject: [PATCH] Enable Transposed SLP.
+
+---
+ gcc/common.opt                          |    4 +
+ gcc/testsuite/gcc.dg/vect/transpose-1.c |   53 +
+ gcc/testsuite/gcc.dg/vect/transpose-2.c |   50 +
+ gcc/testsuite/gcc.dg/vect/transpose-3.c |   54 +
+ gcc/testsuite/gcc.dg/vect/transpose-4.c |   53 +
+ gcc/testsuite/gcc.dg/vect/transpose-5.c |   74 ++
+ gcc/testsuite/gcc.dg/vect/transpose-6.c |   67 +
+ gcc/testsuite/gcc.dg/vect/transpose-7.c |   53 +
+ gcc/testsuite/gcc.dg/vect/transpose-8.c |   53 +
+ gcc/testsuite/gcc.dg/vect/vect.exp      |    7 +
+ gcc/tree-loop-distribution.cc           | 1464 ++++++++++++++++++++-
+ gcc/tree-vect-data-refs.cc              |  237 ++++
+ gcc/tree-vect-loop.cc                   |   42 +-
+ gcc/tree-vect-patterns.cc               |    4 +-
+ gcc/tree-vect-slp.cc                    | 1553 ++++++++++++++++++++---
+ gcc/tree-vect-stmts.cc                  |  973 +++++++++++++-
+ gcc/tree-vectorizer.h                   |   96 +-
+ 17 files changed, 4648 insertions(+), 189 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-4.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-5.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-7.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-8.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..5958c4e0b 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3221,6 +3221,10 @@ ftree-slp-vectorize
+ Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
+ Enable basic block vectorization (SLP) on trees.
+ 
++ftree-slp-transpose-vectorize
++Common Var(flag_tree_slp_transpose_vectorize) Optimization Init(0)
++Enable basic block vectorization (SLP) for transposed stores and loads on trees.
++
+ fvect-cost-model=
+ Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
+ -fvect-cost-model=unlimited|dynamic|cheap|very-cheap	Specifies the cost model for vectorization.
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-1.c b/gcc/testsuite/gcc.dg/vect/transpose-1.c +new file mode 100644 +index 000000000..8237a8b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-1.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0N, c1N, c2N, c3N, c4N, c5N, c6N, c7N; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ c2i = pix12 - pix22; ++ c3i = pix13 - pix23; ++ c4i = pix14 - pix24; ++ c5i = pix15 - pix25; ++ c6i = pix16 - pix26; ++ c7i = pix17 - pix27; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0i + c1i + c2i + c3i + c4i + c5i + c6i + c7i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned char input1M; ++ unsigned char input2M; ++ int i1 = 16; ++ int i2 = 8; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 2; ++ input2i = i; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1264) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-2.c b/gcc/testsuite/gcc.dg/vect/transpose-2.c +new file mode 100644 +index 000000000..fdf4dbd96 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-2.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 8 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned short c0N, c1N, c2N, c3N, c4N, c5N, c6N, c7N; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ c2i = pix12 - pix22; ++ c3i = pix13 - pix23; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0i + c1i + c2i + c3i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned char input1M; ++ unsigned char input2M; ++ int i1 = 5; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 4; ++ input2i = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1440) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-3.c b/gcc/testsuite/gcc.dg/vect/transpose-3.c +new file mode 100644 +index 000000000..e492e3717 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-3.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse -fno-tree-fre" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned short *pix1, int i_pix1, unsigned short *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0N, c1N, c2N, c3N, c4N, c5N, c6N, c7N; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - 
pix21; ++ c2i = pix12 - pix22; ++ c3i = pix13 - pix23; ++ c4i = pix14 - pix24; ++ c5i = pix15 - pix25; ++ c6i = pix16 - pix26; ++ c7i = pix17 - pix27; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0i + c1i + c2i + c3i + c4i + c5i + c6i + c7i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned short input1M; ++ unsigned short input2M; ++ int i1 = 8; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 4; ++ input2i = i; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1680) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-4.c b/gcc/testsuite/gcc.dg/vect/transpose-4.c +new file mode 100644 +index 000000000..0b4adea9b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-4.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned *pix1, int i_pix1, unsigned *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0N, c1N, c2N, c3N, c4N, c5N, c6N, c7N; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ c2i = pix12 - pix22; ++ c3i = pix13 - pix23; ++ c4i = pix14 - pix24; ++ c5i = pix15 - pix25; ++ c6i = pix16 - pix26; ++ c7i = pix17 - pix27; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0i + c1i + c2i + c3i + c4i + c5i + c6i + c7i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned input1M; ++ unsigned input2M; ++ int i1 = 12; ++ int i2 = 6; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 7; ++ input2i = i * 3; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 3616) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-5.c b/gcc/testsuite/gcc.dg/vect/transpose-5.c +new file mode 100644 +index 000000000..040dedf1b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-5.c +@@ -0,0 +1,74 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-dse -fno-tree-fre" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include <math.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++#define eps 1e-8 ++ ++double foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ unsigned a0N; ++ unsigned a1N; ++ unsigned a2N; ++ unsigned a3N; ++ ++ int b0N; ++ int b1N; ++ int b2N; ++ int b3N; ++ ++ for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ a0i = (pix10 - pix20) + ((pix14 + pix24) << 16); ++ a1i = (pix11 - pix21) + ((pix15 + pix25) << 16); ++ a2i = (pix12 - pix22) + ((pix16 + pix26) << 16); ++ a3i = (pix13 - pix23) + ((pix17 + pix27) << 16); ++ } ++ ++ for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ b0i = (pix10 - pix20) + (pix14 + pix24); ++ b1i = (pix11 - pix21) + (pix15 + pix25); ++ b2i = (pix12 - pix22) + (pix16 + pix26); ++ b3i = (pix13 - pix23) + (pix17 + pix27); ++ } ++ ++ double sum = 0; ++ for (int i = 0; i < N; i++) ++ { ++ sum += a0i + a1i + a2i + a3i + b0i + b1i + b2i + b3i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned char 
input1M; ++ unsigned char input2M; ++ int i1 = 8; ++ int i2 = 3; ++ unsigned char m = 2; ++ unsigned short n = 12; ++ float t = 3.0; ++ double k = 4.2; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 6; ++ input2i = i * 3; ++ } ++ double sum = foo (input1, i1, input2, i2); ++ if (fabs (sum - 78648144) > eps) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-6.c b/gcc/testsuite/gcc.dg/vect/transpose-6.c +new file mode 100644 +index 000000000..3e134ac02 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-6.c +@@ -0,0 +1,67 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_float } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include <math.h> ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++#define eps 1e-8 ++ ++float foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ unsigned a0N; ++ unsigned a1N; ++ unsigned a2N; ++ unsigned a3N; ++ ++ float c0N; ++ float c1N; ++ float c2N; ++ float c3N; ++ ++ for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ a0i = (pix10 - pix20) + ((pix14 - pix24) << 16); ++ a1i = (pix11 - pix21) + ((pix15 - pix25) << 16); ++ a2i = (pix12 - pix22) + ((pix16 - pix26) << 16); ++ a3i = (pix13 - pix23) + ((pix17 - pix27) << 16); ++ ++ c0i = (pix10 * pix20) + (pix14 * pix24); ++ c1i = (pix11 * pix21) + (pix15 * pix25); ++ c2i = (pix12 * pix22) + (pix16 * pix26); ++ c3i = (pix13 * pix23) + (pix17 * pix27); ++ } ++ ++ float sum = 0; ++ for (int i = 0; i < N; i++) ++ { ++ sum += a0i + a1i + a2i + a3i + c0i + c1i + c2i + c3i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned char input1M; ++ unsigned char input2M; ++ int i1 = 18; ++ int i2 = 6; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 4; ++ input2i = i * 2; ++ } ++ float sum = foo (input1, i1, input2, i2); ++ if (fabs (sum - 106041168) > eps) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-7.c b/gcc/testsuite/gcc.dg/vect/transpose-7.c +new file mode 100644 +index 000000000..8ba1b1b6d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-7.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 16 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned char c0N, c1N; ++ for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ } ++ for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0i + c1i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned char input1M; ++ unsigned char input2M; ++ int i1 = 6; ++ int i2 = 4; ++ check_vect (); 
++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 5; ++ input2i = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 3280) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-8.c b/gcc/testsuite/gcc.dg/vect/transpose-8.c +new file mode 100644 +index 000000000..a154f012a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-8.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize" } */ ++/* { dg-require-effective-target vect_int } */ ++#include <stdio.h> ++#include <stdlib.h> ++#include "tree-vect.h" ++ ++#define N 32 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned char c0N, c1N; ++ for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ } ++ for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0i = pix10 - pix20; ++ c1i = pix11 - pix21; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0i + c1i; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv) ++{ ++ unsigned char input1M; ++ unsigned char input2M; ++ int i1 = 6; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1i = i * 5; ++ input2i = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 7584) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp +index dcaef1e0a..ae5212411 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect.exp ++++ b/gcc/testsuite/gcc.dg/vect/vect.exp +@@ -117,6 +117,13 @@ et-dg-runtest dg-runtest lsort \ + glob -nocomplain $srcdir/$subdir/no-vfa-*.\cS\ \ + "" $DEFAULT_VECTCFLAGS + ++# -ftree-slp-transpose-vectorize SLP tests ++set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS ++lappend VECT_SLP_CFLAGS "-ftree-slp-transpose-vectorize" ++et-dg-runtest dg-runtest lsort \ ++ glob -nocomplain $srcdir/$subdir/transpose-*.\cS\ \ ++ "" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3" ++ + # -ffast-math tests + set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS + lappend DEFAULT_VECTCFLAGS "-ffast-math" +diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc +index 606eb05e6..8d118e987 100644 +--- a/gcc/tree-loop-distribution.cc ++++ b/gcc/tree-loop-distribution.cc +@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see + | D(I) = A(I-1)*E + |ENDDO + ++ If an unvectorizable loop has grouped loads, and calculations from grouped ++ loads are isomorphic, build temp arrays using stmts where isomorphic ++ calculations end. Afer distribution, the partition built from temp ++ arrays can be vectorized in pass SLP after loop unrolling. 
For example, ++ ++ |DO I = 1, N ++ | A = FOO (ARG_1); ++ | B = FOO (ARG_2); ++ | C = BAR_0 (A); ++ | D = BAR_1 (B); ++ |ENDDO ++ ++ is transformed to ++ ++ |DO I = 1, N ++ | J = FOO (ARG_1); ++ | K = FOO (ARG_2); ++ | XI = J; ++ | YI = K; ++ | A = XI; ++ | B = YI; ++ | C = BAR_0 (A); ++ | D = BAR_1 (B); ++ |ENDDO ++ ++ and is then distributed to ++ ++ |DO I = 1, N ++ | J = FOO (ARG_1); ++ | K = FOO (ARG_2); ++ | XI = J; ++ | YI = K; ++ |ENDDO ++ ++ |DO I = 1, N ++ | A = XI; ++ | B = YI; ++ | C = BAR_0 (A); ++ | D = BAR_1 (B); ++ |ENDDO ++ + Loop distribution is the dual of loop fusion. It separates statements + of a loop (or loop nest) into multiple loops (or loop nests) with the + same loop header. The major goal is to separate statements which may +@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see + + 1) Seed partitions with specific type statements. For now we support + two types seed statements: statement defining variable used outside +- of loop; statement storing to memory. ++ of loop; statement storing to memory. Moreover, for unvectorizable ++ loops, we try to find isomorphic stmts from grouped load and build ++ temp arrays as new seed statements. + 2) Build reduced dependence graph (RDG) for loop to be distributed. + The vertices (RDG:V) model all statements in the loop and the edges + (RDG:E) model flow and control dependencies between statements. +@@ -90,6 +133,8 @@ along with GCC; see the file COPYING3. If not see + data reuse. */ + + #include "config.h" ++#define INCLUDE_MAP ++#define INCLUDE_ALGORITHM + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -115,6 +160,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-vectorizer.h" + #include "tree-eh.h" + #include "gimple-fold.h" ++#include "optabs-tree.h" + #include "tree-affine.h" + #include "intl.h" + #include "rtl.h" +@@ -188,6 +234,52 @@ struct rdg_vertex + #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->verticesI)) + #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->verticesI)) + ++/* Results of isomorphic group analysis. */ ++#define UNINITIALIZED (0) ++#define ISOMORPHIC (1) ++#define HETEROGENEOUS (1 << 1) ++#define UNCERTAIN (1 << 2) ++ ++/* Information of a stmt while analyzing isomorphic use in group. */ ++ ++typedef struct _group_info ++{ ++ gimple *stmt; ++ ++ /* True if stmt can be a cut point. */ ++ bool cut_point; ++ ++ /* For use_stmt with two rhses, one of which is the lhs of stmt. ++ If the other is unknown to be isomorphic, mark it uncertain. */ ++ bool uncertain; ++ ++ /* Searching of isomorphic stmt reaches heterogeneous groups or reaches ++ MEM stmts. */ ++ bool done; ++ ++ _group_info () ++ { ++ stmt = NULL; ++ cut_point = false; ++ uncertain = false; ++ done = false; ++ } ++} *group_info; ++ ++/* PAIR of cut points and corresponding profit. */ ++typedef std::pair<vec<gimple *> *, int> stmts_profit; ++ ++/* MAP of vector factor VF and corresponding stmts_profit PAIR. */ ++typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map; ++ ++/* PAIR of group_num and iteration_num. We consider rhses from the same ++ group and interation are isomorphic. */ ++typedef std::pair<unsigned, unsigned> group_iteration; ++ ++/* An isomorphic stmt is detetmined by lhs of use_stmt, group_num and ++ the iteration_num when we insert this stmt to this map. */ ++typedef std::map<tree, group_iteration> isomer_stmt_lhs; ++ + /* Data dependence type. 
*/ + + enum rdg_dep_type +@@ -600,13 +692,14 @@ class loop_distribution + /* Returns true when PARTITION1 and PARTITION2 access the same memory + object in RDG. */ + bool share_memory_accesses (struct graph *rdg, +- partition *partition1, partition *partition2); ++ partition *partition1, partition *partition2, ++ hash_set<tree> *excluded_arrays); + + /* For each seed statement in STARTING_STMTS, this function builds + partition for it by adding depended statements according to RDG. + All partitions are recorded in PARTITIONS. */ + void rdg_build_partitions (struct graph *rdg, +- vec<gimple *> starting_stmts, ++ vec<gimple *> *starting_stmts, + vec<partition *> *partitions); + + /* Compute partition dependence created by the data references in DRS1 +@@ -643,15 +736,50 @@ class loop_distribution + + /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. + ALIAS_DDRS contains ddrs which need runtime alias check. */ +- void finalize_partitions (class loop *loop, vec<struct partition *> +- *partitions, vec<ddr_p> *alias_ddrs); ++ void finalize_partitions (class loop *loop, ++ vec<struct partition *> *partitions, ++ vec<ddr_p> *alias_ddrs, bitmap producers); ++ ++ /* Analyze loop form and if it's vectorizable to decide if we need to ++ insert temp arrays to distribute it. */ ++ bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg, ++ control_dependences *cd); ++ ++ /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1. */ ++ void reset_gimple_uid (loop_p loop); ++ ++ bool check_loop_vectorizable (loop_p loop); ++ ++ inline void rebuild_rdg (loop_p loop, struct graph *&rdg, ++ control_dependences *cd); ++ ++ /* If loop is not distributed, remove inserted temp arrays. */ ++ void remove_insertion (loop_p loop, struct graph *flow_only_rdg, ++ bitmap producers, struct partition *partition); ++ ++ /* Insert temp arrays if isomorphic computation exists. Temp arrays will be ++ regarded as SEED_STMTS for building partitions in succeeding processes. */ ++ bool insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts, ++ hash_set<tree> *tmp_array_vars, bitmap producers); ++ ++ void build_producers (loop_p loop, bitmap producers, ++ vec<gimple *> &transformed); ++ ++ void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv, ++ bitmap cut_points, hash_set <tree> *tmp_array_vars, ++ bitmap producers); ++ ++ /* Fuse PARTITIONS built from inserted temp arrays into one partition, ++ fuse the rest into another. */ ++ void merge_remaining_partitions (vec<struct partition *> *partitions, ++ bitmap producers); + + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. Returns the number of + distributed loops. Set NB_CALLS to number of generated builtin calls. + Set *DESTROY_P to whether LOOP needs to be destroyed. 
*/ +- int distribute_loop (class loop *loop, const vec<gimple *> &stmts, ++ int distribute_loop (class loop *loop, vec<gimple *> &stmts, + control_dependences *cd, int *nb_calls, bool *destroy_p, + bool only_patterns_p); + +@@ -1893,7 +2021,8 @@ loop_distribution::classify_partition (loop_p loop, + + bool + loop_distribution::share_memory_accesses (struct graph *rdg, +- partition *partition1, partition *partition2) ++ partition *partition1, partition *partition2, ++ hash_set <tree> *excluded_arrays) + { + unsigned i, j; + bitmap_iterator bi, bj; +@@ -1927,7 +2056,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg, + if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0) + && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0) + && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0) +- && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)) ++ && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0) ++ /* An exception, if PARTITION1 and PARTITION2 contain the ++ temp array we inserted, do not merge them. */ ++ && !excluded_arrays->contains (DR_REF (dr1))) + return true; + } + } +@@ -1941,14 +2073,14 @@ loop_distribution::share_memory_accesses (struct graph *rdg, + + void + loop_distribution::rdg_build_partitions (struct graph *rdg, +- vec<gimple *> starting_stmts, ++ vec<gimple *> *starting_stmts, + vec<partition *> *partitions) + { + auto_bitmap processed; + int i; + gimple *stmt; + +- FOR_EACH_VEC_ELT (starting_stmts, i, stmt) ++ FOR_EACH_VEC_ELT (*starting_stmts, i, stmt) + { + int v = rdg_vertex_for_stmt (rdg, stmt); + +@@ -2912,13 +3044,47 @@ fuse_memset_builtins (vec<struct partition *> *partitions) + } + } + ++void ++loop_distribution::merge_remaining_partitions ++ (vec<struct partition *> *partitions, ++ bitmap producers) ++{ ++ struct partition *partition = NULL; ++ struct partition *p1 = NULL, *p2 = NULL; ++ for (unsigned i = 0; partitions->iterate (i, &partition); i++) ++ { ++ if (bitmap_intersect_p (producers, partition->stmts)) ++ { ++ if (p1 == NULL) ++ { ++ p1 = partition; ++ continue; ++ } ++ partition_merge_into (NULL, p1, partition, FUSE_FINALIZE); ++ } ++ else ++ { ++ if (p2 == NULL) ++ { ++ p2 = partition; ++ continue; ++ } ++ partition_merge_into (NULL, p2, partition, FUSE_FINALIZE); ++ } ++ partitions->unordered_remove (i); ++ partition_free (partition); ++ i--; ++ } ++} ++ + void + loop_distribution::finalize_partitions (class loop *loop, + vec<struct partition *> *partitions, +- vec<ddr_p> *alias_ddrs) ++ vec<ddr_p> *alias_ddrs, ++ bitmap producers) + { + unsigned i; +- struct partition *partition, *a; ++ struct partition *partition; + + if (partitions->length () == 1 + || alias_ddrs->length () > 0) +@@ -2950,13 +3116,7 @@ loop_distribution::finalize_partitions (class loop *loop, + || (loop->inner == NULL + && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin)) + { +- a = (*partitions)0; +- for (i = 1; partitions->iterate (i, &partition); ++i) +- { +- partition_merge_into (NULL, a, partition, FUSE_FINALIZE); +- partition_free (partition); +- } +- partitions->truncate (1); ++ merge_remaining_partitions (partitions, producers); + } + + /* Fuse memset builtins if possible. */ +@@ -2964,6 +3124,1216 @@ loop_distribution::finalize_partitions (class loop *loop, + fuse_memset_builtins (partitions); + } + ++/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function ++ vect_analyze_loop, reset them to -1. 
*/ ++ ++void ++loop_distribution::reset_gimple_uid (loop_p loop) ++{ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++ bb_top_order_cmp_r); ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbsi; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL) ++ gimple_set_uid (stmt, -1); ++ } ++ } ++ free (bbs); ++} ++ ++bool ++loop_distribution::check_loop_vectorizable (loop_p loop) ++{ ++ vec_info_shared shared; ++ vect_analyze_loop (loop, &shared, true); ++ loop_vec_info vinfo = loop_vec_info_for_loop (loop); ++ reset_gimple_uid (loop); ++ if (vinfo == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "Loop %d no temp array insertion: bad data access pattern," ++ " unable to generate loop_vinfo.\n", loop->num); ++ return false; ++ } ++ if (vinfo->vectorizable) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++ " can be vectorized without distribution.\n", ++ loop->num); ++ delete vinfo; ++ loop->aux = NULL; ++ return false; ++ } ++ if (vinfo->grouped_loads.length () == 0) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++ " has no grouped loads.\n" , loop->num); ++ delete vinfo; ++ loop->aux = NULL; ++ return false; ++ } ++ return true; ++} ++ ++inline void ++loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg, ++ control_dependences *cd) ++{ ++ free_rdg (rdg); ++ rdg = build_rdg (loop, cd); ++ gcc_checking_assert (rdg != NULL); ++} ++ ++bool ++loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg, ++ control_dependences *cd) ++{ ++ if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize)) ++ return false; ++ ++ /* Only loops with two basic blocks HEADER and LATCH are supported. HEADER ++ is the main body of a LOOP and LATCH is the basic block that controls the ++ LOOP execution. Size of temp array is determined by loop execution time, ++ so it must be a const. */ ++ tree loop_extent = number_of_latch_executions (loop); ++ if (loop->inner != NULL || loop->num_nodes > 2 ++ || TREE_CODE (loop_extent) != INTEGER_CST) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d: no temp array insertion: bad loop" ++ " form.\n", loop->num); ++ return false; ++ } ++ ++ if (loop->dont_vectorize) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d: no temp array insertion: this loop" ++ " should never be vectorized.\n", ++ loop->num); ++ return false; ++ } ++ ++ /* Do not distribute a LOOP that is able to be vectorized without ++ distribution. */ ++ if (!check_loop_vectorizable (loop)) ++ { ++ rebuild_rdg (loop, rdg, cd); ++ return false; ++ } ++ ++ rebuild_rdg (loop, rdg, cd); ++ return true; ++} ++ ++/* Return max grouped loads' length if all groupes length satisfy len = 2 ^ n. ++ Otherwise, return 0. */ ++ ++static unsigned ++get_max_vf (loop_vec_info vinfo) ++{ ++ unsigned size = 0; ++ unsigned max = 0; ++ stmt_vec_info stmt_info; ++ unsigned i = 0; ++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++ { ++ size = stmt_info->size; ++ if (!pow2p_hwi (size)) ++ return 0; ++ max = size > max ? size : max; ++ } ++ return max; ++} ++ ++/* Convert grouped_loads from linked list to vector with length vf. 
Init ++ group_info of each stmt in the same group and put then into a vector. And ++ these vectors consist WORKLISTS. We will re-analyze a group if it is ++ uncertain, so we regard WORKLISTS as a circular queue. */ ++ ++static unsigned ++build_queue (loop_vec_info vinfo, unsigned vf, ++ vec<vec<group_info> *> &worklists) ++{ ++ stmt_vec_info stmt_info; ++ unsigned i = 0; ++ group_info ginfo = NULL; ++ vec<group_info> *worklist = NULL; ++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++ { ++ unsigned group_size = stmt_info->size; ++ stmt_vec_info c_stmt_info = stmt_info; ++ bool succ = true; ++ while (group_size >= vf) ++ { ++ vec_alloc (worklist, vf); ++ for (unsigned j = 0; j < vf; ++j) ++ { ++ if (c_stmt_info == NULL) ++ { ++ succ = false; ++ break; ++ } ++ ginfo = new _group_info (); ++ ginfo->stmt = c_stmt_info->stmt; ++ worklist->safe_push (ginfo); ++ c_stmt_info = c_stmt_info->next_element; ++ } ++ if (!succ) ++ { ++ unsigned k = 0; ++ ginfo = NULL; ++ FOR_EACH_VEC_ELT (*worklist, k, ginfo) ++ delete ginfo; ++ vec_free (worklist); ++ break; ++ } ++ worklists.safe_push (worklist); ++ group_size -= vf; ++ } ++ } ++ return worklists.length (); ++} ++ ++static bool ++check_same_oprand_type (tree op1, tree op2) ++{ ++ tree type1 = TREE_TYPE (op1); ++ tree type2 = TREE_TYPE (op2); ++ if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE) ++ return false; ++ ++ return (TREE_CODE (type1) == TREE_CODE (type2) ++ && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2) ++ && TYPE_PRECISION (type1) == TYPE_PRECISION (type2)); ++} ++ ++static bool ++bit_field_p (gimple *stmt) ++{ ++ unsigned i = 0; ++ auto_vec<data_reference_p, 2> datarefs_vec; ++ data_reference_p dr; ++ if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec)) ++ return true; ++ ++ FOR_EACH_VEC_ELT (datarefs_vec, i, dr) ++ { ++ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF ++ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) ++ return true; ++ } ++ return false; ++} ++ ++static inline bool ++shift_operation (enum tree_code op) ++{ ++ return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR ++ || op == RROTATE_EXPR; ++} ++ ++/* Return relationship between USE_STMT and the first use_stmt of the group. ++ RHS1 is the lhs of stmt recorded in group_info. If another rhs of use_stmt ++ is not a constant, return UNCERTAIN and re-check it later. */ ++ ++static unsigned ++check_isomorphic (gimple *use_stmt, gimple *first, ++ tree rhs1, vec<tree> &hetero_lhs) ++{ ++ /* Check same operation. */ ++ enum tree_code rhs_code_first = gimple_assign_rhs_code (first); ++ enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt); ++ if (rhs_code_first != rhs_code_current) ++ return HETEROGENEOUS; ++ ++ /* For shift operations, oprands should be equal. */ ++ if (shift_operation (rhs_code_current)) ++ { ++ tree shift_op_first = gimple_assign_rhs2 (first); ++ tree shift_op_current = gimple_assign_rhs2 (use_stmt); ++ if (!operand_equal_p (shift_op_first, shift_op_current, 0) ++ || !TREE_CONSTANT (shift_op_first)) ++ return HETEROGENEOUS; ++ ++ return ISOMORPHIC; ++ } ++ /* Type convertion expr or assignment. */ ++ if (gimple_num_ops (first) == 2) ++ return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR ++ || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS; ++ ++ /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and ++ the other one as rhs2. Check if define-stmt of current rhs2 is isomorphic ++ with define-stmt of rhs2 in the first USE_STMT at this group. 
*/ ++ tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1 ++ ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first); ++ tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1 ++ ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt); ++ ++ if (check_same_oprand_type (rhs2_first, rhs2_curr)) ++ { ++ if (TREE_CONSTANT (rhs2_curr)) ++ return ISOMORPHIC; ++ else if (hetero_lhs.contains (rhs2_curr)) ++ return HETEROGENEOUS; ++ ++ /* Provisionally set the stmt as uncertain and analyze the whole group ++ in function CHECK_UNCERTAIN later if all use_stmts are uncertain. */ ++ return UNCERTAIN; ++ } ++ return HETEROGENEOUS; ++} ++ ++static bool ++unsupported_operations (gimple *stmt) ++{ ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ return code == COND_EXPR; ++} ++ ++/* Check if the single use_stmt of STMT is isomorphic with the first one's ++ use_stmt in current group. */ ++ ++static unsigned ++check_use_stmt (group_info elmt, gimple *&first, ++ vec<gimple *> &tmp_stmts, vec<tree> &hetero_lhs) ++{ ++ if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN) ++ return HETEROGENEOUS; ++ use_operand_p dummy; ++ tree lhs = gimple_assign_lhs (elmt->stmt); ++ gimple *use_stmt = NULL; ++ single_imm_use (lhs, &dummy, &use_stmt); ++ /* STMTs with three rhs are not supported, e.g., GIMPLE_COND. */ ++ if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN ++ || unsupported_operations (use_stmt) || bit_field_p (use_stmt)) ++ return HETEROGENEOUS; ++ tmp_stmts.safe_push (use_stmt); ++ if (first == NULL) ++ { ++ first = use_stmt; ++ return UNINITIALIZED; ++ } ++ /* Check if current use_stmt and the first menber's use_stmt in the group ++ are of the same type. */ ++ tree first_lhs = gimple_assign_lhs (first); ++ tree curr_lhs = gimple_assign_lhs (use_stmt); ++ if (!check_same_oprand_type (first_lhs, curr_lhs)) ++ return HETEROGENEOUS; ++ return check_isomorphic (use_stmt, first, lhs, hetero_lhs); ++} ++ ++/* Replace stmt field in group with stmts in TMP_STMTS, and insert their ++ lhs_info to ISOMER_LHS. */ ++ ++static void ++update_isomer_lhs (vec<group_info> *group, unsigned group_num, ++ unsigned iteration, isomer_stmt_lhs &isomer_lhs, ++ vec<gimple *> &tmp_stmts, int &profit, ++ vec<unsigned> &merged_groups) ++{ ++ group_info elmt = NULL; ++ /* Do not insert temp array if isomorphic stmts from grouped load have ++ only casting operations. Once isomorphic calculation has 3 oprands, ++ such as plus operation, this group can be regarded as cut point. */ ++ bool operated = (gimple_num_ops (tmp_stmts0) == 3); ++ /* Do not insert temp arrays if search of iosomophic stmts reaches ++ MEM stmts. */ ++ bool has_vdef = gimple_vdef (tmp_stmts0) != NULL; ++ bool merge = false; ++ for (unsigned i = 0; i < group->length (); i++) ++ { ++ elmt = (*group)i; ++ elmt->stmt = has_vdef ? NULL : tmp_stmtsi; ++ elmt->cut_point = has_vdef ? 
false : (elmt->cut_point || operated); ++ elmt->uncertain = false; ++ elmt->done = has_vdef; ++ tree lhs = gimple_assign_lhs (tmp_stmtsi); ++ if (isomer_lhs.find (lhs) != isomer_lhs.end ()) ++ { ++ merge = true; ++ continue; ++ } ++ isomer_lhslhs = std::make_pair (group_num, iteration); ++ } ++ if (merge) ++ { ++ merged_groups.safe_push (group_num); ++ profit = 0; ++ return; ++ } ++ enum vect_cost_for_stmt kind = scalar_stmt; ++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit = (tmp_stmts.length () - 1) * scalar_cost; ++} ++ ++/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num ++ and iteration are same, GROUP is isomorphic. */ ++ ++static unsigned ++check_isomorphic_rhs (vec<group_info> *group, vec<gimple *> &tmp_stmts, ++ isomer_stmt_lhs &isomer_lhs) ++{ ++ group_info elmt = NULL; ++ gimple *stmt = NULL; ++ unsigned j = 0; ++ unsigned group_num = -1u; ++ unsigned iteration = -1u; ++ tree rhs1 = NULL; ++ tree rhs2 = NULL; ++ unsigned status = UNINITIALIZED; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ rhs1 = gimple_assign_lhs (elmt->stmt); ++ stmt = tmp_stmtsj; ++ rhs2 = (rhs1 == gimple_assign_rhs1 (stmt)) ++ ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt); ++ isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2); ++ if (iter != isomer_lhs.end ()) ++ { ++ if (group_num == -1u) ++ { ++ group_num = iter->second.first; ++ iteration = iter->second.second; ++ status |= ISOMORPHIC; ++ continue; ++ } ++ if (iter->second.first == group_num ++ && iter->second.second == iteration) ++ { ++ status |= ISOMORPHIC; ++ continue; ++ } ++ return HETEROGENEOUS; ++ } ++ else ++ status |= UNCERTAIN; ++ } ++ return status; ++} ++ ++/* Update group_info for uncertain groups. */ ++ ++static void ++update_uncertain_stmts (vec<group_info> *group, unsigned group_num, ++ unsigned iteration, vec<gimple *> &tmp_stmts) ++{ ++ unsigned j = 0; ++ group_info elmt = NULL; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ elmt->uncertain = true; ++ elmt->done = false; ++ } ++} ++ ++/* Push stmts in TMP_STMTS into HETERO_LHS. */ ++ ++static void ++set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs, ++ vec<gimple *> &tmp_stmts) ++{ ++ group_info elmt = NULL; ++ unsigned i = 0; ++ for (i = 0; i < group->length (); i++) ++ { ++ elmt = (*group)i; ++ elmt->uncertain = false; ++ elmt->done = true; ++ } ++ gimple *stmt = NULL; ++ FOR_EACH_VEC_ELT (tmp_stmts, i, stmt) ++ if (stmt != NULL) ++ hetero_lhs.safe_push (gimple_assign_lhs (stmt)); ++} ++ ++/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP. ++ Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT. ++ ++ Try to find rhs2 in ISOMER_LHS, if all found rhs2 have same group_num ++ and iteration, this uncertain group is isomorphic. ++ ++ If no rhs matched, this GROUP remains uncertain and update group_info. ++ ++ Otherwise, this GROUP is heterogeneous and return true to end analysis ++ for this group. 
*/ ++ ++static bool ++check_uncertain (vec<group_info> *group, unsigned group_num, ++ unsigned iteration, int &profit, ++ vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs, ++ vec<tree> &hetero_lhs, vec<unsigned> &merged_groups) ++{ ++ unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs); ++ bool done = false; ++ switch (status) ++ { ++ case UNCERTAIN: ++ update_uncertain_stmts (group, group_num, iteration, tmp_stmts); ++ break; ++ case ISOMORPHIC: ++ update_isomer_lhs (group, group_num, iteration, isomer_lhs, ++ tmp_stmts, profit, merged_groups); ++ break; ++ default: ++ set_hetero (group, hetero_lhs, tmp_stmts); ++ done = true; ++ } ++ return done; ++} ++ ++/* Return false if analysis of this group is not finished, e.g., isomorphic or ++ uncertain. Calculate the profit if vectorized. */ ++ ++static bool ++check_group (vec<group_info> *group, unsigned group_num, unsigned iteration, ++ int &profit, vec<unsigned> &merged_groups, ++ isomer_stmt_lhs &isomer_lhs, vec<tree> &hetero_lhs) ++{ ++ unsigned j = 0; ++ group_info elmt = NULL; ++ gimple *first = NULL; ++ unsigned res = 0; ++ /* Record single use stmts in TMP_STMTS and decide whether replace stmts in ++ ginfo in succeeding processes. */ ++ auto_vec<gimple *, 12> tmp_stmts; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ if (merged_groups.contains (group_num)) ++ return true; ++ res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs); ++ } ++ ++ /* Update each group member according to RES. */ ++ switch (res) ++ { ++ case ISOMORPHIC: ++ update_isomer_lhs (group, group_num, iteration, isomer_lhs, ++ tmp_stmts, profit, merged_groups); ++ return false; ++ case UNCERTAIN: ++ return check_uncertain (group, group_num, iteration, profit, ++ tmp_stmts, isomer_lhs, hetero_lhs, ++ merged_groups); ++ default: ++ set_hetero (group, hetero_lhs, tmp_stmts); ++ return true; ++ } ++} ++ ++/* Return true if all analysises are done except uncertain groups. */ ++ ++static bool ++end_of_search (vec<vec<group_info> *> &circular_queue, ++ vec<unsigned> &merged_groups) ++{ ++ unsigned i = 0; ++ vec<group_info> *group = NULL; ++ group_info elmt = NULL; ++ FOR_EACH_VEC_ELT (circular_queue, i, group) ++ { ++ if (merged_groups.contains (i)) ++ continue; ++ elmt = (*group)0; ++ /* If there is any isomorphic use_stmts, continue analysis of isomorphic ++ use_stmts. */ ++ if (!elmt->done && !elmt->uncertain) ++ return false; ++ } ++ return true; ++} ++ ++/* Push valid stmts to STMTS as cutpoints. */ ++ ++static bool ++check_any_cutpoints (vec<vec<group_info> *> &circular_queue, ++ vec<gimple *> *&stmts, vec<unsigned> &merged_groups) ++{ ++ unsigned front = 0; ++ vec<group_info> *group = NULL; ++ group_info elmt = NULL; ++ unsigned max = circular_queue.length () * circular_queue0->length (); ++ vec_alloc (stmts, max); ++ while (front < circular_queue.length ()) ++ { ++ unsigned i = 0; ++ if (merged_groups.contains (front)) ++ { ++ front++; ++ continue; ++ } ++ group = circular_queuefront++; ++ FOR_EACH_VEC_ELT (*group, i, elmt) ++ if (elmt->stmt != NULL && elmt->done && elmt->cut_point) ++ stmts->safe_push (elmt->stmt); ++ } ++ return stmts->length () != 0; ++} ++ ++/* Grouped loads are isomorphic. Make pair for group number and iteration, ++ map load stmt to this pair. We set iteration 0 here. 
*/ ++ ++static void ++init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs) ++{ ++ vec<group_info> *group = NULL; ++ group_info elmt = NULL; ++ unsigned i = 0; ++ FOR_EACH_VEC_ELT (groups, i, group) ++ { ++ unsigned j = 0; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ isomer_lhsgimple_assign_lhs (elmt->stmt) = std::make_pair (i, 0); ++ } ++} ++ ++/* It's not a strict analysis of load/store profit. Assume scalar and vector ++ load/store are of the same cost. The result PROFIT equals profit form ++ vectorizing of scalar loads/stores minus cost of a vectorized load/store. */ ++ ++static int ++load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops) ++{ ++ int profit = 0; ++ enum vect_cost_for_stmt kind = scalar_load; ++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost; ++ profit -= new_mem_ops / vf * scalar_cost; ++ kind = scalar_store; ++ scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit -= new_mem_ops / vf * scalar_cost; ++ return profit; ++} ++ ++/* Breadth first search the graph consisting of define-use chain starting from ++ the circular queue initialized by function BUILD_QUEUE. Find single use of ++ each stmt in group and check if they are isomorphic. Isomorphic is defined ++ as same rhs type, same operator, and isomorphic calculation of each rhs ++ starting from load. If another rhs is uncertain to be isomorphic, put it ++ at the end of circular queue and re-analyze it during the next iteration. ++ If a group shares the same use_stmt with another group, skip one of them in ++ succeedor prcoesses as merged. Iterate the circular queue until all ++ remianing groups heterogeneous or reaches MEN stmts. If all other groups ++ have finishes the analysis, and the remaining groups are uncertain, ++ return false to avoid endless loop. */ ++ ++bool ++bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue, ++ stmts_profit &profit_pair, unsigned vf, ++ bool &reach_vdef) ++{ ++ isomer_stmt_lhs isomer_lhs; ++ auto_vec<tree> hetero_lhs; ++ auto_vec<unsigned> merged_groups; ++ vec<group_info> *group = NULL; ++ /* True if analysis finishes. */ ++ bool done = false; ++ int profit_sum = 0; ++ vec<gimple *> *stmts = NULL; ++ init_isomer_lhs (circular_queue, isomer_lhs); ++ for (unsigned i = 1; !done; ++i) ++ { ++ unsigned front = 0; ++ /* Re-initialize DONE to TRUE while a new iteration begins. */ ++ done = true; ++ while (front < circular_queue.length ()) ++ { ++ int profit = 0; ++ group = circular_queuefront; ++ done &= check_group (group, front, i, profit, merged_groups, ++ isomer_lhs, hetero_lhs); ++ profit_sum += profit; ++ if (profit != 0 && (*group)0->stmt == NULL) ++ { ++ reach_vdef = true; ++ return false; ++ } ++ ++front; ++ } ++ /* Uncertain result, return. */ ++ if (!done && end_of_search (circular_queue, merged_groups)) ++ return false; ++ } ++ if (check_any_cutpoints (circular_queue, stmts, merged_groups)) ++ { ++ profit_pair.first = stmts; ++ unsigned loads = circular_queue.length () * circular_queue0->length (); ++ profit_pair.second = profit_sum + load_store_profit (loads, vf, ++ stmts->length ()); ++ if (profit_pair.second > 0) ++ return true; ++ } ++ return false; ++} ++ ++/* Free memory allocated by ginfo. 
*/ ++ ++static void ++free_ginfos (vec<vec<group_info> *> &worklists) ++{ ++ vec<group_info> *worklist; ++ unsigned i = 0; ++ while (i < worklists.length ()) ++ { ++ worklist = worklistsi++; ++ group_info ginfo; ++ unsigned j = 0; ++ FOR_EACH_VEC_ELT (*worklist, j, ginfo) ++ delete ginfo; ++ vec_free (worklist); ++ } ++} ++ ++static void ++release_tmp_stmts (vf_stmts_profit_map &candi_stmts) ++{ ++ vf_stmts_profit_map::iterator iter; ++ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) ++ iter->second.first->release (); ++} ++ ++/* Choose the group of stmt with maximun profit. */ ++ ++static bool ++decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts) ++{ ++ vf_stmts_profit_map::iterator iter; ++ int profit = 0; ++ int max = 0; ++ vec<gimple *> *tmp = NULL; ++ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) ++ { ++ profit = iter->second.second; ++ if (profit > max) ++ { ++ tmp = iter->second.first; ++ max = profit; ++ } ++ } ++ if (max == 0) ++ { ++ release_tmp_stmts (candi_stmts); ++ return false; ++ } ++ unsigned i = 0; ++ gimple *stmt = NULL; ++ FOR_EACH_VEC_ELT (*tmp, i, stmt) ++ stmts.safe_push (stmt); ++ release_tmp_stmts (candi_stmts); ++ return stmts.length () != 0; ++} ++ ++/* Find isomorphic stmts from grouped loads with vector factor VF. ++ ++ Given source code as follows and ignore casting. ++ ++ a0 = (a0 + b0) + ((a4 - b4) << 16); ++ a1 = (a1 + b1) + ((a5 - b5) << 16); ++ a2 = (a2 + b2) + ((a6 - b6) << 16); ++ a3 = (a3 + b3) + ((a7 - b7) << 16); ++ ++ We get grouped loads in VINFO as ++ ++ GROUP_1 GROUP_2 ++ _1 = *a _11 = *b ++ _2 = *(a + 1) _12 = *(b + 1) ++ _3 = *(a + 2) _13 = *(b + 2) ++ _4 = *(a + 3) _14 = *(b + 3) ++ _5 = *(a + 4) _15 = *(b + 4) ++ _6 = *(a + 5) _16 = *(b + 5) ++ _7 = *(a + 6) _17 = *(b + 6) ++ _8 = *(a + 7) _18 = *(b + 7) ++ ++ First we try VF = 8, we get two worklists ++ ++ WORKLIST_1 WORKLIST_2 ++ _1 = *a _11 = *b ++ _2 = *(a + 1) _12 = *(b + 1) ++ _3 = *(a + 2) _13 = *(b + 2) ++ _4 = *(a + 3) _14 = *(b + 3) ++ _5 = *(a + 4) _15 = *(b + 4) ++ _6 = *(a + 5) _16 = *(b + 5) ++ _7 = *(a + 6) _17 = *(b + 6) ++ _8 = *(a + 7) _18 = *(b + 7) ++ ++ We find _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic, ++ so we try VF = VF / 2. ++ ++ GROUP_1 GROUP_2 ++ _1 = *a _5 = *(a + 4) ++ _2 = *(a + 1) _6 = *(a + 5) ++ _3 = *(a + 2) _7 = *(a + 6) ++ _4 = *(a + 3) _8 = *(a + 7) ++ ++ GROUP_3 GROUP_4 ++ _11 = *b _15 = *(b + 4) ++ _12 = *(b + 1) _16 = *(b + 5) ++ _13 = *(b + 2) _17 = *(b + 6) ++ _14 = *(b + 3) _18 = *(b + 7) ++ ++ We first analyze group_1, and find all operations are isomorphic, then ++ replace stmts in group_1 with their use_stmts. Group_2 as well. ++ ++ GROUP_1 GROUP_2 ++ _111 = _1 + _11 _115 = _5 - _15 ++ _112 = _2 + _12 _116 = _6 - _16 ++ _113 = _3 + _13 _117 = _7 - _17 ++ _114 = _4 + _14 _118 = _8 - _18 ++ ++ When analyzing group_3 and group_4, we find their use_stmts are the same ++ as group_1 and group_2. So group_3 is regarded as being merged to group_1 ++ and group_4 being merged to group_2. In future procedures, we will skip ++ group_3 and group_4. ++ ++ We repeat such processing until opreations are not isomorphic or searching ++ reaches MEM stmts. In our given case, searching end up at a0, a1, a2 and ++ a3. 
*/ ++ ++static bool ++find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts) ++{ ++ unsigned vf = get_max_vf (vinfo); ++ if (vf == 0) ++ return false; ++ auto_vec<vec<group_info> *> circular_queue; ++ /* Map of vector factor and corresponding vectorizing profit. */ ++ stmts_profit profit_map; ++ /* Map of cut_points and vector factor. */ ++ vf_stmts_profit_map candi_stmts; ++ bool reach_vdef = false; ++ while (vf > 2) ++ { ++ if (build_queue (vinfo, vf, circular_queue) == 0) ++ return false; ++ if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef)) ++ { ++ if (reach_vdef) ++ { ++ release_tmp_stmts (candi_stmts); ++ free_ginfos (circular_queue); ++ circular_queue.release (); ++ return false; ++ } ++ vf /= 2; ++ free_ginfos (circular_queue); ++ circular_queue.release (); ++ continue; ++ } ++ candi_stmtsvf = profit_map; ++ free_ginfos (circular_queue); ++ vf /= 2; ++ circular_queue.release (); ++ } ++ return decide_stmts_by_profit (candi_stmts, stmts); ++} ++ ++/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index ++ and all indices are the same. */ ++ ++static tree ++find_index (vec<gimple *> seed_stmts) ++{ ++ if (seed_stmts.length () == 0) ++ return NULL; ++ bool found_index = false; ++ tree index = NULL; ++ unsigned ui = 0; ++ for (ui = 0; ui < seed_stmts.length (); ui++) ++ { ++ if (!gimple_vdef (seed_stmtsui)) ++ return NULL; ++ tree lhs = gimple_assign_lhs (seed_stmtsui); ++ unsigned num_index = 0; ++ while (TREE_CODE (lhs) == ARRAY_REF) ++ { ++ if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME) ++ { ++ num_index++; ++ if (num_index > 1) ++ return NULL; ++ if (index == NULL) ++ { ++ index = TREE_OPERAND (lhs, 1); ++ found_index = true; ++ } ++ else if (index != TREE_OPERAND (lhs, 1)) ++ return NULL; ++ } ++ lhs = TREE_OPERAND (lhs, 0); ++ } ++ if (!found_index) ++ return NULL; ++ } ++ return index; ++} ++ ++/* Check if expression of phi is an increament of a const. */ ++ ++static void ++check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc) ++{ ++ struct graph_edge *e_phi; ++ for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next) ++ { ++ struct vertex *v_inc = &(rdg->verticese_phi->dest); ++ if (!is_gimple_assign (RDGV_STMT (v_inc)) ++ || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR) ++ continue; ++ tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc)); ++ tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc)); ++ if (!(integer_onep (rhs1) || integer_onep (rhs2))) ++ continue; ++ struct graph_edge *e_inc; ++ /* find cycle with only two vertices inc and phi: inc <--> phi. */ ++ bool found_cycle = false; ++ for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next) ++ { ++ if (e_inc->dest == e_phi->src) ++ { ++ found_cycle = true; ++ break; ++ } ++ } ++ if (!found_cycle) ++ continue; ++ found_inc = true; ++ } ++} ++ ++/* Check if phi satisfies form like PHI <0, i>. */ ++ ++static inline bool ++iv_check_phi_stmt (gimple *phi_stmt) ++{ ++ return gimple_phi_num_args (phi_stmt) == 2 ++ && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0)) ++ || integer_zerop (gimple_phi_arg_def (phi_stmt, 1))); ++} ++ ++/* Make sure the iteration varible is a phi. 
++
++/* Make sure the iteration variable is a phi.  */
++
++static tree
++get_iv_from_seed (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
++{
++  tree index = find_index (seed_stmts);
++  if (index == NULL)
++    return NULL;
++  for (int i = 0; i < flow_only_rdg->n_vertices; i++)
++    {
++      struct vertex *v = &(flow_only_rdg->vertices[i]);
++      if (RDGV_STMT (v) != seed_stmts[0])
++        continue;
++      struct graph_edge *e;
++      bool found_phi = false;
++      for (e = v->pred; e; e = e->pred_next)
++        {
++          struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
++          gimple *phi_stmt = RDGV_STMT (v_phi);
++          if (gimple_code (phi_stmt) != GIMPLE_PHI
++              || gimple_phi_result (phi_stmt) != index)
++            continue;
++          if (!iv_check_phi_stmt (phi_stmt))
++            return NULL;
++          /* Find the inc expr among the successors of the phi.  */
++          bool found_inc = false;
++          check_phi_inc (v_phi, flow_only_rdg, found_inc);
++          if (!found_inc)
++            return NULL;
++          found_phi = true;
++          break;
++        }
++      if (!found_phi)
++        return NULL;
++      break;
++    }
++  return index;
++}
++
++/* Do not distribute the loop if vertices in ROOT_MAP have an
++   antidependence within FLOW_ONLY_RDG.  */
++
++static bool
++check_no_dependency (struct graph *flow_only_rdg, bitmap root_map)
++{
++  bitmap_iterator bi;
++  unsigned ui;
++  auto_vec<unsigned, 16> visited_nodes;
++  auto_bitmap visited_map;
++  EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi)
++    visited_nodes.safe_push (ui);
++  for (ui = 0; ui < visited_nodes.length (); ui++)
++    {
++      struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]);
++      struct graph_edge *e;
++      for (e = v->succ; e; e = e->succ_next)
++        {
++          if (bitmap_bit_p (root_map, e->dest))
++            return false;
++          if (bitmap_bit_p (visited_map, e->dest))
++            continue;
++          visited_nodes.safe_push (e->dest);
++          bitmap_set_bit (visited_map, e->dest);
++        }
++    }
++  return true;
++}
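check_no_dependency above is a plain worklist walk over the RDG's successor edges. A self-contained sketch of the same test on ordinary adjacency lists (invented names; the real code works on struct graph and bitmaps):

    #include <vector>

    // Starting from the cut points ("roots"), follow successor edges;
    // if any walk reaches another root, the candidate set is rejected.
    bool
    no_dependency_p (const std::vector<std::vector<int>> &succs,
                     const std::vector<bool> &is_root)
    {
      std::vector<bool> visited (succs.size (), false);
      std::vector<int> work;
      for (unsigned v = 0; v < succs.size (); v++)
        if (is_root[v])
          work.push_back (v);
      for (unsigned i = 0; i < work.size (); i++)   // worklist BFS
        for (int dest : succs[work[i]])
          {
            if (is_root[dest])
              return false;   // a root reaches a root: dependency found
            if (!visited[dest])
              {
                visited[dest] = true;
                work.push_back (dest);
              }
          }
      return true;
    }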
++
++/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure there
++   is no dependency among the stmts we found.  */
++
++static unsigned
++get_cut_points (struct graph *flow_only_rdg, bitmap cut_points,
++                loop_vec_info vinfo)
++{
++  unsigned n_stmts = 0;
++
++  /* STMTs that may be CUT_POINTS.  */
++  auto_vec<gimple *> stmts;
++  if (!find_isomorphic_stmts (vinfo, stmts))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "No temp array insertion: no isomorphic stmts"
++                 " were found.\n");
++      return 0;
++    }
++
++  for (int i = 0; i < flow_only_rdg->n_vertices; i++)
++    {
++      if (stmts.contains (RDG_STMT (flow_only_rdg, i)))
++        bitmap_set_bit (cut_points, i);
++    }
++  n_stmts = bitmap_count_bits (cut_points);
++
++  bool succ = check_no_dependency (flow_only_rdg, cut_points);
++  if (!succ)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "No temp array inserted: data dependency"
++                 " among isomorphic stmts.\n");
++      return 0;
++    }
++  return n_stmts;
++}
++
++static void
++build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi,
++                  poly_uint64 array_extent, tree iv,
++                  hash_set<tree> *tmp_array_vars, vec<gimple *> *transformed)
++{
++  gimple *stmt = RDGV_STMT (v);
++  tree lhs = gimple_assign_lhs (stmt);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "original stmt:\t");
++      print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
++    }
++  tree var_ssa = duplicate_ssa_name (lhs, stmt);
++  gimple_assign_set_lhs (stmt, var_ssa);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "changed to:\t");
++      print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
++    }
++  gimple_set_uid (gsi_stmt (gsi), -1);
++  tree vect_elt_type = TREE_TYPE (lhs);
++  tree array_type = build_array_type_nelts (vect_elt_type, array_extent);
++  tree array = create_tmp_var (array_type);
++  tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
++  tmp_array_vars->add (array_ssa);
++  gimple *store = gimple_build_assign (array_ssa, var_ssa);
++  tree new_vdef = make_ssa_name (gimple_vop (cfun), store);
++  gsi_insert_after (&gsi, store, GSI_NEW_STMT);
++  gimple_set_vdef (store, new_vdef);
++  transformed->safe_push (store);
++  gimple_set_uid (gsi_stmt (gsi), -1);
++  tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
++  tmp_array_vars->add (array_ssa2);
++  gimple *load = gimple_build_assign (lhs, array_ssa2);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "insert stmt:\t");
++      print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS);
++      fprintf (dump_file, " and stmt:\t");
++      print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS);
++    }
++  gimple_set_vuse (load, new_vdef);
++  gsi_insert_after (&gsi, load, GSI_NEW_STMT);
++  gimple_set_uid (gsi_stmt (gsi), -1);
++}
++
++/* Set bitmap PRODUCERS based on vec TRANSFORMED.  */
++
++void
++loop_distribution::build_producers (loop_p loop, bitmap producers,
++                                    vec<gimple *> &transformed)
++{
++  auto_vec<gimple *, 10> stmts;
++  stmts_from_loop (loop, &stmts);
++  int i = 0;
++  gimple *stmt = NULL;
++
++  FOR_EACH_VEC_ELT (stmts, i, stmt)
++    gimple_set_uid (stmt, i);
++  i = 0;
++  FOR_EACH_VEC_ELT (transformed, i, stmt)
++    bitmap_set_bit (producers, stmt->uid);
++}
++
++/* Transform stmt
++
++     A = FOO (ARG_1);
++
++   to
++
++     STMT_1: A1 = FOO (ARG_1);
++     STMT_2: X[I] = A1;
++     STMT_3: A = X[I];
++
++   The producer is STMT_2, which defines the temp array, and the
++   consumer is STMT_3, which uses the temp array.  */
++
++void
++loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg,
++                                 tree iv, bitmap cut_points,
++                                 hash_set<tree> *tmp_array_vars,
++                                 bitmap producers)
++{
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file, "=== do insertion ===\n");
++
++  auto_vec<gimple *> transformed;
++
++  /* Execution times of the loop.  */
++  poly_uint64 array_extent
++    = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1;
++
++  basic_block *bbs = get_loop_body_in_custom_order (loop, this,
++                                                    bb_top_order_cmp_r);
++
++  for (int i = 0; i < int (loop->num_nodes); i++)
++    {
++      basic_block bb = bbs[i];
++
++      /* Find all cut points in bb and transform them.  */
++      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++           gsi_next (&gsi))
++        {
++          unsigned j = gimple_uid (gsi_stmt (gsi));
++          if (bitmap_bit_p (cut_points, j))
++            {
++              struct vertex *v = &(flow_only_rdg->vertices[j]);
++              build_temp_array (v, gsi, array_extent, iv, tmp_array_vars,
++                                &transformed);
++            }
++        }
++    }
++  build_producers (loop, producers, transformed);
++  update_ssa (TODO_update_ssa);
++  free (bbs);
++}
++
++/* After temp array insertion, given stmts
++     STMT_1: M = FOO (ARG_1);
++     STMT_2: X[I] = M;
++     STMT_3: A = X[I];
++   STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next.
++   Replace M with A, and remove STMT_2 and STMT_3.  */
++
++static void
++reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition,
++                     gimple_stmt_iterator &gsi, int j)
++{
++  struct vertex *v = &(flow_only_rdg->vertices[j]);
++  gimple *stmt = RDGV_STMT (v);
++  gimple *prev = stmt->prev;
++  gimple *next = stmt->next;
++  tree n_lhs = gimple_assign_lhs (next);
++  gimple_assign_set_lhs (prev, n_lhs);
++  unlink_stmt_vdef (stmt);
++  if (partition)
++    bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
++  gsi_remove (&gsi, true);
++  release_defs (stmt);
++  if (partition)
++    bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
++  gsi_remove (&gsi, true);
++}
++
++void
++loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg,
++                                     bitmap producers,
++                                     struct partition *partition)
++{
++  basic_block *bbs = get_loop_body_in_custom_order (loop, this,
++                                                    bb_top_order_cmp_r);
++  for (int i = 0; i < int (loop->num_nodes); i++)
++    {
++      basic_block bb = bbs[i];
++      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++           gsi_next (&gsi))
++        {
++          unsigned j = gimple_uid (gsi_stmt (gsi));
++          if (bitmap_bit_p (producers, j))
++            reset_gimple_assign (flow_only_rdg, partition, gsi, j);
++        }
++    }
++  update_ssa (TODO_update_ssa);
++  free (bbs);
++}
++
++/* Insert temp arrays if isomorphic computation exists.  Temp arrays
++   will be regarded as SEED_STMTS for building partitions in the
++   succeeding processes.  */
++
++bool
++loop_distribution::insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
++                                       hash_set<tree> *tmp_array_vars,
++                                       bitmap producers)
++{
++  struct graph *flow_only_rdg = build_rdg (loop, NULL);
++  gcc_checking_assert (flow_only_rdg != NULL);
++  tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts);
++  if (iv == NULL)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "Loop %d no temp array insertion: failed to get"
++                 " iteration variable.\n", loop->num);
++      free_rdg (flow_only_rdg);
++      return false;
++    }
++  auto_bitmap cut_points;
++  loop_vec_info vinfo = loop_vec_info_for_loop (loop);
++  unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo);
++  delete vinfo;
++  loop->aux = NULL;
++  if (n_cut_points == 0)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "Loop %d no temp array insertion: no cut points"
++                 " found.\n", loop->num);
++      free_rdg (flow_only_rdg);
++      return false;
++    }
++  do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers);
++  if (dump_enabled_p ())
++    {
++      dump_user_location_t loc = find_loop_location (loop);
++      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:"
++                       " %d temp arrays inserted in Loop %d.\n",
++                       n_cut_points, loop->num);
++    }
++  free_rdg (flow_only_rdg);
++  return true;
++}
++
++static bool find_seed_stmts_for_distribution (class loop *, vec<gimple *> *);
++
+ /* Distributes the code from LOOP in such a way that producer statements
+    are placed before consumer statements.  Tries to separate only the
+    statements from STMTS into separate loops.  Returns the number of
+@@ -2972,7 +4342,7 @@ loop_distribution::finalize_partitions (class loop *loop,
+ 
+ int
+ loop_distribution::distribute_loop (class loop *loop,
+-                                    const vec<gimple *> &stmts,
++                                    vec<gimple *> &stmts,
+                     control_dependences *cd, int *nb_calls, bool *destroy_p,
+                     bool only_patterns_p)
+ {
+@@ -3021,6 +4391,33 @@ loop_distribution::distribute_loop (class loop *loop,
+       return 0;
+     }
+ 
++  /* Try to distribute LOOP if it is simple enough but cannot be
++     vectorized.  If LOOP has grouped loads, recursively find isomorphic
++     stmts and insert temp arrays, rebuild the RDG and call
++     find_seed_stmts_for_distribution to replace STMTS.  */
++
++  hash_set<tree> tmp_array_vars;
++
++  /* STMTs that define the inserted TMP_ARRAYs.  */
++  auto_bitmap producers;
++
++  /* New SEED_STMTS after insertion.  */
++  auto_vec<gimple *> work_list;
++  bool insert_success = false;
++  if (may_insert_temp_arrays (loop, rdg, cd))
++    {
++      if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers))
++        {
++          if (find_seed_stmts_for_distribution (loop, &work_list))
++            {
++              insert_success = true;
++            }
++          else
++            remove_insertion (loop, rdg, producers, NULL);
++          rebuild_rdg (loop, rdg, cd);
++        }
++    }
++
+   data_reference_p dref;
+   for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
+     dref->aux = (void *) (uintptr_t) i;
+@@ -3029,7 +4426,10 @@ loop_distribution::distribute_loop (class loop *loop,
+     dump_rdg (dump_file, rdg);
+ 
+   auto_vec<struct partition *, 3> partitions;
+-  rdg_build_partitions (rdg, stmts, &partitions);
++  if (work_list.length () > stmts.length ())
++    rdg_build_partitions (rdg, &work_list, &partitions);
++  else
++    rdg_build_partitions (rdg, &stmts, &partitions);
+ 
+   auto_vec<ddr_p> alias_ddrs;
+ 
+@@ -3101,7 +4501,7 @@ loop_distribution::distribute_loop (class loop *loop,
+       for (int j = i + 1;
+            partitions.iterate (j, &partition); ++j)
+         {
+-          if (share_memory_accesses (rdg, into, partition))
++          if (share_memory_accesses (rdg, into, partition, &tmp_array_vars))
+             {
+               partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
+               partitions.unordered_remove (j);
+@@ -3151,7 +4551,7 @@ loop_distribution::distribute_loop (class loop *loop,
+         }
+     }
+ 
+-  finalize_partitions (loop, &partitions, &alias_ddrs);
++  finalize_partitions (loop, &partitions, &alias_ddrs, producers);
+ 
+   /* If there is a reduction in all partitions make sure the last one
+      is not classified for builtin code generation.  */
+@@ -3169,6 +4569,24 @@ loop_distribution::distribute_loop (class loop *loop,
+     }
+ 
+   nbp = partitions.length ();
++
++  /* If we have inserted TMP_ARRAYs but only one partition is left after
++     the preceding processing, remove the inserted TMP_ARRAYs and restore
++     the original version.  */
++
++  if (nbp == 1 && insert_success)
++    {
++      struct partition *partition = NULL;
++      partitions.iterate (0, &partition);
++      remove_insertion (loop, rdg, producers, partition);
++      if (dump_enabled_p ())
++        {
++          dump_user_location_t loc = find_loop_location (loop);
++          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:"
++                           " unable to distribute loop %d.\n", loop->num);
++        }
++    }
++
+   if (nbp == 0
+       || (nbp == 1 && !partition_builtin_p (partitions[0]))
+       || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
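Before the vectorizer-side changes below, it may help to picture the source-level effect of the insertion machinery above on a hypothetical loop. This is an illustration only, assuming t1/t2 are the inserted temp arrays; it is not code from the patch:

    // Before: one loop, no legal partition boundary for the distributor.
    void
    before (short *a, const short *b, const short *c,
            const short *d, const short *e, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = (b[i] + c[i]) + ((d[i] - e[i]) << 16);
    }

    // After insertion and distribution: the temp arrays act as the
    // producer/consumer seam between two simpler, vectorizable loops.
    void
    after (short *a, const short *b, const short *c,
           const short *d, const short *e, int n,
           short *t1, short *t2)   // hypothetical inserted temp arrays
    {
      for (int i = 0; i < n; i++)
        {
          t1[i] = b[i] + c[i];
          t2[i] = d[i] - e[i];
        }
      for (int i = 0; i < n; i++)
        a[i] = t1[i] + (t2[i] << 16);
    }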
+diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
+index 04e68f621..aae7f62f3 100644
+--- a/gcc/tree-vect-data-refs.cc
++++ b/gcc/tree-vect-data-refs.cc
+@@ -2791,6 +2791,9 @@ vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
+       DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
+ 
+       DR_GROUP_SIZE (stmt_info) = groupsize;
++
++      DR_GROUP_SLP_TRANSPOSE (stmt_info) = false;
++
+       if (dump_enabled_p ())
+         {
+           dump_printf_loc (MSG_NOTE, vect_location,
+@@ -2820,6 +2823,20 @@ vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
+                          DR_GROUP_GAP (stmt_info));
+         }
+ 
++  /* SLP: create an SLP data structure for every interleaving group of
++     loads for further analysis in vect_analyse_slp.  */
++  if (DR_IS_READ (dr) && !slp_impossible)
++    {
++      if (loop_vinfo)
++        {
++          LOOP_VINFO_GROUPED_LOADS (loop_vinfo).safe_push (stmt_info);
++        }
++      if (bb_vinfo)
++        {
++          BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (stmt_info);
++        }
++    }
++
+   /* SLP: create an SLP data structure for every interleaving group of
+      stores for further analysis in vect_analyse_slp.  */
+   if (DR_IS_WRITE (dr) && !slp_impossible)
+@@ -5636,6 +5653,226 @@ vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
+     }
+ }
+ 
++/* Encoding the PERM_MASK_FIRST.  */
++
++static void
++vect_indices_encoding_first (tree vectype, unsigned int array_num,
++                             tree &perm_mask_high_first,
++                             tree &perm_mask_low_first)
++{
++  unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
++  vec_perm_builder sel (nelt, nelt, 1);
++  sel.quick_grow (nelt);
++  unsigned int group_num = nelt / array_num;
++  unsigned int index = 0;
++  unsigned int array = 0;
++  unsigned int group = 0;
++
++  /* The encoding has 1 pattern in the first stage.  */
++  for (array = 0; array < array_num / 2; array++)
++    {
++      for (group = 0; group < group_num * 2; group++)
++        {
++          sel[index++] = array + array_num * group;
++        }
++    }
++  vec_perm_indices indices (sel, 2, nelt);
++  perm_mask_high_first = vect_gen_perm_mask_checked (vectype, indices);
++
++  index = 0;
++  for (array = array_num / 2; array < array_num; array++)
++    {
++      for (group = 0; group < group_num * 2; group++)
++        {
++          sel[index++] = array + array_num * group;
++        }
++    }
++  indices.new_vector (sel, 2, nelt);
++  perm_mask_low_first = vect_gen_perm_mask_checked (vectype, indices);
++}
++
++/* Encoding the PERM_MASK.  */
++
++static void
++vect_indices_encoding (tree vectype, unsigned int array_num,
++                       tree &perm_mask_high, tree &perm_mask_low)
++{
++  unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
++  vec_perm_builder sel (nelt, nelt, 1);
++  sel.quick_grow (nelt);
++  unsigned int group_num = nelt / array_num;
++  unsigned int index = 0;
++  unsigned int array = 0;
++  unsigned int group = 0;
++
++  /* The encoding has 2 patterns in the following stages.  */
++  for (array = 0; array < array_num / 2; array++)
++    {
++      for (group = 0; group < group_num; group++)
++        {
++          sel[index++] = group + group_num * array;
++        }
++      for (group = 0; group < group_num; group++)
++        {
++          sel[index++] = nelt + group + group_num * array;
++        }
++    }
++  vec_perm_indices indices (sel, 2, nelt);
++  perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
++
++  index = 0;
++  for (array = array_num / 2; array < array_num; array++)
++    {
++      for (group = 0; group < group_num; group++)
++        {
++          sel[index++] = group + group_num * array;
++        }
++      for (group = 0; group < group_num; group++)
++        {
++          sel[index++] = nelt + group + group_num * array;
++        }
++    }
++  indices.new_vector (sel, 2, nelt);
++  perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
++}
++
++/* Function vect_transpose_store_chain.
++
++   Given a chain of interleaved stores in DR_CHAIN of LENGTH (which must
++   be a power of 2) and ARRAY_NUM, generate interleave_high/low stmts to
++   reorder the data correctly for the stores.  Return the final
++   references for the stores in RESULT_CHAIN.  This function is similar
++   to vect_permute_store_chain (); we interleave the contents of the
++   vectors in their order.
++
++   E.g., LENGTH is 4, the scalar type is short (i.e., VF is 8) and
++   ARRAY_NUM is 4.  That is, the input is 4 vectors each containing 8
++   elements, and 2 (VF / ARRAY_NUM) of the 8 elements come from the same
++   array.  We interleave the contents of the four vectors in their order.
++   We assign a number to each element; the input sequence is:
++
++     1st vec:   0  1  2  3  4  5  6  7
++     2nd vec:   8  9 10 11 12 13 14 15
++     3rd vec:  16 17 18 19 20 21 22 23
++     4th vec:  24 25 26 27 28 29 30 31
++
++   The output sequence should be:
++
++     1st vec:  0 4  8 12 16 20 24 28
++     2nd vec:  1 5  9 13 17 21 25 29
++     3rd vec:  2 6 10 14 18 22 26 30
++     4th vec:  3 7 11 15 19 23 27 31
++
++   In our example, we get 2 (VF / ARRAY_NUM) elements together in every
++   vector:
++
++     I1:  0  4  1  5  2  6  3  7
++     I2:  8 12  9 13 10 14 11 15
++     I3: 16 20 17 21 18 22 19 23
++     I4: 24 28 25 29 26 30 27 31
++
++   Then we use interleave_high/low instructions to create such output.
++   Every 2 (VF / ARRAY_NUM) elements are regarded as a whole.  The
++   permutation is done in log LENGTH stages.
++
++     I1: interleave_high (1st vec, 3rd vec)
++     I2: interleave_low  (1st vec, 3rd vec)
++     I3: interleave_high (2nd vec, 4th vec)
++     I4: interleave_low  (2nd vec, 4th vec)
++
++   The first stage of the sequence should be:
++
++     I1:  0  4 16 20  1  5 17 21
++     I2:  2  6 18 22  3  7 19 23
++     I3:  8 12 24 28  9 13 25 29
++     I4: 10 14 26 30 11 15 27 31
++
++   The following stage sequence, i.e. the final result, is:
++
++     I1: 0 4  8 12 16 20 24 28
++     I2: 1 5  9 13 17 21 25 29
++     I3: 2 6 10 14 18 22 26 30
++     I4: 3 7 11 15 19 23 27 31.  */
++
++void
++vect_transpose_store_chain (vec_info *vinfo, vec<tree> dr_chain,
++                            unsigned int length, unsigned int array_num,
++                            stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
++                            vec<tree> *result_chain)
++{
++  gimple *perm_stmt = NULL;
++  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
++  tree perm_mask_low_first = NULL;
++  tree perm_mask_high_first = NULL;
++  tree perm_mask_low = NULL;
++  tree perm_mask_high = NULL;
++  unsigned int log_length = exact_log2 (length);
++
++  /* Only a power of 2 is supported.  */
++  gcc_assert (pow2p_hwi (length));
++
++  /* The encoding has 2 types: one for the grouped pattern in the first
++     stage, another for the interleaved patterns in the following
++     stages.  */
++  gcc_assert (array_num != 0);
++
++  /* Create the grouped stmt (in the first stage):
++     group = nelt / array_num;
++     high_first = VEC_PERM_EXPR <vect1, vect2,
++       {0, array_num, 2*array_num, ..., (2*group-1)*array_num,
++        1, 1+array_num, 1+2*array_num, ..., 1+(2*group-1)*array_num,
++        ...,
++        array_num/2-1, (array_num/2-1)+array_num, ...,
++        (array_num/2-1)+(2*group-1)*array_num}>
++     low_first = VEC_PERM_EXPR <vect1, vect2,
++       {array_num/2, array_num/2+array_num, array_num/2+2*array_num,
++        ..., array_num/2+(2*group-1)*array_num,
++        array_num/2+1, array_num/2+1+array_num,
++        ..., array_num/2+1+(2*group-1)*array_num,
++        ...,
++        array_num-1, array_num-1+array_num,
++        ..., array_num-1+(2*group-1)*array_num}>  */
++  vect_indices_encoding_first (vectype, array_num, perm_mask_high_first,
++                               perm_mask_low_first);
++
++  /* Create the interleaving stmt (in the following stages):
++     high = VEC_PERM_EXPR <vect1, vect2,
++       {0, 1, ..., group-1,
++        nelt, nelt+1, ..., nelt+group-1,
++        group, group+1, ..., 2*group-1,
++        nelt+group, nelt+group+1, ..., nelt+2*group-1,
++        ...}>
++     low = VEC_PERM_EXPR <vect1, vect2,
++       {nelt/2, nelt/2+1, ..., nelt/2+group-1,
++        nelt*3/2, nelt*3/2+1, ..., nelt*3/2+group-1,
++        nelt/2+group, nelt/2+group+1, ..., nelt/2+2*group-1,
++        nelt*3/2+group, nelt*3/2+group+1, ..., nelt*3/2+2*group-1,
++        ...}>  */
++  vect_indices_encoding (vectype, array_num, perm_mask_high, perm_mask_low);
++
++  for (unsigned int perm_time = 0; perm_time < log_length; perm_time++)
++    {
++      for (unsigned int index = 0; index < length / 2; index++)
++        {
++          tree vect1 = dr_chain[index];
++          tree vect2 = dr_chain[index + length / 2];
++
++          tree high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
++          perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1, vect2,
++                                           perm_time == 0
++                                           ? perm_mask_high_first
++                                           : perm_mask_high);
++          vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
++          (*result_chain)[2 * index] = high;
++
++          tree low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
++          perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1, vect2,
++                                           perm_time == 0
++                                           ? perm_mask_low_first
++                                           : perm_mask_low);
++          vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
++          (*result_chain)[2 * index + 1] = low;
++        }
++      memcpy (dr_chain.address (), result_chain->address (),
++              length * sizeof (tree));
++    }
++}
++
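The masks built above are easiest to check on the concrete case from the comment (nelt = 8, array_num = 4, hence group_num = 2). The standalone sketch below recomputes the four selector vectors with the same loop nests; plain std::vector stands in for vec_perm_builder, and the values are illustrative only:

    #include <cstdio>
    #include <vector>

    int
    main ()
    {
      const unsigned nelt = 8, array_num = 4, group_num = nelt / array_num;
      std::vector<unsigned> high_first, low_first, high, low;

      // First stage: one pattern per mask (vect_indices_encoding_first).
      for (unsigned a = 0; a < array_num / 2; a++)
        for (unsigned g = 0; g < group_num * 2; g++)
          high_first.push_back (a + array_num * g);
      for (unsigned a = array_num / 2; a < array_num; a++)
        for (unsigned g = 0; g < group_num * 2; g++)
          low_first.push_back (a + array_num * g);

      // Following stages: two patterns per mask (vect_indices_encoding).
      for (unsigned a = 0; a < array_num / 2; a++)
        {
          for (unsigned g = 0; g < group_num; g++)
            high.push_back (g + group_num * a);
          for (unsigned g = 0; g < group_num; g++)
            high.push_back (nelt + g + group_num * a);
        }
      for (unsigned a = array_num / 2; a < array_num; a++)
        {
          for (unsigned g = 0; g < group_num; g++)
            low.push_back (g + group_num * a);
          for (unsigned g = 0; g < group_num; g++)
            low.push_back (nelt + g + group_num * a);
        }

      // Prints:
      //   high_first: 0 4 8 12 1 5 9 13   low_first: 2 6 10 14 3 7 11 15
      //   high:       0 1 8 9 2 3 10 11   low:       4 5 12 13 6 7 14 15
      const std::vector<unsigned> *masks[] = { &high_first, &low_first,
                                               &high, &low };
      for (const auto *m : masks)
        {
          for (unsigned v : *m)
            printf ("%u ", v);
          printf ("\n");
        }
      return 0;
    }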
+ /* Function vect_setup_realignment
+ 
+    This function is called when vectorizing an unaligned load using
+diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
+index 3435f9378..f296e9415 100644
+--- a/gcc/tree-vect-loop.cc
++++ b/gcc/tree-vect-loop.cc
+@@ -2856,7 +2856,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
+                      loop_vec_info main_loop_vinfo,
+                      const vector_modes &vector_modes, unsigned &mode_i,
+                      machine_mode &autodetected_vector_mode,
+-                     bool &fatal)
++                     bool &fatal, bool result_only_p)
+ {
+   loop_vec_info loop_vinfo
+     = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
+@@ -2865,6 +2865,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
+   loop_vinfo->vector_mode = vector_mode;
+   unsigned int suggested_unroll_factor = 1;
+ 
++  /* Loop_vinfo for the loop-distribution pass.  */
++  opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL);
+   /* Run the main analysis.  */
+   opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
+                                         &suggested_unroll_factor);
+@@ -2933,7 +2935,21 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
+ 
+   if (!res)
+     {
+-      delete loop_vinfo;
++
++      /* If the current analysis shows that LOOP cannot be vectorized,
++         loop_vinfo will be deleted.  If LOOP is under ldist analysis,
++         back it up before it is deleted, and return it if all modes have
++         been analyzed and vectorization still fails.  */
++      if (result_only_p && (mode_i == vector_modes.length ()
++                            || autodetected_vector_mode == VOIDmode))
++        {
++          fail_loop_vinfo = opt_loop_vec_info::success (loop_vinfo);
++          loop->aux = (loop_vec_info) fail_loop_vinfo;
++        }
++      else
++        {
++          delete loop_vinfo;
++        }
+       if (fatal)
+         gcc_checking_assert (main_loop_vinfo == NULL);
+       return opt_loop_vec_info::propagate_failure (res);
+@@ -2946,9 +2962,11 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
+ 
+    Apply a set of analyses on LOOP, and create a loop_vec_info struct
+    for it.  The different analyses will record information in the
+-   loop_vec_info struct.  */
++   loop_vec_info struct.  When RESULT_ONLY_P is true, quit the analysis
++   as soon as the loop is known to be vectorizable; if it is not, do not
++   delete the vinfo.  */
+ opt_loop_vec_info
+-vect_analyze_loop (class loop *loop, vec_info_shared *shared)
++vect_analyze_loop (class loop *loop, vec_info_shared *shared,
++                   bool result_only_p)
+ {
+   DUMP_VECT_SCOPE ("analyze_loop_nest");
+ 
+@@ -2996,6 +3014,12 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+           && !unlimited_cost_model (loop));
+   machine_mode autodetected_vector_mode = VOIDmode;
+   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
++  /* Loop_vinfo for the loop-distribution pass.  */
++  opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL);
++  if (result_only_p)
++    {
++      vect_slp_init ();
++    }
+   unsigned int mode_i = 0;
+   unsigned HOST_WIDE_INT simdlen = loop->simdlen;
+ 
+@@ -3019,10 +3043,16 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+       opt_loop_vec_info loop_vinfo
+         = vect_analyze_loop_1 (loop, shared, &loop_form_info,
+                                NULL, vector_modes, mode_i,
+-                               autodetected_vector_mode, fatal);
++                               autodetected_vector_mode, fatal, result_only_p);
+       if (fatal)
+         break;
+ 
++      if (result_only_p && (mode_i == vector_modes.length ()
++                            || autodetected_vector_mode == VOIDmode))
++        {
++          return loop_vinfo;
++        }
++
+       if (loop_vinfo)
+         {
+           /* Analyzis has been successful so update the VF value.  The
+@@ -3132,7 +3162,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+         = vect_analyze_loop_1 (loop, shared, &loop_form_info,
+                                first_loop_vinfo,
+                                vector_modes, mode_i,
+-                               autodetected_vector_mode, fatal);
++                               autodetected_vector_mode, fatal, result_only_p);
+       if (fatal)
+         break;
+ 
+diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
+index e1bcab0f7..c0c15773d 100644
+--- a/gcc/tree-vect-patterns.cc
++++ b/gcc/tree-vect-patterns.cc
+@@ -5632,8 +5632,8 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
+    internal functions.
*/ + { vect_recog_gather_scatter_pattern, "gather_scatter" }, + { vect_recog_mask_conversion_pattern, "mask_conversion" }, +- { vect_recog_widen_plus_pattern, "widen_plus" }, +- { vect_recog_widen_minus_pattern, "widen_minus" }, ++ // { vect_recog_widen_plus_pattern, "widen_plus" }, ++ // { vect_recog_widen_minus_pattern, "widen_minus" }, + }; + + const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs); +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index af477c31a..6cbf8085f 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -49,6 +49,8 @@ along with GCC; see the file COPYING3. If not see + #include "tree-eh.h" + #include "tree-cfg.h" + #include "alloc-pool.h" ++#include "print-tree.h" ++#include "gimple-pretty-print.h" + + static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *, + slp_tree, stmt_vector_for_cost *); +@@ -994,6 +996,21 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, + } + + gcc_assert (vectype); ++ if (!STMT_VINFO_VECTYPE (stmt_info)) ++ STMT_VINFO_VECTYPE (stmt_info) = vectype; ++ if (dump_file) ++ { ++ fprintf (dump_file, "vect_build_slp_tree_1: %p\n", stmt_info); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "vect_build_slp_tree_1: vectype="); ++ if (vectype) ++ print_generic_expr (dump_file, vectype); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "internal vectype="); ++ if (STMT_VINFO_VECTYPE (stmt_info)) ++ print_generic_expr (dump_file, STMT_VINFO_VECTYPE (stmt_info)); ++ fprintf (dump_file, "\n"); ++ } + + gcall *call_stmt = dyn_cast <gcall *> (stmt); + if (call_stmt) +@@ -1575,10 +1592,10 @@ vect_build_slp_tree (vec_info *vinfo, + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery for node %p succeeded\n", res); + gcc_assert (res_ == res); +- res->max_nunits = this_max_nunits; ++ res_->max_nunits = this_max_nunits; + vect_update_max_nunits (max_nunits, this_max_nunits); + /* Keep a reference for the bst_map use. */ +- SLP_TREE_REF_COUNT (res)++; ++ SLP_TREE_REF_COUNT (res_)++; + } + return res_; + } +@@ -3190,8 +3207,10 @@ vect_build_slp_instance (vec_info *vinfo, + + /* For basic block SLP, try to break the group up into multiples of + a vector size. */ ++ bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); + if (is_a <bb_vec_info> (vinfo) +- && (i > 1 && i < group_size)) ++ && (i > 1 && i < group_size) ++ && !bb_vinfo->transposed) + { + tree scalar_type + = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); +@@ -3301,84 +3320,1034 @@ vect_analyze_slp_instance (vec_info *vinfo, + scalar_stmts.create (DR_GROUP_SIZE (stmt_info)); + while (next_info) + { +- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); +- next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); ++ next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ } ++ } ++ else if (kind == slp_inst_kind_reduc_chain) ++ { ++ /* Collect the reduction stmts and store them in scalar_stmts. */ ++ scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info)); ++ while (next_info) ++ { ++ scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); ++ next_info = REDUC_GROUP_NEXT_ELEMENT (next_info); ++ } ++ /* Mark the first element of the reduction chain as reduction to properly ++ transform the node. In the reduction analysis phase only the last ++ element of the chain is marked as reduction. 
*/ ++ STMT_VINFO_DEF_TYPE (stmt_info) ++ = STMT_VINFO_DEF_TYPE (scalar_stmts.last ()); ++ STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) ++ = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); ++ } ++ else if (kind == slp_inst_kind_ctor) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt_info->stmt); ++ tree val; ++ scalar_stmts.create (CONSTRUCTOR_NELTS (rhs)); ++ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) ++ { ++ stmt_vec_info def_info = vinfo->lookup_def (val); ++ def_info = vect_stmt_to_vectorize (def_info); ++ scalar_stmts.quick_push (def_info); ++ } ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analyzing vectorizable constructor: %G\n", ++ stmt_info->stmt); ++ } ++ else if (kind == slp_inst_kind_reduc_group) ++ { ++ /* Collect reduction statements. */ ++ const vec<stmt_vec_info> &reductions ++ = as_a <loop_vec_info> (vinfo)->reductions; ++ scalar_stmts.create (reductions.length ()); ++ for (i = 0; reductions.iterate (i, &next_info); i++) ++ if ((STMT_VINFO_RELEVANT_P (next_info) ++ || STMT_VINFO_LIVE_P (next_info)) ++ /* ??? Make sure we didn't skip a conversion around a reduction ++ path. In that case we'd have to reverse engineer that conversion ++ stmt following the chain using reduc_idx and from the PHI ++ using reduc_def. */ ++ && STMT_VINFO_DEF_TYPE (next_info) == vect_reduction_def) ++ scalar_stmts.quick_push (next_info); ++ /* If less than two were relevant/live there's nothing to SLP. */ ++ if (scalar_stmts.length () < 2) ++ return false; ++ } ++ else ++ gcc_unreachable (); ++ ++ vec<stmt_vec_info> roots = vNULL; ++ if (kind == slp_inst_kind_ctor) ++ { ++ roots.create (1); ++ roots.quick_push (stmt_info); ++ } ++ /* Build the tree for the SLP instance. */ ++ bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts, ++ roots, ++ max_tree_size, limit, bst_map, ++ kind == slp_inst_kind_store ++ ? stmt_info : NULL); ++ if (!res) ++ roots.release (); ++ ++ /* ??? If this is slp_inst_kind_store and the above succeeded here's ++ where we should do store group splitting. */ ++ ++ return res; ++} ++ ++static inline bool ++is_const_assign (stmt_vec_info store_elem) ++{ ++ if (store_elem == NULL) ++ { ++ gcc_unreachable (); ++ } ++ gimple *stmt = store_elem->stmt; ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt); ++ return rhs_class == GIMPLE_SINGLE_RHS ++ && TREE_CONSTANT (gimple_assign_rhs1 (store_elem->stmt)); ++} ++ ++/* Push inits to INNERMOST_INITS and check const assign. */ ++ ++static bool ++record_innermost (vec<tree> &innermost_inits, ++ vec<tree> &innermost_offsets, ++ stmt_vec_info stmt_vinfo) ++{ ++ if (!stmt_vinfo) ++ { ++ return false; ++ } ++ stmt_vec_info next_info = stmt_vinfo; ++ while (next_info) ++ { ++ /* No need to vectorize constant assign in a transposed version. */ ++ if (is_const_assign (next_info)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "no need to vectorize, store is const assign: %G", ++ next_info->stmt); ++ } ++ return false; ++ } ++ innermost_inits.safe_push (STMT_VINFO_DR_INIT (next_info)); ++ innermost_offsets.safe_push (STMT_VINFO_DR_OFFSET (next_info)); ++ next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ } ++ return true; ++} ++ ++/* Compare inits to INNERMOST_INITS, return FALSE if inits do not match ++ the first grouped_store. And check const assign meanwhile. 
*/
++
++static bool
++compare_innermost (const vec<tree> &innermost_inits,
++                   const vec<tree> &innermost_offsets,
++                   stmt_vec_info stmt_vinfo)
++{
++  if (!stmt_vinfo || innermost_inits.length () != stmt_vinfo->size)
++    {
++      return false;
++    }
++  stmt_vec_info next_info = stmt_vinfo;
++  unsigned int i = 0;
++  while (next_info)
++    {
++      if (is_const_assign (next_info))
++        {
++          if (dump_enabled_p ())
++            {
++              dump_printf_loc (MSG_NOTE, vect_location,
++                               "no need to vectorize, store is const "
++                               "assign: %G", next_info->stmt);
++            }
++          return false;
++        }
++      if (innermost_inits[i] != STMT_VINFO_DR_INIT (next_info)
++          || innermost_offsets[i] != STMT_VINFO_DR_OFFSET (next_info))
++        {
++          return false;
++        }
++      next_info = DR_GROUP_NEXT_ELEMENT (next_info);
++      i++;
++    }
++  return true;
++}
++
++static bool
++check_same_bb (stmt_vec_info grp1, stmt_vec_info grp2)
++{
++  if (grp1->stmt->bb->index == grp2->stmt->bb->index)
++    {
++      return true;
++    }
++  return false;
++}
++
++/* Check if the grouped stores are of the same type.
++   input: t1/t2 = TREE_TYPE (gimple_assign_lhs (first_element->stmt))
++   output: 0 if same, 1 or -1 otherwise.  */
++
++static int
++tree_type_cmp (const tree t1, const tree t2)
++{
++  gcc_checking_assert (t1 != NULL && t2 != NULL);
++  if (t1 != t2)
++    {
++      if (TREE_CODE (t1) != TREE_CODE (t2))
++        {
++          return TREE_CODE (t1) > TREE_CODE (t2) ? 1 : -1;
++        }
++      if (TYPE_UNSIGNED (t1) != TYPE_UNSIGNED (t2))
++        {
++          return TYPE_UNSIGNED (t1) > TYPE_UNSIGNED (t2) ? 1 : -1;
++        }
++      if (TYPE_PRECISION (t1) != TYPE_PRECISION (t2))
++        {
++          return TYPE_PRECISION (t1) > TYPE_PRECISION (t2) ? 1 : -1;
++        }
++    }
++  return 0;
++}
++
++/* Check if 2 grouped stores are of the same type, so that we can
++   analyze them in one transpose group.  */
++static int
++check_same_store_type (stmt_vec_info grp1, stmt_vec_info grp2)
++{
++  if (grp1 == grp2)
++    {
++      return 0;
++    }
++  if (grp1->size != grp2->size)
++    {
++      return grp1->size > grp2->size ? 1 : -1;
++    }
++  tree lhs1 = gimple_assign_lhs (grp1->stmt);
++  tree lhs2 = gimple_assign_lhs (grp2->stmt);
++  if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
++    {
++      return TREE_CODE (lhs1) > TREE_CODE (lhs2) ? 1 : -1;
++    }
++  tree grp_type1 = TREE_TYPE (gimple_assign_lhs (grp1->stmt));
++  tree grp_type2 = TREE_TYPE (gimple_assign_lhs (grp2->stmt));
++  int cmp = tree_type_cmp (grp_type1, grp_type2);
++  return cmp;
++}
++
++/* Sort the grouped stores according to group_size and store_type.
++   output: 0 if same, 1 if grp1 > grp2, -1 otherwise.  */
++
++static int
++grouped_store_cmp (const void *grp1_, const void *grp2_)
++{
++  stmt_vec_info grp1 = *(stmt_vec_info *) const_cast<void *> (grp1_);
++  stmt_vec_info grp2 = *(stmt_vec_info *) const_cast<void *> (grp2_);
++  return check_same_store_type (grp1, grp2);
++}
++
++/* Transposing is based on permutation in registers.  Permutation
++   requires the vector length to be a power of 2 and to satisfy the
++   vector mode.  */
++
++static inline bool
++check_filling_reg (stmt_vec_info current_element)
++{
++  if (current_element->size == 0)
++    {
++      return false;
++    }
++  /* If the gimple STMT was already vectorized in the vect pass, it is
++     unable to undergo transpose analysis; skip it.  */
++  bool lhs_vectorized
++    = TREE_CODE (TREE_TYPE (gimple_get_lhs (current_element->stmt)))
++      == VECTOR_TYPE;
++  bool rhs_vectorized
++    = TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (current_element->stmt)))
++      == VECTOR_TYPE;
++  if (lhs_vectorized || rhs_vectorized)
++    {
++      return false;
++    }
++  unsigned int store_precision
++    = TYPE_PRECISION (TREE_TYPE (gimple_get_lhs (current_element->stmt)));
++  auto_vector_modes vector_modes;
++  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
++  unsigned min_mode_size = -1u;
++  for (unsigned i = 0; i < vector_modes.length (); i++)
++    {
++      unsigned mode_bit_size = (GET_MODE_BITSIZE (vector_modes[i])).coeffs[0];
++      min_mode_size = mode_bit_size < min_mode_size
++                      ? mode_bit_size : min_mode_size;
++    }
++  return store_precision != 0
++         && pow2p_hwi (current_element->size)
++         && (current_element->size * store_precision % min_mode_size == 0);
++}
++
++/* Check if the previous groups are suitable to transpose; if not, set
++   their group number to -1, reduce grp_num and clear current_groups.
++   Otherwise, just clear current_groups.  */
++
++static void
++check_and_clear_groups (vec<stmt_vec_info> &current_groups,
++                        unsigned int &grp_num)
++{
++  stmt_vec_info first_element;
++  if (current_groups.length () == 1
++      || (current_groups.length () != 0
++          && !pow2p_hwi (current_groups.length ())))
++    {
++      while (current_groups.length () != 0)
++        {
++          first_element = current_groups.pop ();
++          first_element->group_number = -1;
++        }
++      grp_num--;
++    }
++  else
++    {
++      while (current_groups.length ())
++        {
++          current_groups.pop ();
++        }
++    }
++}
++
++
++/* Make sure that transposed SLP vectorization is conducted only if the
++   grouped stores are one-dimensional array refs.  */
++
++static bool
++is_store_one_dim_array (gimple *stmt)
++{
++  tree op = gimple_get_lhs (stmt);
++  if (TREE_CODE (op) != ARRAY_REF)
++    return false;
++  return TREE_OPERAND_LENGTH (op) > 0
++         && TREE_OPERAND_LENGTH (TREE_OPERAND (op, 0)) == 0;
++}
++
++/* Set grouped_stores with similar MEM_REFs to the same group and mark
++   their grp_num.  Groups with the same grp_num constitute the minimum
++   unit for analyzing transpose.  Return the number of such units.  */
++
++static unsigned
++vect_prepare_transpose (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info current_element = NULL;
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  unsigned int grp_num = 0;
++  /* Use arrays to record MEM_REF data in different GROUPED_STORES.  */
++  auto_vec<tree> innermost_inits;
++  auto_vec<tree> innermost_offsets;
++
++  /* A set of stmt_vec_infos with the same store type.  Analyze them if
++     their size is suitable to transpose.  */
++  auto_vec<stmt_vec_info> current_groups;
++
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, current_element)
++    {
++      /* Compare the current grouped_store to the first one if
++         first_element exists; push current_element to current_groups if
++         they are similar in the innermost behavior of their MEM_REFs.  */
++      if (first_element != NULL
++          && !check_same_store_type (first_element, current_element)
++          && compare_innermost (innermost_inits, innermost_offsets,
++                                current_element)
++          && check_same_bb (first_element, current_element))
++        {
++          current_groups.safe_push (current_element);
++          current_element->group_number = grp_num;
++          /* If current_element is the last element in grouped_stores,
++             continue would exit the loop and leave the last group
++             unanalyzed.  */
++          if (i == bb_vinfo->grouped_stores.length () - 1)
++            {
++              check_and_clear_groups (current_groups, grp_num);
++            }
++          continue;
++        }
++      check_and_clear_groups (current_groups, grp_num);
++      innermost_inits.release ();
++      innermost_offsets.release ();
++      /* Beginning of a new group, to analyze whether its members can
++         constitute a unit for transpose analysis.  */
++      first_element = NULL;
++      if (is_store_one_dim_array (current_element->stmt)
++          && check_filling_reg (current_element)
++          && record_innermost (innermost_inits, innermost_offsets,
++                               current_element))
++        {
++          first_element = current_element;
++          current_groups.safe_push (current_element);
++          current_element->group_number = ++grp_num;
++          if (i == bb_vinfo->grouped_stores.length () - 1)
++            {
++              check_and_clear_groups (current_groups, grp_num);
++            }
++          continue;
++        }
++      current_element->group_number = -1;
++    }
++  return grp_num;
++}
++
++/* Return a flag for transposing grouped stores before building the SLP
++   tree.  Adds bool may_transpose in class vec_info.  */
++
++static bool
++vect_may_transpose (bb_vec_info bb_vinfo)
++{
++  if (targetm.vectorize.vec_perm_const == NULL)
++    {
++      return false;
++    }
++
++  if (bb_vinfo->grouped_stores.length () < 2)
++    {
++      return false;
++    }
++
++  DUMP_VECT_SCOPE ("analyze if grouped stores may transpose to slp");
++  /* Sort grouped_stores according to size and type for function
++     vect_prepare_transpose ().  */
++  bb_vinfo->grouped_stores.qsort (grouped_store_cmp);
++
++  int groups = vect_prepare_transpose (bb_vinfo);
++  BB_VINFO_TRANS_GROUPS (bb_vinfo) = groups;
++  if (dump_enabled_p ())
++    dump_printf_loc (MSG_NOTE, vect_location,
++                     "%d groups to analyze transposed slp.\n", groups);
++  return groups != 0;
++}
++
++/* Get the base address of STMT_INFO.  */
++
++static tree
++get_op_base_address (stmt_vec_info stmt_info)
++{
++  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
++  tree op = DR_BASE_ADDRESS (dr);
++  while (TREE_OPERAND_LENGTH (op) > 0)
++    {
++      op = TREE_OPERAND (op, 0);
++    }
++  return op;
++}
++
++/* Compare the UIDs of the two stmt_infos STMTINFO_A and STMTINFO_B,
++   sorting them in ascending order.  */
++
++static int
++dr_group_cmp (const void *stmtinfo_a_, const void *stmtinfo_b_)
++{
++  stmt_vec_info stmtinfo_a
++    = *(stmt_vec_info *) const_cast<void *> (stmtinfo_a_);
++  stmt_vec_info stmtinfo_b
++    = *(stmt_vec_info *) const_cast<void *> (stmtinfo_b_);
++
++  /* Stabilize the sort.  */
++  if (stmtinfo_a == stmtinfo_b)
++    {
++      return 0;
++    }
++  return gimple_uid (stmtinfo_a->stmt) < gimple_uid (stmtinfo_b->stmt)
++         ? -1 : 1;
++}
++
++/* Find the first elements of the grouped loads that need to be
++   merged.  */
++
++static void
++vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
++                            vec<stmt_vec_info> &res)
++{
++  unsigned int i = 0;
++  stmt_vec_info merge_first_element = NULL;
++  stmt_vec_info first_element = NULL;
++  tree opa = NULL;
++  unsigned int grp_size_a = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, i, first_element)
++    {
++      if (visited[i])
++        {
++          continue;
++        }
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++          || !pow2p_hwi (DR_GROUP_SIZE (first_element)))
++        {
++          /* A non-conforming grouped load should be grouped
++             separately.  */
++          if (merge_first_element == NULL)
++            {
++              visited[i] = true;
++              res.safe_push (first_element);
++              return;
++            }
++        }
++      if (merge_first_element == NULL)
++        {
++          merge_first_element = first_element;
++          opa = get_op_base_address (first_element);
++          grp_size_a = DR_GROUP_SIZE (first_element);
++          res.safe_push (first_element);
++          visited[i] = true;
++          continue;
++        }
++
++      /* If the two first elements have the same base address and group
++         size, these two grouped loads need to be merged.  */
++      tree opb = get_op_base_address (first_element);
++      unsigned int grp_size_b = DR_GROUP_SIZE (first_element);
++      if (opa == opb && grp_size_a == grp_size_b)
++        {
++          res.safe_push (first_element);
++          visited[i] = true;
++        }
++    }
++}
++
++/* Merge the grouped loads found by vect_slp_grouped_load_find ().  */
++
++static stmt_vec_info
++vect_slp_grouped_load_merge (vec<stmt_vec_info> &res)
++{
++  stmt_vec_info stmt_info = res[0];
++  if (res.length () == 1)
++    {
++      return stmt_info;
++    }
++  unsigned int i = 0;
++  unsigned int size = DR_GROUP_SIZE (res[0]);
++  unsigned int new_group_size = size * res.length ();
++  stmt_vec_info first_element = NULL;
++  stmt_vec_info merge_first_element = NULL;
++  stmt_vec_info last_element = NULL;
++  FOR_EACH_VEC_ELT (res, i, first_element)
++    {
++      if (merge_first_element == NULL)
++        {
++          merge_first_element = first_element;
++          last_element = merge_first_element;
++          size = DR_GROUP_SIZE (merge_first_element);
++        }
++
++      if (last_element != first_element
++          && !DR_GROUP_NEXT_ELEMENT (last_element))
++        {
++          DR_GROUP_NEXT_ELEMENT (last_element) = first_element;
++          /* Store the gap from the previous member of the group.  If
++             there is no gap in the access, DR_GROUP_GAP is always 1.  */
++          DR_GROUP_GAP_TRANS (first_element) = DR_GROUP_GAP (first_element);
++          DR_GROUP_GAP (first_element) = 1;
++        }
++      for (stmt_info = first_element; stmt_info;
++           stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
++        {
++          DR_GROUP_FIRST_ELEMENT (stmt_info) = merge_first_element;
++          DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info);
++          DR_GROUP_SIZE (stmt_info) = new_group_size;
++          last_element = stmt_info;
++        }
++    }
++  DR_GROUP_SIZE (merge_first_element) = new_group_size;
++  DR_GROUP_SLP_TRANSPOSE (merge_first_element) = true;
++  DR_GROUP_NEXT_ELEMENT (last_element) = NULL;
++  return merge_first_element;
++}
++
++/* Merge the grouped loads that have the same base address and group
++   size.  For example, for the grouped loads (opa_1, opa_2, opb_1,
++   opb_2):
++     opa_1: a0->a1->a2->a3
++     opa_2: a8->a9->a10->a11
++     opb_1: b0->b1
++     opb_2: b16->b17
++   we can probably get two merged grouped loads:
++     opa: a0->a1->a2->a3->a8->a9->a10->a11
++     opb: b0->b1->b16->b17.  */
++
++static bool
++vect_merge_slp_grouped_loads (bb_vec_info bb_vinfo)
++{
++  if (bb_vinfo->grouped_loads.length () <= 0)
++    {
++      if (dump_enabled_p ())
++        {
++          dump_printf_loc (MSG_NOTE, vect_location,
++                           "The number of grouped loads is 0.\n");
++        }
++      return false;
++    }
++  bb_vinfo->grouped_loads.qsort (dr_group_cmp);
++  auto_vec<bool> visited (bb_vinfo->grouped_loads.length ());
++  auto_vec<stmt_vec_info> grouped_loads_merge;
++  for (unsigned int i = 0; i < bb_vinfo->grouped_loads.length (); i++)
++    {
++      visited.safe_push (false);
++    }
++  while (1)
++    {
++      /* Find the grouped loads that need to be merged.  */
++      auto_vec<stmt_vec_info> res;
++      vect_slp_grouped_load_find (bb_vinfo, visited, res);
++      if (res.is_empty ())
++        {
++          break;
++        }
++      /* Merge the required grouped loads into one group.  */
++      grouped_loads_merge.safe_push (vect_slp_grouped_load_merge (res));
++    }
++  if (grouped_loads_merge.length () == bb_vinfo->grouped_loads.length ())
++    {
++      if (dump_enabled_p ())
++        {
++          dump_printf_loc (MSG_NOTE, vect_location,
++                           "No grouped loads need to be merged.\n");
++        }
++      return false;
++    }
++  if (dump_enabled_p ())
++    {
++      dump_printf_loc (MSG_NOTE, vect_location,
++                       "Merging grouped loads successfully.\n");
++    }
++  BB_VINFO_GROUPED_LOADS (bb_vinfo).release ();
++  for (unsigned int i = 0; i < grouped_loads_merge.length (); i++)
++    {
++      BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (grouped_loads_merge[i]);
++    }
++  return true;
++}
++
++/* Find the first elements of the grouped stores that need to be
++   transposed and merged.  */
++
++static void
++vect_slp_grouped_store_find (bb_vec_info bb_vinfo, vec<bool> &visited,
++                             vec<stmt_vec_info> &res)
++{
++  stmt_vec_info first_element = NULL;
++  stmt_vec_info merge_first_element = NULL;
++  unsigned int k = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
++    {
++      if (visited[k])
++        {
++          continue;
++        }
++      /* A non-conforming grouped store should be grouped separately.  */
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++          || first_element->group_number == -1)
++        {
++          if (merge_first_element == NULL)
++            {
++              visited[k] = true;
++              res.safe_push (first_element);
++              return;
++            }
++        }
++      if (first_element->group_number != -1
++          && merge_first_element == NULL)
++        {
++          merge_first_element = first_element;
++        }
++      if (merge_first_element->group_number == first_element->group_number)
++        {
++          visited[k] = true;
++          res.safe_push (first_element);
++        }
++    }
++}
++
++/* Transpose and merge the grouped stores found by
++   vect_slp_grouped_store_find ().  */
++
++static stmt_vec_info
++vect_slp_grouped_store_transform (vec<stmt_vec_info> &res)
++{
++  stmt_vec_info stmt_info = res[0];
++  if (res.length () == 1)
++    {
++      return stmt_info;
++    }
++  stmt_vec_info rearrange_first_element = stmt_info;
++  stmt_vec_info last_element = rearrange_first_element;
++
++  unsigned int size = DR_GROUP_SIZE (rearrange_first_element);
++  unsigned int new_group_size = size * res.length ();
++  for (unsigned int i = 1; i < res.length (); i++)
++    {
++      /* Store the gap from the previous member of the group.  If there
++         is no gap in the access, DR_GROUP_GAP is always 1.  */
++      DR_GROUP_GAP_TRANS (res[i]) = DR_GROUP_GAP (res[i]);
++      DR_GROUP_GAP (res[i]) = 1;
++    }
++  while (!res.is_empty ())
++    {
++      stmt_info = res[0];
++      res.ordered_remove (0);
++      if (DR_GROUP_NEXT_ELEMENT (stmt_info))
++        {
++          res.safe_push (DR_GROUP_NEXT_ELEMENT (stmt_info));
++        }
++      DR_GROUP_FIRST_ELEMENT (stmt_info) = rearrange_first_element;
++      DR_GROUP_NEXT_ELEMENT (last_element) = stmt_info;
++      DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info);
++      DR_GROUP_SIZE (stmt_info) = new_group_size;
++      last_element = stmt_info;
++    }
++
++  DR_GROUP_SIZE (rearrange_first_element) = new_group_size;
++  DR_GROUP_SLP_TRANSPOSE (rearrange_first_element) = true;
++  DR_GROUP_NEXT_ELEMENT (last_element) = NULL;
++  return rearrange_first_element;
++}
++
++/* Save the STMT_INFOs in the grouped stores to BB_VINFO_SCALAR_STORES
++   for transposing the grouped stores back.  */
++
++static void
++get_scalar_stores (bb_vec_info bb_vinfo)
++{
++  unsigned int k = 0;
++  stmt_vec_info first_element = NULL;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
++    {
++      /* Filter out the grouped stores that are unnecessary for
++         transposing.  */
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++          || first_element->group_number == -1)
++        {
++          continue;
++        }
++      vec<stmt_vec_info> tmp_scalar_store;
++      tmp_scalar_store.create (DR_GROUP_SIZE (first_element));
++      for (stmt_vec_info stmt_info = first_element; stmt_info;
++           stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
++        {
++          tmp_scalar_store.safe_push (stmt_info);
++        }
++      BB_VINFO_SCALAR_STORES (bb_vinfo).safe_push (tmp_scalar_store);
++    }
++}
++
++/* Transpose and merge the grouped stores that have the same group
++   number.  For example, for the grouped stores (opa_0, opa_1, opa_2,
++   opa_3):
++     opa_0: a00->a01->a02->a03
++     opa_1: a10->a11->a12->a13
++     opa_2: a20->a21->a22->a23
++     opa_3: a30->a31->a32->a33
++   we can probably get the merged grouped store:
++     opa: a00->a10->a20->a30
++          ->a01->a11->a21->a31
++          ->a02->a12->a22->a32
++          ->a03->a13->a23->a33.  */
++
++static bool
++vect_transform_slp_grouped_stores (bb_vec_info bb_vinfo)
++{
++  if (bb_vinfo->grouped_stores.length () <= 0)
++    {
++      if (dump_enabled_p ())
++        {
++          dump_printf_loc (MSG_NOTE, vect_location,
++                           "The number of grouped stores is 0.\n");
++        }
++      return false;
++    }
++
++  bb_vinfo->grouped_stores.qsort (dr_group_cmp);
++  auto_vec<stmt_vec_info> grouped_stores_merge;
++  auto_vec<bool> visited (bb_vinfo->grouped_stores.length ());
++  unsigned int i = 0;
++  for (i = 0; i < bb_vinfo->grouped_stores.length (); i++)
++    {
++      visited.safe_push (false);
++    }
++
++  /* Get the scalar stores for the following transposition recovery.  */
++  get_scalar_stores (bb_vinfo);
++
++  while (1)
++    {
++      /* Find the grouped stores that need to be transposed and
++         merged.  */
++      auto_vec<stmt_vec_info> res;
++      vect_slp_grouped_store_find (bb_vinfo, visited, res);
++      if (res.is_empty ())
++        {
++          break;
++        }
++      /* Transpose and merge the required grouped stores into one
++         group.  */
++      grouped_stores_merge.safe_push (vect_slp_grouped_store_transform (res));
++    }
++
++  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
++  for (i = 0; i < grouped_stores_merge.length (); i++)
++    {
++      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_merge[i]);
++    }
++
++  if (dump_enabled_p ())
++    {
++      dump_printf_loc (MSG_NOTE, vect_location,
++                       "Transposing grouped stores successfully.\n");
++    }
++  return true;
++}
++
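The FIFO in vect_slp_grouped_store_transform realizes exactly the round-robin interleaving shown in the a00/a10/a20/a30 comment above it. A toy standalone model of that merge on singly linked chains (minimal invented types, not GCC data structures):

    #include <cstdio>
    #include <deque>

    struct Node { const char *name; Node *next; };

    // Consume the chains one head at a time through a FIFO, re-queueing
    // each chain's remainder, producing the transposed element order.
    Node *
    transpose_merge (std::deque<Node *> groups)
    {
      Node dummy = { "", nullptr };
      Node *tail = &dummy;
      while (!groups.empty ())
        {
          Node *head = groups.front ();
          groups.pop_front ();
          if (head->next)
            groups.push_back (head->next);   // re-queue the rest of the chain
          head->next = nullptr;
          tail->next = head;
          tail = head;
        }
      return dummy.next;
    }

    int
    main ()
    {
      Node a01 = { "a01", nullptr }, a00 = { "a00", &a01 };
      Node a11 = { "a11", nullptr }, a10 = { "a10", &a11 };
      for (Node *n = transpose_merge ({ &a00, &a10 }); n; n = n->next)
        printf ("%s ", n->name);             // prints: a00 a10 a01 a11
      printf ("\n");
      return 0;
    }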
++/* A helper for vect_transform_back_slp_grouped_stores ().  */
++
++static auto_vec<stmt_vec_info>
++vect_transform_back_slp_grouped_store (bb_vec_info bb_vinfo,
++                                       stmt_vec_info first_stmt_info)
++{
++  auto_vec<stmt_vec_info> grouped_stores_split;
++  for (unsigned int i = 0; i < bb_vinfo->scalar_stores.length (); i++)
++    {
++      vec<stmt_vec_info> scalar_tmp = bb_vinfo->scalar_stores[i];
++      if (scalar_tmp.length () > 1
++          && scalar_tmp[0]->group_number != first_stmt_info->group_number)
++        {
++          continue;
++        }
++      stmt_vec_info cur_stmt_info = NULL;
++      stmt_vec_info cur_first_stmt_info = NULL;
++      stmt_vec_info last_stmt_info = NULL;
++      unsigned int k = 0;
++      FOR_EACH_VEC_ELT (scalar_tmp, k, cur_stmt_info)
++        {
++          if (k == 0)
++            {
++              cur_first_stmt_info = cur_stmt_info;
++              last_stmt_info = cur_stmt_info;
++            }
++          DR_GROUP_FIRST_ELEMENT (cur_stmt_info) = cur_first_stmt_info;
++          DR_GROUP_NEXT_ELEMENT (last_stmt_info) = cur_stmt_info;
++          last_stmt_info = cur_stmt_info;
++        }
++      DR_GROUP_SIZE (cur_first_stmt_info) = k;
++      DR_GROUP_NEXT_ELEMENT (last_stmt_info) = NULL;
++      if (first_stmt_info != cur_first_stmt_info)
++        {
++          DR_GROUP_GAP (cur_first_stmt_info)
++            = DR_GROUP_GAP_TRANS (cur_first_stmt_info);
++          DR_GROUP_SLP_TRANSPOSE (cur_first_stmt_info) = false;
++          DR_GROUP_NUMBER (cur_first_stmt_info) = -1;
++        }
++      grouped_stores_split.safe_push (cur_first_stmt_info);
++    }
++  return grouped_stores_split;
++}
++
++/* Transform the grouped store back.  */
++
++void
++vect_transform_back_slp_grouped_stores (bb_vec_info bb_vinfo,
++                                        stmt_vec_info first_stmt_info)
++{
++  if (first_stmt_info->group_number == -1)
++    {
++      return;
++    }
++  /* Transform back.  */
++  auto_vec<stmt_vec_info> grouped_stores_split
++    = vect_transform_back_slp_grouped_store (bb_vinfo, first_stmt_info);
++
++  /* Add the remaining grouped stores to grouped_stores_split.  */
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element)
++    {
++      if (first_element->group_number != first_stmt_info->group_number)
++        {
++          grouped_stores_split.safe_push (first_element);
++        }
++    }
++  DR_GROUP_SLP_TRANSPOSE (first_stmt_info) = false;
++  DR_GROUP_NUMBER (first_stmt_info) = -1;
++  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
++  for (i = 0; i < grouped_stores_split.length (); i++)
++    {
++      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_split[i]);
++    }
++}
++
++/* Function check_for_slp_vectype
++
++   Restriction on grouped stores, checked via their vectype: if the
++   vectype of a grouped store has changed, it needs to be transformed
++   back.  If all grouped stores need to be transformed back, return
++   FALSE.  */
++
++static bool
++check_for_slp_vectype (bb_vec_info bb_vinfo)
++{
++  if (dump_file)
++    fprintf (dump_file, "check_for_slp_vectype: enter\n");
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  int count = 0;
++  auto_vec<stmt_vec_info> grouped_stores_check;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element)
++    {
++      grouped_stores_check.safe_push (first_element);
++    }
++  FOR_EACH_VEC_ELT (grouped_stores_check, i, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element)
++          && first_element->group_number != -1)
++        {
++          unsigned int group_size_b
++            = DR_GROUP_SIZE_TRANS (first_element);
++          tree vectype = STMT_VINFO_VECTYPE (first_element);
++          gimple *stmt = STMT_VINFO_STMT (first_element);
++          tree lhs = gimple_get_lhs (stmt);
++          tree type = TREE_TYPE (lhs);
++#if 0
++          if (!vectype && !type)
++            {
++              if (dump_file)
++                fprintf (dump_file,
++                         "check_for_slp_vectype: no vectype/stmt type\n");
++              continue;
++            }
++
++          if (!vectype)
++            vectype = type;
++#endif
++          if (dump_file)
++            {
++              fprintf (dump_file, "check_for_slp_vectype: %p\n",
++                       first_element);
++              print_gimple_stmt (dump_file, stmt, 0);
++              fprintf (dump_file, "check_for_slp_vectype: vectype=");
++              if (vectype)
++                print_generic_expr (dump_file, vectype);
++              fprintf (dump_file, "\n");
++            }
++#if 0
++          if (!vectype || !VECTOR_TYPE_P (vectype))
++            continue;
++#endif
++          poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
++          if (nunits.to_constant () > group_size_b)
++            {
++              count++;
++              /* If the vectype has changed, this grouped store needs to
++                 be transformed back.  */
++              vect_transform_back_slp_grouped_stores (bb_vinfo,
++                                                      first_element);
++              if (dump_enabled_p ())
++                {
++                  dump_printf_loc (MSG_NOTE, vect_location,
++                                   "Not supported: only supported when"
++                                   " group_size >= nunits.\n");
++                }
++            }
++        }
++    }
++  if (count == BB_VINFO_TRANS_GROUPS (bb_vinfo))
++    {
++      return false;
++    }
++  if (dump_file)
++    fprintf (dump_file, "check_for_slp_vectype: True\n");
++  return true;
++}
++
++/* Function check_for_dr_alignment
++
++   Check the alignment of the loads of the SLP instance.  Return FALSE
++   if a load cannot be vectorized.  */
++
++static bool
++check_for_dr_alignment (bb_vec_info bb_vinfo, slp_instance instance)
++{
++  slp_tree node = NULL;
++  unsigned int i = 0;
++  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
++    {
++      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
++      dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
++      if (dump_file)
++        {
++          fprintf (dump_file, "check_for_dr_alignment: %p\n",
++                   first_stmt_info);
++
++          gimple *stmt = STMT_VINFO_STMT (first_stmt_info);
++          tree lhs = gimple_get_lhs (stmt);
++          tree type = TREE_TYPE (lhs);
++          print_gimple_stmt (dump_file, stmt, 0);
++        }
++
++      tree vectype = STMT_VINFO_VECTYPE (first_stmt_info);
++      int malign = dr_misalignment (first_dr_info, vectype);
++      enum dr_alignment_support supportable_dr_alignment
++        = vect_supportable_dr_alignment (bb_vinfo, first_dr_info,
++                                         vectype, malign);
++      if (supportable_dr_alignment == dr_explicit_realign_optimized
++          || supportable_dr_alignment == dr_explicit_realign)
++        {
++          return false;
+         }
+     }
+-  else if (kind == slp_inst_kind_reduc_chain)
++  return true;
++}
++
++/* Initialize the slp_transpose flag before transposing.  */
++
++static void
++init_stmt_info_slp_transpose (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info first_element = NULL;
++  unsigned int k = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
+     {
+-      /* Collect the reduction stmts and store them in scalar_stmts.
*/ +- scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info)); +- while (next_info) ++ if (STMT_VINFO_GROUPED_ACCESS (first_element)) + { +- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); +- next_info = REDUC_GROUP_NEXT_ELEMENT (next_info); ++ DR_GROUP_SLP_TRANSPOSE (first_element) = false; + } +- /* Mark the first element of the reduction chain as reduction to properly +- transform the node. In the reduction analysis phase only the last +- element of the chain is marked as reduction. */ +- STMT_VINFO_DEF_TYPE (stmt_info) +- = STMT_VINFO_DEF_TYPE (scalar_stmts.last ()); +- STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) +- = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); + } +- else if (kind == slp_inst_kind_ctor) ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, k, first_element) + { +- tree rhs = gimple_assign_rhs1 (stmt_info->stmt); +- tree val; +- scalar_stmts.create (CONSTRUCTOR_NELTS (rhs)); +- FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) ++ if (STMT_VINFO_GROUPED_ACCESS (first_element)) + { +- stmt_vec_info def_info = vinfo->lookup_def (val); +- def_info = vect_stmt_to_vectorize (def_info); +- scalar_stmts.quick_push (def_info); ++ DR_GROUP_SLP_TRANSPOSE (first_element) = false; + } +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "Analyzing vectorizable constructor: %G\n", +- stmt_info->stmt); + } +- else if (kind == slp_inst_kind_reduc_group) ++} ++ ++/* Analyze and transpose the stmts before building the SLP tree. */ ++ ++static bool ++vect_analyze_transpose (bb_vec_info bb_vinfo) ++{ ++ DUMP_VECT_SCOPE ("vect_analyze_transpose"); ++ ++ if (!vect_may_transpose (bb_vinfo)) + { +- /* Collect reduction statements. */ +- const vec<stmt_vec_info> &reductions +- = as_a <loop_vec_info> (vinfo)->reductions; +- scalar_stmts.create (reductions.length ()); +- for (i = 0; reductions.iterate (i, &next_info); i++) +- if ((STMT_VINFO_RELEVANT_P (next_info) +- || STMT_VINFO_LIVE_P (next_info)) +- /* ??? Make sure we didn't skip a conversion around a reduction +- path. In that case we'd have to reverse engineer that conversion +- stmt following the chain using reduc_idx and from the PHI +- using reduc_def. */ +- && STMT_VINFO_DEF_TYPE (next_info) == vect_reduction_def) +- scalar_stmts.quick_push (next_info); +- /* If less than two were relevant/live there's nothing to SLP. */ +- if (scalar_stmts.length () < 2) +- return false; ++ return false; + } +- else +- gcc_unreachable (); + +- vec<stmt_vec_info> roots = vNULL; +- if (kind == slp_inst_kind_ctor) ++ /* For basic block SLP, try to merge the grouped stores and loads ++ into one group. */ ++ init_stmt_info_slp_transpose (bb_vinfo); ++ if (vect_transform_slp_grouped_stores (bb_vinfo) ++ && vect_merge_slp_grouped_loads (bb_vinfo)) + { +- roots.create (1); +- roots.quick_push (stmt_info); ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis succeeded with SLP transposed.\n"); ++ } ++ return true; + } +- /* Build the tree for the SLP instance. */ +- bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts, +- roots, +- max_tree_size, limit, bst_map, +- kind == slp_inst_kind_store +- ? stmt_info : NULL); +- if (!res) +- roots.release (); +- +- /* ??? If this is slp_inst_kind_store and the above succeeded here's +- where we should do store group splitting. 
*/ +- +- return res; ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis failed with SLP transposed.\n"); ++ } ++ return false; + } + + /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP +@@ -4963,7 +5932,7 @@ vect_slp_analyze_operations (vec_info *vinfo) + /* Check we can vectorize the reduction. */ + || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc + && !vectorizable_bb_reduc_epilogue (instance, &cost_vec))) +- { ++ { + slp_tree node = SLP_INSTANCE_TREE (instance); + stmt_vec_info stmt_info; + if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ()) +@@ -4975,7 +5944,7 @@ vect_slp_analyze_operations (vec_info *vinfo) + "removing SLP instance operations starting from: %G", + stmt_info->stmt); + vect_free_slp_instance (instance); +- vinfo->slp_instances.ordered_remove (i); ++ vinfo->slp_instances.ordered_remove (i); + cost_vec.release (); + while (!visited_vec.is_empty ()) + visited.remove (visited_vec.pop ()); +@@ -5204,7 +6173,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo, + gimple *orig_stmt = orig_stmt_info->stmt; + + /* If there is a non-vectorized use of the defs then the scalar +- stmt is kept live in which case we do not account it or any ++ stmt is kept live in which case we do not account it or any + required defs in the SLP children in the scalar cost. This + way we make the vectorization more costly when compared to + the scalar cost. */ +@@ -5481,7 +6450,11 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, + + vec_outside_cost = vec_prologue_cost + vec_epilogue_cost; + +- if (dump_enabled_p ()) ++ BB_VINFO_VEC_INSIDE_COST (bb_vinfo) = vec_inside_cost; ++ BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo) = vec_outside_cost; ++ BB_VINFO_SCALAR_COST (bb_vinfo) = scalar_cost; ++ ++ if (!unlimited_cost_model (NULL) && dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Cost model analysis for part in loop %d:\n", sl); +@@ -5819,7 +6792,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL)) + { + if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unhandled data-ref in basic " + "block.\n"); + return false; +@@ -5854,6 +6827,22 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + + vect_pattern_recog (bb_vinfo); + ++ /* Transpose grouped stores and loads for better vectorizable version. */ ++ if (bb_vinfo->transposed) ++ { ++ if (!vect_analyze_transpose (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: unhandled slp transposed in " ++ "basic block.\n"); ++ } ++ return false; ++ } ++ } ++ bb_vinfo->before_slp = true; ++ + /* Update store groups from pattern processing. */ + vect_fixup_store_groups_with_patterns (bb_vinfo); + +@@ -5872,6 +6861,20 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + return false; + } + ++ /* Check if the vectype is suitable for SLP transposed. 
*/
++  if (bb_vinfo->transposed && !check_for_slp_vectype (bb_vinfo))
++    {
++      if (dump_enabled_p ())
++        {
++          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++                           "Failed to SLP transposed in the basic block.\n");
++          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++                           "not vectorized: vectype is not suitable for "
++                           "SLP transposed in basic block.\n");
++        }
++      return false;
++    }
++
+   /* Optimize permutations.  */
+   vect_optimize_slp (bb_vinfo);
+
+@@ -5914,6 +6917,27 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
+   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
+     return false;
+
++  /* Check if the alignment is suitable for SLP transposed.  */
++  if (bb_vinfo->transposed)
++    {
++      for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); i++)
++        {
++          if (!check_for_dr_alignment (bb_vinfo, instance))
++            {
++              if (dump_enabled_p ())
++                {
++                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++                                   "Failed to SLP transposed in the basic "
++                                   "block.\n");
++                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++                                   "not vectorized: alignment is not suitable "
++                                   "for SLP transposed in basic block.\n");
++                }
++              return false;
++            }
++        }
++    }
++
+   if (!vect_slp_analyze_operations (bb_vinfo))
+     {
+       if (dump_enabled_p ())
+@@ -5923,7 +6947,88 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
+     }
+
+   vect_bb_partition_graph (bb_vinfo);
++  return true;
++}
++
++static bool
++may_new_transpose_bbvinfo (bb_vec_info bb_vinfo_ori, bool res_ori,
++                           loop_p orig_loop)
++{
++  /* If the flag is false or the slp analysis is broken before
++     vect_analyze_slp, we don't try to analyze the transposed SLP version.  */
++  if (!flag_tree_slp_transpose_vectorize
++      || !BB_VINFO_BEFORE_SLP (bb_vinfo_ori))
++    {
++      return false;
++    }
++
++  /* If the original bb_vinfo can't be vectorized, try to create a bb_vinfo
++     of the transposed version.  */
++  if (!res_ori)
++    {
++      return true;
++    }
++
++  /* Calculate the cost of the original bb_vinfo.  */
++  if (unlimited_cost_model (NULL))
++    {
++      vec<slp_instance> &instances = BB_VINFO_SLP_INSTANCES (bb_vinfo_ori);
++      vect_bb_vectorization_profitable_p (bb_vinfo_ori, instances, orig_loop);
++    }
++  /* If the vector cost and the scalar cost do not differ much (here we set
++     the threshold to 4), we try to create a bb_vinfo of the transposed
++     version.  */
++  if (BB_VINFO_SCALAR_COST (bb_vinfo_ori)
++      < 4 * (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori)
++             + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori)))
++    {
++      return true;
++    }
++  return false;
++}
+
++static bool
++may_choose_transpose_bbvinfo (bb_vec_info bb_vinfo_trans, bool res_trans,
++                              bb_vec_info bb_vinfo_ori, bool res_ori,
++                              loop_p orig_loop)
++{
++  /* The original bb_vinfo is chosen if the transposed bb_vinfo
++     can't be vectorized.  */
++  if (!res_trans)
++    {
++      return false;
++    }
++  /* Calculate the cost of the transposed bb_vinfo.
*/
++  if (unlimited_cost_model (NULL))
++    {
++      vec<slp_instance> &instances = BB_VINFO_SLP_INSTANCES (bb_vinfo_trans);
++      vect_bb_vectorization_profitable_p (bb_vinfo_trans, instances,
++                                          orig_loop);
++    }
++  int diff_bb_cost = -1;
++  int diff_bb_cost_trans = -1;
++  if (res_ori)
++    {
++      diff_bb_cost = BB_VINFO_SCALAR_COST (bb_vinfo_ori)
++                     - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori)
++                     - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori);
++    }
++  if (res_trans)
++    {
++      diff_bb_cost_trans = BB_VINFO_SCALAR_COST (bb_vinfo_trans)
++                           - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans)
++                           - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans);
++    }
++  /* The original bb_vinfo is chosen when one of the following conditions
++     is satisfied:
++     1) The cost of the original version is better than the transposed
++        version.
++     2) The vector cost is similar to the scalar cost in the transposed
++        version.  */
++  if ((res_ori && res_trans && diff_bb_cost >= diff_bb_cost_trans)
++      || (res_trans && BB_VINFO_SCALAR_COST (bb_vinfo_trans)
++          <= (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans)
++              + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans))))
++    {
++      return false;
++    }
+   return true;
+ }
+
+@@ -5937,6 +7042,7 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
+ 			loop_p orig_loop)
+ {
+   bb_vec_info bb_vinfo;
++  bb_vec_info bb_vinfo_trans = NULL;
+   auto_vector_modes vector_modes;
+
+   /* Autodetect first vector size we try.  */
+@@ -5951,6 +7057,10 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
+     {
+       bool vectorized = false;
+       bool fatal = false;
++      bool res_bb_vinfo_ori = false;
++      bool res_bb_vinfo_trans = false;
++
++      /* Create a bb_vinfo of the original version.  */
+       bb_vinfo = new _bb_vec_info (bbs, &shared);
+
+       bool first_time_p = shared.datarefs.is_empty ();
+@@ -5960,8 +7070,113 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
+       else
+ 	bb_vinfo->shared->check_datarefs ();
+       bb_vinfo->vector_mode = next_vector_mode;
++      bb_vinfo->transposed = false;
++      bb_vinfo->before_slp = false;
++
++      res_bb_vinfo_ori = vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal,
++                                                dataref_groups);
++      auto_vec<slp_instance> profitable_subgraphs;
++      auto_vec<slp_instance> profitable_subgraphs_trans;
++      for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo))
++        {
++          if (instance->subgraph_entries.is_empty ())
++            continue;
++
++          vect_location = instance->location ();
++          if (!unlimited_cost_model (NULL)
++              && !vect_bb_vectorization_profitable_p
++                    (bb_vinfo, instance->subgraph_entries, orig_loop))
++            {
++              if (dump_enabled_p ())
++                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++                                 "not vectorized: vectorization is not "
++                                 "profitable.\n");
++              continue;
++            }
++          if (res_bb_vinfo_ori)
++            {
++              if (!dbg_cnt (vect_slp))
++                continue;
++              profitable_subgraphs.safe_push (instance);
++            }
++        }
++
++      /* Analyze and create a transposed bb_vinfo.
*/
++      if (may_new_transpose_bbvinfo (bb_vinfo, res_bb_vinfo_ori, orig_loop))
++        {
++          bool fatal_trans = false;
++          bb_vinfo_trans
++            = new _bb_vec_info (bbs, &shared);
++          bool first_time_p = shared.datarefs.is_empty ();
++          BB_VINFO_DATAREFS (bb_vinfo_trans) = datarefs;
++          if (first_time_p)
++            {
++              bb_vinfo_trans->shared->save_datarefs ();
++            }
++          else
++            {
++              bb_vinfo_trans->shared->check_datarefs ();
++            }
++          bb_vinfo_trans->vector_mode = next_vector_mode;
++          bb_vinfo_trans->transposed = true;
++          bb_vinfo_trans->before_slp = false;
++
++          res_bb_vinfo_trans
++            = vect_slp_analyze_bb_1 (bb_vinfo_trans, n_stmts, fatal_trans,
++                                     dataref_groups);
++          if (res_bb_vinfo_trans)
++            {
++              for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo_trans))
++                {
++                  if (instance->subgraph_entries.is_empty ())
++                    continue;
++
++                  vect_location = instance->location ();
++                  if (!unlimited_cost_model (NULL)
++                      && !vect_bb_vectorization_profitable_p
++                            (bb_vinfo_trans, instance->subgraph_entries, orig_loop))
++                    {
++                      if (dump_enabled_p ())
++                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++                                         "not vectorized: transpose vectorization is not "
++                                         "profitable.\n");
++                      res_bb_vinfo_trans = false;
++                      continue;
++                    }
++                  if (res_bb_vinfo_trans)
++                    {
++                      if (!dbg_cnt (vect_slp))
++                        continue;
++                      profitable_subgraphs_trans.safe_push (instance);
++                    }
++                }
++            }
++          if (may_choose_transpose_bbvinfo (bb_vinfo_trans,
++                                            res_bb_vinfo_trans,
++                                            bb_vinfo, res_bb_vinfo_ori,
++                                            orig_loop))
++            {
++              bb_vinfo = bb_vinfo_trans;
++              fatal = fatal_trans;
++              if (dump_enabled_p ())
++                {
++                  dump_printf_loc (MSG_NOTE, vect_location,
++                                   "Basic block part vectorized "
++                                   "using transposed version.\n");
++                }
++            }
++          else
++            {
++              if (dump_enabled_p ())
++                {
++                  dump_printf_loc (MSG_NOTE, vect_location,
++                                   "Basic block part vectorized "
++                                   "\n");
++                }
++            }
++        }
+
+-      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups))
++      if (res_bb_vinfo_ori || res_bb_vinfo_trans)
+ 	{
+ 	  if (dump_enabled_p ())
+ 	    {
+@@ -5972,90 +7187,129 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
+ 	    }
+
+ 	  bb_vinfo->shared->check_datarefs ();
+-
+-	  auto_vec<slp_instance> profitable_subgraphs;
+-	  for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo))
++	  if (!res_bb_vinfo_trans)
+ 	    {
+-	      if (instance->subgraph_entries.is_empty ())
+-		continue;
+-
+-	      vect_location = instance->location ();
+-	      if (!unlimited_cost_model (NULL)
+-		  && !vect_bb_vectorization_profitable_p
+-			(bb_vinfo, instance->subgraph_entries, orig_loop))
++	      /* When we're vectorizing an if-converted loop body make sure
++		 we vectorized all if-converted code.  */
++	      if (!profitable_subgraphs.is_empty ()
++		  && orig_loop)
+ 		{
+-		  if (dump_enabled_p ())
+-		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+-				     "not vectorized: vectorization is not "
+-				     "profitable.\n");
+-		  continue;
++		  gcc_assert (bb_vinfo->bbs.length () == 1);
++		  for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
++		       !gsi_end_p (gsi); gsi_next (&gsi))
++		    {
++		      /* The costing above left us with DCEable vectorized scalar
++			 stmts having the visited flag set on profitable
++			 subgraphs.  Do the delayed clearing of the flag here.
*/
++		      if (gimple_visited_p (gsi_stmt (gsi)))
++			{
++			  gimple_set_visited (gsi_stmt (gsi), false);
++			  continue;
++			}
++		      if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED)
++			continue;
++
++		      if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
++			if (gimple_assign_rhs_code (ass) == COND_EXPR)
++			  {
++			    if (!profitable_subgraphs.is_empty ()
++				&& dump_enabled_p ())
++			      dump_printf_loc (MSG_NOTE, vect_location,
++					       "not profitable because of "
++					       "unprofitable if-converted scalar "
++					       "code\n");
++			    profitable_subgraphs.truncate (0);
++			  }
++		    }
+ 		}
+
+-	      if (!dbg_cnt (vect_slp))
+-		continue;
++	      /* Finally schedule the profitable subgraphs.  */
++	      for (slp_instance instance : profitable_subgraphs)
++		{
++		  if (!vectorized && dump_enabled_p ())
++		    dump_printf_loc (MSG_NOTE, vect_location,
++				     "Basic block will be vectorized "
++				     "using SLP\n");
++		  vectorized = true;
+
+-	      profitable_subgraphs.safe_push (instance);
+-	    }
++		  vect_schedule_slp (bb_vinfo, instance->subgraph_entries);
+
+-	  /* When we're vectorizing an if-converted loop body make sure
+-	     we vectorized all if-converted code.  */
+-	  if (!profitable_subgraphs.is_empty ()
+-	      && orig_loop)
++		  unsigned HOST_WIDE_INT bytes;
++		  if (dump_enabled_p ())
++		    {
++		      if (GET_MODE_SIZE
++			    (bb_vinfo->vector_mode).is_constant (&bytes))
++			dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
++					 "basic block part vectorized using %wu "
++					 "byte vectors\n", bytes);
++		      else
++			dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
++					 "basic block part vectorized using "
++					 "variable length vectors\n");
++		    }
++		}
++	    }
++	  else
+ 	    {
+-	      gcc_assert (bb_vinfo->bbs.length () == 1);
+-	      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
+-		   !gsi_end_p (gsi); gsi_next (&gsi))
++	      if (!profitable_subgraphs_trans.is_empty ()
++		  && orig_loop)
+ 		{
+-		  /* The costing above left us with DCEable vectorized scalar
+-		     stmts having the visited flag set on profitable
+-		     subgraphs.  Do the delayed clearing of the flag here.  */
+-		  if (gimple_visited_p (gsi_stmt (gsi)))
++		  gcc_assert (bb_vinfo->bbs.length () == 1);
++		  for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
++		       !gsi_end_p (gsi); gsi_next (&gsi))
+ 		    {
+-		      gimple_set_visited (gsi_stmt (gsi), false);
+-		      continue;
++		      /* The costing above left us with DCEable vectorized scalar
++			 stmts having the visited flag set on profitable
++			 subgraphs.  Do the delayed clearing of the flag here.  */
++		      if (gimple_visited_p (gsi_stmt (gsi)))
++			{
++			  gimple_set_visited (gsi_stmt (gsi), false);
++			  continue;
++			}
++		      if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED)
++			continue;
++
++		      if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
++			if (gimple_assign_rhs_code (ass) == COND_EXPR)
++			  {
++			    if (!profitable_subgraphs_trans.is_empty ()
++				&& dump_enabled_p ())
++			      dump_printf_loc (MSG_NOTE, vect_location,
++					       "not profitable because of "
++					       "unprofitable if-converted scalar "
++					       "code\n");
++			    profitable_subgraphs_trans.truncate (0);
++			  }
+ 		    }
+-		  if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED)
+-		    continue;
+-
+-		  if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
+-		    if (gimple_assign_rhs_code (ass) == COND_EXPR)
+-		      {
+-			if (!profitable_subgraphs.is_empty ()
+-			    && dump_enabled_p ())
+-			  dump_printf_loc (MSG_NOTE, vect_location,
+-					   "not profitable because of "
+-					   "unprofitable if-converted scalar "
+-					   "code\n");
+-			profitable_subgraphs.truncate (0);
+-		      }
+ 		}
+-	    }
+
+-	  /* Finally schedule the profitable subgraphs.
*/ +- for (slp_instance instance : profitable_subgraphs) +- { +- if (!vectorized && dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "Basic block will be vectorized " +- "using SLP\n"); +- vectorized = true; ++ /* Finally schedule the profitable subgraphs. */ ++ for (slp_instance instance : profitable_subgraphs_trans) ++ { ++ if (!vectorized && dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block will be vectorized " ++ "using SLP\n"); ++ vectorized = true; + +- vect_schedule_slp (bb_vinfo, instance->subgraph_entries); ++ vect_schedule_slp (bb_vinfo, instance->subgraph_entries); + +- unsigned HOST_WIDE_INT bytes; +- if (dump_enabled_p ()) +- { +- if (GET_MODE_SIZE +- (bb_vinfo->vector_mode).is_constant (&bytes)) +- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, +- "basic block part vectorized using %wu " +- "byte vectors\n", bytes); +- else +- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, +- "basic block part vectorized using " +- "variable length vectors\n"); ++ unsigned HOST_WIDE_INT bytes; ++ if (dump_enabled_p ()) ++ { ++ if (GET_MODE_SIZE ++ (bb_vinfo->vector_mode).is_constant (&bytes)) ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, ++ "basic block part vectorized using %wu " ++ "byte vectors\n", bytes); ++ else ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, ++ "basic block part vectorized using " ++ "variable length vectors\n"); ++ } + } + } ++ + } + else + { +@@ -6081,6 +7335,10 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, + } + + delete bb_vinfo; ++ if (bb_vinfo_trans) ++ { ++ bb_vinfo_trans = NULL; ++ } + + if (mode_i < vector_modes.length () + && VECTOR_MODE_P (autodetected_vector_mode) +@@ -7244,10 +8502,17 @@ vect_schedule_slp_node (vec_info *vinfo, + ready early, vectorized stores go before the last scalar + stmt which is where all uses are ready. 
*/ + stmt_vec_info last_stmt_info = NULL; +- if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) +- last_stmt_info = vect_find_first_scalar_stmt_in_slp (node); +- else /* DR_IS_WRITE */ +- last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); ++ ++ if (DR_GROUP_FIRST_ELEMENT (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); ++ else ++ { ++ if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) ++ last_stmt_info = vect_find_first_scalar_stmt_in_slp (node); ++ else /* DR_IS_WRITE */ ++ last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); ++ } + si = gsi_for_stmt (last_stmt_info->stmt); + } + else if ((STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type +diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc +index 349200411..3099f6743 100644 +--- a/gcc/tree-vect-stmts.cc ++++ b/gcc/tree-vect-stmts.cc +@@ -1369,10 +1369,10 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies, + + static void + vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt, +- gimple_stmt_iterator *gsi) ++ gimple_stmt_iterator *gsi, bool transpose=false) + { + if (gsi) +- vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi); ++ vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi, transpose); + else + vinfo->insert_on_entry (stmt_vinfo, new_stmt); + +@@ -1393,7 +1393,7 @@ vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt, + + tree + vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, +- gimple_stmt_iterator *gsi) ++ gimple_stmt_iterator *gsi, bool transpose) + { + gimple *init_stmt; + tree new_temp; +@@ -1418,7 +1418,7 @@ vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, + new_temp = make_ssa_name (TREE_TYPE (type)); + init_stmt = gimple_build_assign (new_temp, COND_EXPR, + val, true_val, false_val); +- vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); ++ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi, transpose); + val = new_temp; + } + } +@@ -1437,7 +1437,7 @@ vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, + { + init_stmt = gsi_stmt (gsi2); + gsi_remove (&gsi2, false); +- vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); ++ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi, transpose); + } + } + } +@@ -1446,7 +1446,7 @@ vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, + + new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_"); + init_stmt = gimple_build_assign (new_temp, val); +- vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); ++ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi, transpose); + return new_temp; + } + +@@ -1572,9 +1572,11 @@ vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node, + statement and create and return a stmt_vec_info for it. 
*/
+
+ static void
+-vect_finish_stmt_generation_1 (vec_info *,
+-			       stmt_vec_info stmt_info, gimple *vec_stmt)
++vect_finish_stmt_generation_1 (vec_info *vinfo,
++			       stmt_vec_info stmt_info, gimple *vec_stmt, bool transpose=false)
+ {
++  if (transpose)
++    stmt_vec_info vec_stmt_info = vinfo->add_pattern_stmt (vec_stmt, NULL);
+   if (dump_enabled_p ())
+     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
+
+@@ -1616,7 +1618,7 @@ vect_finish_replace_stmt (vec_info *vinfo,
+ void
+ vect_finish_stmt_generation (vec_info *vinfo,
+ 			     stmt_vec_info stmt_info, gimple *vec_stmt,
+-			     gimple_stmt_iterator *gsi)
++			     gimple_stmt_iterator *gsi, bool transpose)
+ {
+   gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
+
+@@ -1648,7 +1650,7 @@ vect_finish_stmt_generation (vec_info *vinfo,
+     }
+   }
+   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
+-  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
++  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt, transpose);
+ }
+
+ /* We want to vectorize a call to combined function CFN with function
+@@ -2159,6 +2161,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+   return NULL_TREE;
+ }
+
++/* Check successor BBs; a BB without a load is regarded as an empty BB.
++   Ignore empty BBs in the DFS.  */
++
++static unsigned
++mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
++{
++  unsigned num = 0;
++  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
++       !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gimple *stmt = gsi_stmt (gsi);
++      if (is_gimple_debug (stmt))
++        continue;
++      if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
++          && !gimple_has_volatile_ops (stmt))
++        {
++          if (gimple_assign_rhs_code (stmt) == MEM_REF
++              || gimple_assign_rhs_code (stmt) == ARRAY_REF)
++            {
++              stmts.safe_push (stmt);
++              num++;
++            }
++          else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
++                   || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
++            num++;
++        }
++    }
++  return num;
++}
++
++static bool
++check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
++{
++  for (unsigned ui = 0; ui < datarefs->length (); ui++)
++    {
++      tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
++      tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
++      if (TREE_CODE (op1) != TREE_CODE (op2))
++        continue;
++      if (TREE_CODE (op1) == ADDR_EXPR)
++        {
++          op1 = TREE_OPERAND (op1, 0);
++          op2 = TREE_OPERAND (op2, 0);
++        }
++      enum tree_code code = TREE_CODE (op1);
++      switch (code)
++        {
++        case VAR_DECL:
++          if (DECL_NAME (op1) == DECL_NAME (op2)
++              && DR_IS_READ ((*datarefs)[ui]))
++            return true;
++          break;
++        case SSA_NAME:
++          if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
++              && DR_IS_READ ((*datarefs)[ui]))
++            return true;
++          break;
++        default:
++          break;
++        }
++    }
++  return false;
++}
++
++/* Iterate over all load STMTS; if one satisfies the same-base condition
++   with a vectorized stmt, return.  Otherwise, set SUCCESS to false.
*/
++
++static void
++check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
++               stmt_vec_info stmt_info, bool &success)
++{
++  if (stmt_info == NULL)
++    {
++      success = false;
++      return;
++    }
++  if (DR_IS_READ (stmt_info->dr_aux.dr))
++    {
++      success = false;
++      return;
++    }
++  unsigned ui = 0;
++  gimple *candidate = NULL;
++  FOR_EACH_VEC_ELT (stmts, ui, candidate)
++    {
++      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
++        continue;
++
++      if (candidate->bb != candidate->bb->loop_father->header)
++        {
++          success = false;
++          return;
++        }
++      auto_vec<data_reference_p> datarefs;
++      tree res = find_data_references_in_bb (candidate->bb->loop_father,
++                                             candidate->bb, &datarefs);
++      if (res == chrec_dont_know)
++        {
++          success = false;
++          return;
++        }
++      if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
++        return;
++    }
++  success = false;
++}
++
++/* Depth-first search from the present BB.  If a successor has load STMTS,
++   stop further searching.  */
++
++static void
++dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
++              bool &success, vec<basic_block> &visited_bbs)
++{
++  if (bb == cfun->cfg->x_exit_block_ptr)
++    {
++      success = false;
++      return;
++    }
++  if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
++    return;
++
++  visited_bbs.safe_push (bb);
++  auto_vec<gimple *> stmts;
++  unsigned num = mem_refs_in_bb (bb, stmts);
++  /* Empty BB.  */
++  if (num == 0)
++    {
++      edge e;
++      edge_iterator ei;
++      FOR_EACH_EDGE (e, ei, bb->succs)
++        {
++          dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
++          if (!success)
++            return;
++        }
++      return;
++    }
++  /* Non-empty BB.  */
++  check_vec_use (loop_vinfo, stmts, stmt_info, success);
++}
++
++/* For a grouped store, check whether all successors of the present BB have
++   a vectorized load from the same base as the store.  If so, set
++   memory_access_type to VMAT_CONTIGUOUS_PERMUTE instead of
++   VMAT_LOAD_STORE_LANES.  */
++
++static bool
++conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
++{
++  gimple *stmt = stmt_vinfo->stmt;
++  if (gimple_code (stmt) != GIMPLE_ASSIGN)
++    return false;
++
++  if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
++    return false;
++
++  basic_block bb = stmt->bb;
++  bool success = true;
++  auto_vec<basic_block> visited_bbs;
++  visited_bbs.safe_push (bb);
++  edge e;
++  edge_iterator ei;
++  FOR_EACH_EDGE (e, ei, bb->succs)
++    dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
++  return success;
++}
++
+ /* A subroutine of get_load_store_type, with a subset of the same
+    arguments.  Handle the case where STMT_INFO is part of a grouped load
+    or store.
+@@ -2373,6 +2542,20 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
+ 	  *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+ 	  overrun_p = would_overrun_p;
+ 	}
++
++      if (*memory_access_type == VMAT_LOAD_STORE_LANES
++          && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
++          && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
++                       loop_vinfo->vectorization_factor)
++          && conti_perm (stmt_info, loop_vinfo)
++          && (vls_type == VLS_LOAD
++              ? vect_grouped_load_supported (vectype, single_element_p,
++                                             group_size)
++              : vect_grouped_store_supported (vectype, group_size)))
++        {
++          *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
++          overrun_p = would_overrun_p;
++        }
+     }
+
+   /* As a last resort, trying using a gather load or scatter store.
+@@ -7456,6 +7639,154 @@ vectorizable_scan_store (vec_info *vinfo,
+   return true;
+ }
+
++/* Function vect_permute_store_chains
++
++   Call function vect_permute_store_chain ().
++   Given a chain of interleaved stores in DR_CHAIN, generate
++   interleave_high/low stmts to reorder the data correctly.
++   Return the final references for stores in RESULT_CHAIN.  */
++
++static void
++vect_permute_store_chains (vec_info *vinfo, vec<tree> dr_chain,
++                           unsigned int num_each, stmt_vec_info stmt_info,
++                           gimple_stmt_iterator *gsi, vec<tree> *result_chain,
++                           unsigned int group)
++{
++  unsigned int k = 0;
++  unsigned int t = 0;
++
++  /* Divide vectors into GROUP parts.  And permute every NUM_EACH vectors
++     together.  */
++  for (k = 0; k < group; k++)
++    {
++      auto_vec<tree> dr_chain_transposed (num_each);
++      auto_vec<tree> result_chain_transposed (num_each);
++      for (t = k; t < dr_chain.length (); t = t + group)
++        {
++          dr_chain_transposed.quick_push (dr_chain[t]);
++        }
++      vect_permute_store_chain (vinfo, dr_chain_transposed, num_each,
++                                stmt_info, gsi, &result_chain_transposed);
++      for (t = 0; t < num_each; t++)
++        {
++          result_chain->quick_push (result_chain_transposed[t]);
++        }
++    }
++}
++
++/* Function transpose_oprnd_store
++
++   Calculate the transposed results from VEC_OPRNDS (VEC_STMT)
++   for vectorizable_store.  */
++
++static void
++transpose_oprnd_store (vec_info *vinfo, vec<tree> vec_oprnds,
++                       vec<tree> *result_chain, unsigned int vec_num,
++                       unsigned int const_nunits, unsigned int array_num,
++                       stmt_vec_info first_stmt_info,
++                       gimple_stmt_iterator *gsi)
++{
++  unsigned int group_for_transform = 0;
++  unsigned int num_each = 0;
++
++  /* Transpose back for vec_oprnds.  */
++  /* vec = {vec1, vec2, ...}  */
++  if (array_num < const_nunits
++      && const_nunits % array_num == 0)
++    {
++      vect_transpose_store_chain (vinfo, vec_oprnds,
++                                  vec_num, array_num,
++                                  first_stmt_info,
++                                  gsi, result_chain);
++    }
++  /* vec1 = {vec_part1}, vec2 = {vec_part2}, ...  */
++  else if (array_num >= const_nunits
++           && array_num % const_nunits == 0)
++    {
++      group_for_transform = array_num / const_nunits;
++      num_each = vec_oprnds.length () / group_for_transform;
++      vect_permute_store_chains (vinfo, vec_oprnds,
++                                 num_each, first_stmt_info,
++                                 gsi, result_chain,
++                                 group_for_transform);
++    }
++  else
++    {
++      gcc_unreachable ();
++    }
++}
++
++static dr_vec_info *
++get_dr_info (stmt_vec_info stmt_info)
++{
++  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
++  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
++    {
++      SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
++    }
++  return dr_info;
++}
++
++static unsigned
++dr_align_vect_store (vec_info *vinfo, dr_vec_info *cur_first_dr_info,
++                     tree vectype, unsigned HOST_WIDE_INT &align)
++{
++  unsigned misalign = 0;
++  align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info));
++  if (aligned_access_p (cur_first_dr_info, vectype))
++    {
++      return misalign;
++    }
++  else if (cur_first_dr_info->misalignment == -1)
++    {
++      align = dr_alignment (vect_dr_behavior (vinfo, cur_first_dr_info));
++    }
++  else
++    {
++      misalign = cur_first_dr_info->misalignment;
++    }
++  return misalign;
++}
++
++static void
++add_new_stmt_vect_store (vec_info *vinfo, tree vectype, tree dataref_ptr,
++                         tree dataref_offset, tree ref_type,
++                         dr_vec_info *cur_first_dr_info, tree vec_oprnd,
++                         gimple_stmt_iterator *gsi, stmt_vec_info stmt_info)
++{
++  /* Data align.
*/
++  unsigned HOST_WIDE_INT align;
++  unsigned misalign = dr_align_vect_store (vinfo, cur_first_dr_info,
++                                           vectype, align);
++
++  if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME)
++    {
++      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign);
++    }
++
++  /* Get data_ref.  */
++  tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0);
++  tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, offset);
++  if (aligned_access_p (cur_first_dr_info, vectype))
++    {
++      ;
++    }
++  else if (cur_first_dr_info->misalignment == -1)
++    {
++      TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref),
++                                                 align * BITS_PER_UNIT);
++    }
++  else
++    {
++      tree elem_type = TREE_TYPE (vectype);
++      TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref),
++                                                 TYPE_ALIGN (elem_type));
++    }
++  /* Add new stmt.  */
++  vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr));
++  gassign *new_stmt = gimple_build_assign (data_ref, vec_oprnd);
++  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi, true);
++}
+
+ /* Function vectorizable_store.
+
+@@ -8333,6 +8664,16 @@ vectorizable_store (vec_info *vinfo,
+ 						  &vec_offsets);
+ 	      vec_offset = vec_offsets[0];
+ 	    }
++	  /* If the stmt_info needs transpose recovery, dataref_ptr
++	     will be calculated later.  */
++	  else if (memory_access_type == VMAT_CONTIGUOUS
++		   && is_a <bb_vec_info> (vinfo)
++		   && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++		   && DR_GROUP_SLP_TRANSPOSE (
++			DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	    {
++	      dataref_ptr = NULL_TREE;
++	    }
+ 	  else
+ 	    dataref_ptr
+ 	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+@@ -8423,6 +8764,75 @@ vectorizable_store (vec_info *vinfo,
+ 	}
+       else
+ 	{
++	  /* group_size: the size of the group after transposing and merging.
++	     group_size_b: the size of the group before transposing and
++	     merging, and only group_size_b >= const_nunits is supported.
++	     array_num: the number of arrays.
++	     const_nunits: TYPE_VECTOR_SUBPARTS (vectype).
++	     ncontinues: group_size_b / const_nunits, it means the number of
++	     times an array is stored in memory.  */
++	  if (slp && is_a <bb_vec_info> (vinfo)
++	      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	      && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	    {
++	      if (dump_enabled_p ())
++		{
++		  dump_printf_loc (MSG_NOTE, vect_location,
++				   "vectorizable_store for slp transpose.\n");
++		}
++	      /* Transpose back for grouped stores.  */
++	      vect_transform_back_slp_grouped_stores (bb_vinfo,
++						      first_stmt_info);
++
++	      result_chain.create (vec_oprnds.length ());
++	      unsigned int const_nunits = nunits.to_constant ();
++	      unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info);
++	      unsigned int array_num = group_size / group_size_b;
++	      transpose_oprnd_store (vinfo, vec_oprnds, &result_chain, vec_num,
++				     const_nunits, array_num,
++				     first_stmt_info, gsi);
++
++	      /* For every store group, not for every vec, because transposing
++		 and merging have changed the data reference access.  */
++	      gcc_assert (group_size_b >= const_nunits);
++	      unsigned int ncontinues = group_size_b / const_nunits;
++
++	      unsigned int k = 0;
++	      for (i = 0; i < array_num; i++)
++		{
++		  stmt_vec_info first_stmt_b;
++		  BB_VINFO_GROUPED_STORES (vinfo).iterate (i, &first_stmt_b);
++		  bool simd_lane_access_p
++		    = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_b) != 0;
++		  tree ref_type = get_group_alias_ptr_type (first_stmt_b);
++		  dataref_ptr = vect_create_data_ref_ptr (
++				  vinfo, first_stmt_b, aggr_type,
++				  simd_lane_access_p ?
loop : NULL,
++				  offset, &dummy, gsi, &ptr_incr,
++				  simd_lane_access_p, bump);
++		  dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_b);
++		  for (unsigned int t = 0; t < ncontinues; t++)
++		    {
++		      vec_oprnd = result_chain[k];
++		      k++;
++		      if (t > 0)
++			{
++			  /* Bump the vector pointer.  */
++			  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr,
++							 ptr_incr, gsi,
++							 first_stmt_b, bump);
++			}
++		      add_new_stmt_vect_store (vinfo, vectype, dataref_ptr,
++					       dataref_offset, ref_type,
++					       cur_first_dr_info, vec_oprnd,
++					       gsi, first_stmt_b);
++		    }
++		}
++	      oprnds.release ();
++	      result_chain.release ();
++	      vec_oprnds.release ();
++	      return true;
++	    }
+ 	  new_stmt = NULL;
+ 	  if (grouped_store)
+ 	    {
+@@ -8719,6 +9129,451 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
+   return true;
+ }
+
++static tree
++calculate_new_type (tree vectype, unsigned int const_nunits,
++                    unsigned int group_size_b, unsigned int &nloads,
++                    unsigned int &ncontinues, tree &lvectype)
++{
++  tree ltype = TREE_TYPE (vectype);
++  /* nloads is the number of ARRAYs in a vector.
++     vectemp = {a, b, ...}  */
++  if (group_size_b < const_nunits)
++    {
++      tree ptype;
++      tree vtype
++        = vector_vector_composition_type (vectype,
++                                          const_nunits / group_size_b,
++                                          &ptype);
++      if (vtype != NULL_TREE)
++        {
++          nloads = const_nunits / group_size_b;
++          lvectype = vtype;
++          ltype = ptype;
++          ncontinues = 1;
++        }
++    }
++  /* ncontinues is the number of vectors from an ARRAY.
++     vectemp1 = {a0, a1, ...}
++     ...
++     vectempm = {ak, ak+1, ...}  */
++  else
++    {
++      nloads = 1;
++      ltype = vectype;
++      ncontinues = group_size_b / const_nunits;
++    }
++  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
++  return ltype;
++}
++
++static void
++generate_old_load_permutations (slp_tree slp_node, unsigned int group_size,
++                                vec<unsigned> &old_load_permutation)
++{
++  /* Generate the old load permutations from the slp_node.  */
++  unsigned i = 0;
++  unsigned k = 0;
++
++  /* If SLP_NODE has load_permutation, we copy it to old_load_permutation.
++     Otherwise, we generate a permutation sequentially.  */
++  if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
++    {
++      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), i, k)
++        {
++          old_load_permutation.safe_push (k);
++        }
++    }
++  else
++    {
++      for (unsigned i = 0; i < group_size; i++)
++        {
++          old_load_permutation.safe_push (i);
++        }
++    }
++}
++
++static void
++generate_new_load_permutation_mapping (unsigned slp_node_length,
++                                       vec<unsigned> &group_idx,
++                                       const vec<unsigned> &load_permutation,
++                                       unsigned int group_size_b,
++                                       unsigned &new_group_size,
++                                       vec<unsigned> &group_from)
++{
++  /* group_num_vec: only stores the group_loads IDs which are calculated from
++     load_permutation.  */
++  auto_vec<unsigned> group_num_vec;
++
++  /* Calculate which group_loads the stmts in SLP_NODE are from.  */
++  unsigned i = 0;
++  unsigned k = 0;
++  FOR_EACH_VEC_ELT (load_permutation, i, k)
++    {
++      unsigned int t0 = k / group_size_b;
++      if (!group_num_vec.contains (t0))
++        {
++          group_num_vec.safe_push (t0);
++        }
++      group_from.safe_push (t0);
++    }
++  group_num_vec.qsort (cmp_for_group_num);
++  /* n_groups: the number of group_loads.  */
++  unsigned int n_groups = group_num_vec.length ();
++  new_group_size = n_groups * group_size_b;
++  for (i = 0; i < n_groups; i++)
++    {
++      group_idx.safe_push (group_num_vec[i] * group_size_b);
++    }
++  /* A new mapping from group_ind_vec to group_from.
++     For example:
++     Origin: group_from = {1,1,3,3,5,5,7,7};
++     After mapping: group_from = {0,0,1,1,2,2,3,3};  */
++  auto_vec<unsigned> group_ind_vec (n_groups);
++  for (k = 0; k < n_groups; k++)
++    {
++      group_ind_vec.safe_push (k);
++    }
++  for (i = 0; i < slp_node_length; i++)
++    {
++      for (k = 0; k < n_groups; k++)
++        {
++          if (group_from[i] == group_num_vec[k])
++            {
++              group_from[i] = group_ind_vec[k];
++              break;
++            }
++        }
++    }
++}
++
++static void
++generate_new_load_permutation (vec<unsigned> &new_load_permutation,
++                               const vec<unsigned> &old_load_permutation,
++                               slp_tree slp_node, bool &this_load_permuted,
++                               const vec<unsigned> &group_from,
++                               unsigned int group_size_b)
++{
++  unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length ();
++  /* Generate the new load permutation from the new mapping.  */
++  new_load_permutation.create (slp_node_length);
++  unsigned i = 0;
++  unsigned k = 0;
++  FOR_EACH_VEC_ELT (old_load_permutation, i, k)
++    {
++      /* t1 is the new permutation of k in the old permutation.
++	 t1 = base_address + offset:
++	 base_address = group_from[i] * group_size_b;
++	 offset = k % group_size_b.  */
++      unsigned int t1
++        = group_from[i] * group_size_b + k % group_size_b;
++      new_load_permutation.safe_push (t1);
++      if (t1 != k)
++        {
++          this_load_permuted = true;
++        }
++    }
++}
++
++static bool
++is_slp_perm (bool slp_perm, bool this_load_permuted, poly_uint64 nunits,
++             unsigned int group_size, stmt_vec_info first_stmt_info)
++{
++  /* Calculate the unrolling factor based on the smallest type.  */
++  poly_uint64 unrolling_factor
++    = exact_div (common_multiple (nunits, group_size), group_size);
++  /* The load requires permutation when unrolling exposes
++     a gap either because the group is larger than the SLP
++     group-size or because there is a gap between the groups.  */
++  if (!slp_perm && !this_load_permuted
++      && (known_eq (unrolling_factor, 1U)
++          || (group_size == DR_GROUP_SIZE (first_stmt_info)
++              && DR_GROUP_GAP (first_stmt_info) == 0)))
++    {
++      return false;
++    }
++  else
++    {
++      return true;
++    }
++}
++
++static void
++generate_load_permutation (slp_tree slp_node, unsigned &new_group_size,
++                           unsigned int group_size, unsigned int group_size_b,
++                           bool &this_load_permuted, vec<unsigned> &group_idx,
++                           vec<unsigned> &new_load_permutation)
++{
++  /* Generate the old load permutations from SLP_NODE.  */
++  vec<unsigned> old_load_permutation;
++  old_load_permutation.create (group_size);
++  generate_old_load_permutations (slp_node, group_size, old_load_permutation);
++
++  /* Calculate which group_loads the stmts in SLP_NODE are from.  */
++  unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length ();
++  /* group_from: stores the group_loads ID for every stmt in SLP_NODE.  */
++  vec<unsigned> group_from;
++  group_from.create (slp_node_length);
++  generate_new_load_permutation_mapping (slp_node_length, group_idx,
++                                         old_load_permutation,
++                                         group_size_b, new_group_size,
++                                         group_from);
++
++  /* Generate the new load permutation from the new mapping and calculate
++     the this_load_permuted flag.  If this_load_permuted is true, we need to
++     execute the SLP permutation using the new load permutation.
*/ ++ generate_new_load_permutation (new_load_permutation, old_load_permutation, ++ slp_node, this_load_permuted, group_from, ++ group_size_b); ++ old_load_permutation.release (); ++ group_from.release (); ++} ++ ++static unsigned int ++dr_align_vect_load (vec_info *vinfo, dr_vec_info *cur_first_dr_info, ++ tree vectype, unsigned HOST_WIDE_INT &align, ++ enum dr_alignment_support alignment_support_scheme) ++{ ++ unsigned int misalign = 0; ++ ++ align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info)); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (cur_first_dr_info, vectype)); ++ } ++ else if (cur_first_dr_info->misalignment == -1) ++ { ++ align = dr_alignment (vect_dr_behavior (vinfo, cur_first_dr_info)); ++ } ++ else ++ { ++ misalign = cur_first_dr_info->misalignment; ++ } ++ return misalign; ++} ++ ++static stmt_vec_info ++add_new_stmt_vect_load (vec_info *vinfo, tree vectype, tree dataref_ptr, ++ tree dataref_offset, tree ref_type, tree ltype, ++ gassign *(&new_stmt), dr_vec_info *cur_first_dr_info, ++ gimple_stmt_iterator *gsi, stmt_vec_info stmt_info) ++{ ++ /* Data align. */ ++ int malign = dr_misalignment (cur_first_dr_info, vectype); ++ enum dr_alignment_support alignment_support_scheme ++ = vect_supportable_dr_alignment (vinfo, cur_first_dr_info, ++ vectype, malign); ++ unsigned HOST_WIDE_INT align; ++ unsigned int misalign = dr_align_vect_load (vinfo, cur_first_dr_info, ++ vectype, align, ++ alignment_support_scheme); ++ if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME) ++ { ++ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); ++ } ++ ++ /* Get data_ref. */ ++ tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0); ++ tree data_ref = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ ; ++ } ++ else if (cur_first_dr_info->misalignment == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), align * BITS_PER_UNIT); ++ } ++ else ++ { ++ tree elem_type = TREE_TYPE (vectype); ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), TYPE_ALIGN (elem_type)); ++ } ++ ++ /* Add new stmt. 
*/
++  vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr));
++  new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
++  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi, true);
++  stmt_vec_info vec_stmt_info = vinfo->lookup_stmt (new_stmt);
++  return vec_stmt_info;
++}
++
++static void
++push_new_stmt_to_dr_chain (bool slp_perm, stmt_vec_info new_stmt_info,
++                           vec<tree> dr_chain, slp_tree slp_node)
++{
++  if (slp_perm)
++    dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
++  else
++    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info->stmt);
++}
++
++static stmt_vec_info
++get_first_stmt_info_before_transpose (stmt_vec_info first_stmt_info,
++                                      unsigned int group_el,
++                                      unsigned int group_size)
++{
++  stmt_vec_info last_stmt_info = first_stmt_info;
++  unsigned int count = 0;
++  gcc_assert (group_el < group_size);
++  while (count < group_el)
++    {
++      last_stmt_info = DR_GROUP_NEXT_ELEMENT (last_stmt_info);
++      count++;
++    }
++  return last_stmt_info;
++}
++
++static stmt_vec_info
++add_new_stmt_for_nloads_greater_than_one (vec_info *vinfo, tree lvectype,
++                                          tree vectype,
++                                          vec<constructor_elt, va_gc> *v,
++                                          stmt_vec_info stmt_info,
++                                          gimple_stmt_iterator *gsi)
++{
++  tree vec_inv = build_constructor (lvectype, v);
++  tree new_temp = vect_init_vector (vinfo, stmt_info, vec_inv, lvectype, gsi, true);
++  stmt_vec_info new_stmt_info = vinfo->lookup_def (new_temp);
++  if (lvectype != vectype)
++    {
++      gassign *new_stmt = gimple_build_assign (make_ssa_name (vectype),
++                                               VIEW_CONVERT_EXPR,
++                                               build1 (VIEW_CONVERT_EXPR,
++                                                       vectype, new_temp));
++      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi, true);
++      new_stmt_info = vinfo->lookup_stmt (new_stmt);
++    }
++  return new_stmt_info;
++}
++
++/* Function new_vect_stmt_for_nloads.
++
++   Create a new VEC_STMT when nloads ARRAYs are merged into a vector.
++
++   ncopies is the number of vectors that need to be loaded from memory.
++   nloads is the number of ARRAYs in a vector.
++   vectemp = {a, b, ...}  */
++
++static void
++new_vect_stmt_for_nloads (vec_info *vinfo, unsigned int ncopies,
++                          unsigned int nloads, const vec<unsigned> &group_idx,
++                          stmt_vec_info stmt_info, offset_info *offset_info,
++                          vectype_info *vectype_info,
++                          vect_memory_access_type memory_access_type,
++                          bool slp_perm, vec<tree> dr_chain, slp_tree slp_node,
++                          gimple_stmt_iterator *gsi)
++{
++  vec<constructor_elt, va_gc> *v = NULL;
++  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
++  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
++  stmt_vec_info first_stmt_info_b = NULL;
++  stmt_vec_info new_stmt_info = NULL;
++  tree dataref_ptr = NULL_TREE;
++  tree dummy;
++  gimple *ptr_incr = NULL;
++  unsigned int n = 0;
++  for (unsigned int i = 0; i < ncopies; i++)
++    {
++      vec_alloc (v, nloads);
++      for (unsigned int t = 0; t < nloads; t++)
++        {
++          first_stmt_info_b = get_first_stmt_info_before_transpose (
++                                first_stmt_info, group_idx[n++], group_size);
++          dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_info_b);
++          tree bump = vect_get_data_ptr_increment (vinfo, cur_first_dr_info,
++                                                   vectype_info->ltype,
++                                                   memory_access_type);
++          bool simd_lane_access_p
++            = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0;
++
++          /* Create dataref_ptr, which points to init_address.
*/
++          dataref_ptr = vect_create_data_ref_ptr (
++                          vinfo, first_stmt_info_b, vectype_info->ltype, NULL,
++                          offset_info->offset, &dummy, gsi, &ptr_incr,
++                          simd_lane_access_p, bump);
++
++          gassign *new_stmt = NULL;
++          new_stmt_info = add_new_stmt_vect_load (vinfo, vectype_info->vectype, dataref_ptr,
++                                                  offset_info->dataref_offset,
++                                                  vectype_info->ref_type, vectype_info->ltype,
++                                                  new_stmt, cur_first_dr_info, gsi,
++                                                  first_stmt_info_b);
++
++          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, gimple_assign_lhs (new_stmt));
++        }
++      new_stmt_info = add_new_stmt_for_nloads_greater_than_one (
++                        vinfo, vectype_info->lvectype,
++                        vectype_info->vectype, v,
++                        first_stmt_info_b, gsi);
++      push_new_stmt_to_dr_chain (slp_perm, new_stmt_info,
++                                 dr_chain, slp_node);
++    }
++}
++
++/* Function new_vect_stmt_for_ncontinues.
++
++   Create new VEC_STMTs when an ARRAY is divided into several vectors.
++
++   n_groups is the number of ARRAYs.
++   ncontinues is the number of vectors from an ARRAY.
++   vectemp1 = {a0, a1, ...}
++   ...
++   vectempm = {ak, ak+1, ...}  */
++
++static void
++new_vect_stmt_for_ncontinues (vec_info *vinfo, unsigned int ncontinues,
++                              const vec<unsigned> &group_idx,
++                              stmt_vec_info stmt_info,
++                              offset_info *offset_info,
++                              vectype_info *vectype_info,
++                              vect_memory_access_type memory_access_type,
++                              bool slp_perm, vec<tree> &dr_chain,
++                              slp_tree slp_node,
++                              gimple_stmt_iterator *gsi)
++{
++  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
++  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
++  stmt_vec_info new_stmt_info = NULL;
++  tree dataref_ptr = NULL_TREE;
++  tree dummy;
++  gimple *ptr_incr = NULL;
++  unsigned int n_groups = group_idx.length ();
++  for (unsigned int i = 0; i < n_groups; i++)
++    {
++      stmt_vec_info first_stmt_info_b = get_first_stmt_info_before_transpose (
++                                          first_stmt_info, group_idx[i], group_size);
++      dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_info_b);
++      tree bump = vect_get_data_ptr_increment (vinfo, cur_first_dr_info,
++                                               vectype_info->ltype, memory_access_type);
++      bool simd_lane_access_p
++        = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0;
++      for (unsigned int k = 0; k < ncontinues; k++)
++        {
++          /* Create dataref_ptr, which points to init_address.  */
++          if (k == 0)
++            {
++              dataref_ptr = vect_create_data_ref_ptr (
++                              vinfo, first_stmt_info_b, vectype_info->ltype, NULL,
++                              offset_info->offset, &dummy, gsi, &ptr_incr,
++                              simd_lane_access_p, bump);
++            }
++          else
++            {
++              dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
++                                             gsi, first_stmt_info_b, bump);
++            }
++          gassign *new_stmt = NULL;
++          new_stmt_info = add_new_stmt_vect_load (vinfo, vectype_info->vectype, dataref_ptr,
++                                                  offset_info->dataref_offset,
++                                                  vectype_info->ref_type, vectype_info->ltype,
++                                                  new_stmt, cur_first_dr_info, gsi,
++                                                  first_stmt_info_b);
++          push_new_stmt_to_dr_chain (slp_perm, new_stmt_info,
++                                     dr_chain, slp_node);
++        }
++    }
++}
++
+ /* vectorizable_load.
+
+    Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
+@@ -9338,6 +10193,8 @@ vectorizable_load (vec_info *vinfo,
+ 	  if (bb_vinfo)
+ 	    first_stmt_info_for_drptr
+ 	      = vect_find_first_scalar_stmt_in_slp (slp_node);
++	  // first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
++
+
+ 	  /* Check if the chain of loads is already vectorized.
*/
+ 	  if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
+@@ -9601,6 +10458,9 @@ vectorizable_load (vec_info *vinfo,
+ 	}
+       tree vec_mask = NULL_TREE;
+       poly_uint64 group_elt = 0;
++      unsigned new_group_size = 0;
++      vec<unsigned> new_load_permutation;
++
+       for (j = 0; j < ncopies; j++)
+ 	{
+ 	  /* 1. Create the vector or array pointer update chain.  */
+@@ -9621,6 +10481,15 @@ vectorizable_load (vec_info *vinfo,
+ 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+ 	      dataref_offset = build_int_cst (ref_type, 0);
+ 	    }
++	  /* If the stmt_info needs transpose recovery, dataref_ptr
++	     will be calculated later.  */
++	  else if (slp && is_a <bb_vec_info> (vinfo)
++		   && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++		   && DR_GROUP_SLP_TRANSPOSE (
++			DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	    {
++	      dataref_ptr = NULL_TREE;
++	    }
+ 	  else if (diff_first_stmt_info)
+ 	    {
+ 	      dataref_ptr
+@@ -9731,6 +10600,63 @@ vectorizable_load (vec_info *vinfo,
+ 	      /* Record that VEC_ARRAY is now dead.  */
+ 	      vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
+ 	    }
++	  else if (slp && is_a <bb_vec_info> (vinfo)
++		   && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++		   && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	    {
++	      if (dump_enabled_p ())
++		{
++		  dump_printf_loc (MSG_NOTE, vect_location,
++				   "vectorizable_load for slp transpose.\n");
++		}
++	      /* group_size: the size of the group after merging.
++		 group_size_b: the size of the group before merging.
++		 const_nunits: TYPE_VECTOR_SUBPARTS (vectype), it is the number
++		 of elements in a vector.
++		 nloads: const_nunits / group_size_b or 1, it means the number
++		 of ARRAYs in a vector.
++		 ncontinues: group_size_b / const_nunits or 1, it means the
++		 number of vectors from an ARRAY.  */
++	      unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info);
++	      unsigned int const_nunits = nunits.to_constant ();
++	      unsigned int nloads = const_nunits;
++	      unsigned int ncontinues = group_size_b;
++	      tree lvectype = vectype;
++	      tree ltype = calculate_new_type (vectype, const_nunits,
++					       group_size_b, nloads,
++					       ncontinues, lvectype);
++	      bool this_load_permuted = false;
++	      auto_vec<unsigned> group_idx;
++	      generate_load_permutation (slp_node, new_group_size, group_size,
++					 group_size_b, this_load_permuted,
++					 group_idx, new_load_permutation);
++	      slp_perm = is_slp_perm (slp_perm, this_load_permuted, nunits,
++				      group_size, first_stmt_info);
++
++	      /* ncopies: the number of vectors that need to be loaded from
++		 memory.  */
++	      unsigned int ncopies = new_group_size / const_nunits;
++	      offset_info offset_info = {offset, NULL_TREE, dataref_offset};
++	      vectype_info vectype_info = {vectype, ltype, lvectype, ref_type};
++	      if (slp_perm)
++		{
++		  dr_chain.create (ncopies);
++		}
++	      if (nloads > 1 && ncontinues == 1)
++		{
++		  new_vect_stmt_for_nloads (vinfo, ncopies, nloads, group_idx,
++					    stmt_info, &offset_info, &vectype_info,
++					    memory_access_type, slp_perm, dr_chain,
++					    slp_node, gsi);
++		}
++	      else
++		{
++		  new_vect_stmt_for_ncontinues (vinfo, ncontinues, group_idx,
++						stmt_info, &offset_info,
++						&vectype_info, memory_access_type,
++						slp_perm, dr_chain, slp_node, gsi);
++		}
++	    }
+ 	  else
+ 	    {
+ 	      for (i = 0; i < vec_num; i++)
+@@ -10177,7 +11103,32 @@ vectorizable_load (vec_info *vinfo,
+ 	  if (slp && !slp_perm)
+ 	    continue;
+
+-	  if (slp_perm)
++	  /* Use the new load permutation to generate vector permute statements
++	     from a list of loads in DR_CHAIN.
*/
++	  if (slp && slp_perm && is_a <bb_vec_info> (vinfo)
++	      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	      && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	    {
++	      unsigned n_perms;
++	      stmt_vec_info stmt_info_ = SLP_TREE_SCALAR_STMTS (slp_node)[0];
++	      unsigned int old_size = DR_GROUP_SIZE (stmt_info);
++	      DR_GROUP_SIZE (stmt_info_) = new_group_size;
++	      vec<unsigned> old_load_permutation
++		= SLP_TREE_LOAD_PERMUTATION (slp_node);
++	      SLP_TREE_LOAD_PERMUTATION (slp_node) = new_load_permutation;
++	      bool perm_load_success = vect_transform_slp_perm_load (
++					 vinfo, slp_node, dr_chain, gsi, vf,
++					 false, &n_perms);
++	      DR_GROUP_SIZE (stmt_info_) = old_size;
++	      SLP_TREE_LOAD_PERMUTATION (slp_node) = old_load_permutation;
++	      new_load_permutation.release ();
++	      if (!perm_load_success)
++		{
++		  dr_chain.release ();
++		  return false;
++		}
++	    }
++	  else if (slp_perm)
+ 	    {
+ 	      unsigned n_perms;
+ 	      /* For SLP we know we've seen all possible uses of dr_chain so
+diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+index 642eb0aeb..e13bc6c99 100644
+--- a/gcc/tree-vectorizer.h
++++ b/gcc/tree-vectorizer.h
+@@ -412,6 +412,21 @@ public:
+   vec<ddr_p> ddrs;
+ };
+
++/* Information about offset in vectorizable_load.  */
++struct offset_info {
++  tree offset;
++  tree byte_offset;
++  tree dataref_offset;
++};
++
++/* Information about vectype in vectorizable_load.  */
++struct vectype_info {
++  tree vectype;
++  tree ltype;
++  tree lvectype;
++  tree ref_type;
++};
++
+ /* Vectorizer state common between loop and basic-block vectorization.  */
+ class vec_info {
+ public:
+@@ -455,6 +470,14 @@ public:
+      stmt in the chain.  */
+   auto_vec<stmt_vec_info> grouped_stores;
+
++  /* All interleaving chains of loads, represented by the first
++     stmt in the chain.  */
++  auto_vec<stmt_vec_info> grouped_loads;
++
++  /* All interleaving chains of stores (before being transposed), represented
++     by all stmts in the chain.  */
++  auto_vec<vec<stmt_vec_info> > scalar_stores;
++
+   /* The set of vector modes used in the vectorized region.  */
+   mode_set used_vector_modes;
+
+@@ -899,6 +922,8 @@ public:
+ #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
+ #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
+ #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
++#define LOOP_VINFO_GROUPED_LOADS(L) (L)->grouped_loads
++#define LOOP_VINFO_SCALAR_STORES(L) (L)->scalar_stores
+ #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
+ #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
+ #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
+@@ -982,6 +1007,25 @@ public:
+   vec<basic_block> bbs;
+
+   vec<slp_root> roots;
++
++  /* True if bb_vinfo can proceed to vect_analyze_slp.  */
++  bool before_slp;
++
++  /* True if bb_vinfo is a transposed version.  */
++  bool transposed;
++
++  /* The number of transposed groups.  */
++  int transposed_group;
++
++  /* The cost of the scalar iterations.  */
++  int scalar_cost;
++
++  /* The cost of the vector prologue and epilogue, including peeled
++     iterations and set-up code.  */
++  int vec_outside_cost;
++
++  /* The cost of the vector loop body.
*/ ++ int vec_inside_cost; + } *bb_vec_info; + + #define BB_VINFO_BB(B) (B)->bb +@@ -989,6 +1033,14 @@ public: + #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances + #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs + #define BB_VINFO_DDRS(B) (B)->shared->ddrs ++#define BB_VINFO_GROUPED_LOADS(B) (B)->grouped_loads ++#define BB_VINFO_SCALAR_STORES(B) (B)->scalar_stores ++#define BB_VINFO_VEC_OUTSIDE_COST(B) (B)->vec_outside_cost ++#define BB_VINFO_VEC_INSIDE_COST(B) (B)->vec_inside_cost ++#define BB_VINFO_SCALAR_COST(B) (B)->scalar_cost ++#define BB_VINFO_SLP_TRANSPOSED(B) (B)->transposed ++#define BB_VINFO_BEFORE_SLP(B) (B)->before_slp ++#define BB_VINFO_TRANS_GROUPS(B) (B)->transposed_group + + /*-----------------------------------------------------------------*/ + /* Info on vectorized defs. */ +@@ -1219,6 +1271,17 @@ public: + stmt_vec_info next_element; + /* The size of the group. */ + unsigned int size; ++ ++ /* The size of the group before being transposed. */ ++ unsigned int size_before_transpose; ++ ++ /* If true, the stmt_info is SLP-transposed. */ ++ bool slp_transpose; ++ ++ /* The store group number, used to rebuild the interleaving chain ++ during the transpose phase. Value -1 means the group cannot be ++ transposed. */ ++ int group_number; ++ + /* For stores, number of stores from this group seen. We vectorize the last + one. */ + unsigned int store_count; +@@ -1226,6 +1289,9 @@ public: + is 1. */ + unsigned int gap; + ++ /* The gap before being transposed. */ ++ unsigned int gap_before_transpose; ++ + /* The minimum negative dependence distance this stmt participates in + or zero if none. */ + unsigned int min_neg_dist; +@@ -1427,6 +1493,12 @@ struct gather_scatter_info { + #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p + #define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p + ++#define DR_GROUP_SLP_TRANSPOSE(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->slp_transpose) ++#define DR_GROUP_SIZE_TRANS(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->size_before_transpose) ++#define DR_GROUP_NUMBER(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->group_number) + #define DR_GROUP_FIRST_ELEMENT(S) \ + (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) + #define DR_GROUP_NEXT_ELEMENT(S) \ +@@ -1437,6 +1509,8 @@ struct gather_scatter_info { + (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count) + #define DR_GROUP_GAP(S) \ + (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap) ++#define DR_GROUP_GAP_TRANS(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap_before_transpose) + + #define REDUC_GROUP_FIRST_ELEMENT(S) \ + (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element) +@@ -2033,6 +2107,17 @@ vect_get_scalar_dr_size (dr_vec_info *dr_info) + return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr)))); + } + ++/* Compare two unsigned ints A and B, ++ sorting them in ascending order. */ ++ ++static inline int ++cmp_for_group_num (const void *a_, const void *b_) ++{ ++ unsigned int a = *(unsigned int *)const_cast<void *>(a_); ++ unsigned int b = *(unsigned int *)const_cast<void *>(b_); ++ return a < b ? -1 : 1; ++} ++ + /* Return true if LOOP_VINFO requires a runtime check for whether the + vector loop is profitable. 
*/ + +@@ -2152,7 +2237,7 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count, + + extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *); + extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *, +- gimple_stmt_iterator *); ++ gimple_stmt_iterator *,bool transpose=false); + extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *); + extern tree vect_get_store_rhs (stmt_vec_info); + void vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info, unsigned, +@@ -2168,7 +2253,7 @@ void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned, + tree = NULL, vec<tree> * = NULL, tree = NULL, + tree = NULL, vec<tree> * = NULL, tree = NULL); + extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree, +- gimple_stmt_iterator *); ++ gimple_stmt_iterator *, bool transpose=false); + extern tree vect_get_slp_vect_def (slp_tree, unsigned); + extern bool vect_transform_stmt (vec_info *, stmt_vec_info, + gimple_stmt_iterator *, +@@ -2235,6 +2320,9 @@ extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); + extern void vect_permute_store_chain (vec_info *, vec<tree> &, + unsigned int, stmt_vec_info, + gimple_stmt_iterator *, vec<tree> *); ++extern void vect_transpose_store_chain (vec_info *, vec<tree>, unsigned int, ++ unsigned int, stmt_vec_info, ++ gimple_stmt_iterator *, vec<tree> *); + extern tree vect_setup_realignment (vec_info *, + stmt_vec_info, gimple_stmt_iterator *, + tree *, enum dr_alignment_support, tree, +@@ -2262,7 +2350,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, + enum tree_code); + extern bool needs_fold_left_reduction_p (tree, code_helper); + /* Drive for loop analysis stage. */ +-extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); ++extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *, ++ bool result_only_p = false); + extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); + extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, + tree *, bool); +@@ -2331,6 +2420,7 @@ extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> + gimple_stmt_iterator *, poly_uint64, + bool, unsigned *, + unsigned * = nullptr, bool = false); ++extern void vect_transform_back_slp_grouped_stores (bb_vec_info, stmt_vec_info); + extern bool vect_slp_analyze_operations (vec_info *); + extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &); + extern opt_result vect_analyze_slp (vec_info *, unsigned); +-- +2.33.0 +
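For orientation, here is a hypothetical C kernel of the shape this transposed-SLP machinery targets; it is an illustration only, not code from the patch, and the function name is invented. The loads of a[] and b[] form grouped accesses with gaps (stride 4), while the stores to c[] are contiguous, so the load groups have to be rearranged ("transposed") before the store group can be vectorized as a single chain:

  /* Hypothetical example: grouped loads with stride 4, contiguous stores. */
  void
  sub4 (const short *restrict a, const short *restrict b, short *restrict c)
  {
    c[0] = a[0] - b[0];
    c[1] = a[4] - b[4];
    c[2] = a[8] - b[8];
    c[3] = a[12] - b[12];
  }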
View file
_service:tar_scm:0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch
Added
@@ -0,0 +1,80 @@ +From df18d0c85049402b8f2f44c3c4e013a0b6d91cee Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 5 Jan 2024 11:43:29 +0800 +Subject: [PATCH 099/188] LoongArch: testsuite: Give up the detection of the + gcc.dg/fma-{3, 4, 6, 7}.c files. + +On the LoongArch architecture, the above four test cases need to be waived +during testing. There are two situations: + +1. The fma-{3,6}.c tests compute the value of c-a*b, but on +the LoongArch architecture the existing fnmsub instruction +computes the value of -(a*b - c); + +2. The fma-{4,7}.c tests compute the value of -(a*b)-c, but on +the LoongArch architecture the existing fnmadd instruction +computes the value of -(a*b + c); + +In both of the above cases, the two computations can differ in the +sign of zero (+0.0 vs. -0.0), so the scans are skipped on LoongArch. + +gcc/testsuite/ChangeLog: + + * gcc.dg/fma-3.c: The intermediate file corresponding to the + function does not produce the corresponding FNMA symbol, so the test + rules should be skipped when testing. + * gcc.dg/fma-4.c: The intermediate file corresponding to the + function does not produce the corresponding FNMS symbol, so skip the + test rules when testing. + * gcc.dg/fma-6.c: The cause is the same as fma-3.c. + * gcc.dg/fma-7.c: The cause is the same as fma-4.c. +--- + gcc/testsuite/gcc.dg/fma-3.c | 2 +- + gcc/testsuite/gcc.dg/fma-4.c | 2 +- + gcc/testsuite/gcc.dg/fma-6.c | 2 +- + gcc/testsuite/gcc.dg/fma-7.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c +index 699aa2c95..6649b54b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-3.c ++++ b/gcc/testsuite/gcc.dg/fma-3.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return c - a * b; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c +index bff928f1f..f1701c196 100644 +--- a/gcc/testsuite/gcc.dg/fma-4.c ++++ b/gcc/testsuite/gcc.dg/fma-4.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return -(a * b) - c; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c +index 87258cec4..9e49b62b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-6.c ++++ b/gcc/testsuite/gcc.dg/fma-6.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, -c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c +index f409cc8ee..86aacad7b 100644 +--- a/gcc/testsuite/gcc.dg/fma-7.c ++++ b/gcc/testsuite/gcc.dg/fma-7.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target { scalar_all_fma && { ! 
loongarch*-*-* } } } } } */ +-- +2.43.0 +
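To see the sign-of-zero issue concretely, here is a small standalone C program; it is an illustration, not part of the patch. Compiled without FP contraction (e.g. -O0 or -ffp-contract=off), the plain expression yields +0.0 while the fnmsub-style rewrite yields -0.0:

  /* c - a*b vs. -(a*b - c): equal in value, but the zeros differ in sign. */
  #include <stdio.h>

  int
  main (void)
  {
    double a = 1.0, b = 1.0, c = 1.0;
    double plain = c - a * b;     /* 1.0 - 1.0 = +0.0 */
    double fused = -(a * b - c);  /* -(1.0 - 1.0) = -0.0, the fnmsub form */
    printf ("%g %g\n", plain, fused);   /* prints "0 -0" on glibc */
    return 0;
  }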
View file
_service:tar_scm:0099-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch
Deleted
@@ -1,48 +0,0 @@ -From 03d5e89e7f3be53fd7142556e8e0a2774c653dca Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: [PATCH] aarch64: Explicitly handle frames with no saved registers - -If a frame has no saved registers, it can be allocated in one go. -There is no need to treat the areas below and above the saved -registers as separate. - -And if we allocate the frame in one go, it should be allocated -as the initial_adjust rather than the final_adjust. This allows the -frame size to grow to guard_size - guard_used_by_caller before a stack -probe is needed. (A frame with no register saves is necessarily a -leaf frame.) - -This is a no-op as things stand, since a leaf function will have -no outgoing arguments, and so all the frame will be above where -the saved registers normally go. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly - allocate the frame in one go if there are no saved registers. ---- - gcc/config/aarch64/aarch64.cc | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 79253322fd7c..e1f21230c15e 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8378,9 +8378,11 @@ aarch64_layout_frame (void) - - HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; - HOST_WIDE_INT const_saved_regs_size; -- if (frame.frame_size.is_constant (&const_size) -- && const_size < max_push_offset -- && known_eq (frame.hard_fp_offset, const_size)) -+ if (known_eq (frame.saved_regs_size, 0)) -+ frame.initial_adjust = frame.frame_size; -+ else if (frame.frame_size.is_constant (&const_size) -+ && const_size < max_push_offset -+ && known_eq (frame.hard_fp_offset, const_size)) - { - /* Simple, small frame with no outgoing arguments: - --- -2.43.5 -
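As a sketch of the case the dropped change covered (illustrative only, not taken from the patch): a leaf function that saves no registers, whose whole frame the patch allocated as a single initial_adjust instead of splitting the allocation around an empty register-save area.

  /* Leaf function with no callee-saved registers: its entire frame can
     be allocated with one "sub sp, sp, #N"-style adjustment.  */
  void
  leaf_no_saves (void)
  {
    volatile char buf[256];
    buf[0] = 0;
  }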
View file
_service:tar_scm:0100-Add-hip09-machine-discribtion.patch
Added
@@ -0,0 +1,882 @@ +From d9131757175667d35e74d9ee84689039990af768 Mon Sep 17 00:00:00 2001 +From: xingyushuai <xingyushuai@huawei.com> +Date: Fri, 3 Mar 2023 09:31:04 +0800 +Subject: [PATCH 001/157] Add hip09 machine description + +This patch introduces the hip09 machine model +for the scheduler. +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 109 +++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++ + 6 files changed, 774 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/hip09.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..a854bdb24 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 48522606f..fc5a3cbe4 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -668,6 +668,110 @@ const struct cpu_cost_table a64fx_extra_costs = + } + }; + ++const struct cpu_cost_table hip09_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. 
*/ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table ampere1_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 9dc9adc70..238bb6e31 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5537a537c..e9b3980c4 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -465,6 +465,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip09_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct 
cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -660,6 +676,16 @@ static const struct cpu_regmove_cost a64fx_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip09_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost neoversen2_regmove_cost = + { + 1, /* GP2GP */ +@@ -947,6 +973,43 @@ static const struct cpu_vector_cost tsv110_vector_cost = + nullptr /* issue_info */ + }; + ++static const advsimd_vec_cost hip09_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 0, /* ld2_st2_permute_cost */ ++ 0, /* ld3_st3_permute_cost */ ++ 0, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 3, /* reduc_i8_cost */ ++ 3, /* reduc_i16_cost */ ++ 3, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 3, /* vec_to_scalar_cost */ ++ 2, /* scalar_to_vec_cost */ ++ 5, /* align_load_cost */ ++ 5, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const struct cpu_vector_cost hip09_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip09_advsimd_vector_cost, /* advsimd */ ++ nullptr, /* sve */ ++ nullptr /* issue_info */ ++}; ++ + static const advsimd_vec_cost cortexa57_advsimd_vector_cost = + { + 2, /* int_stmt_cost */ +@@ -1293,6 +1356,18 @@ static const cpu_prefetch_tune tsv110_prefetch_tune = + -1 /* default_opt_level */ + }; + ++ ++static const cpu_prefetch_tune hip09_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune xgene1_prefetch_tune = + { + 8, /* num_slots */ +@@ -1658,6 +1733,40 @@ static const struct tune_params tsv110_tunings = + &tsv110_prefetch_tune + }; + ++static const struct tune_params hip09_tunings = ++{ ++ &hip09_extra_costs, ++ &hip09_addrcost_table, ++ &hip09_regmove_cost, ++ &hip09_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_256, /* sve_width */ ++ { 4, /* load_int. */ ++ 4, /* store_int. */ ++ 4, /* load_fp. */ ++ 4, /* store_fp. */ ++ 4, /* load_pred. */ ++ 4 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 4, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS ++ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. 
*/ ++ &hip09_prefetch_tune ++}; ++ + static const struct tune_params xgene1_tunings = + { + &xgene1_extra_costs, +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index d24c8afcf..cf699e4c7 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -477,6 +477,7 @@ + (include "thunderx2t99.md") + (include "tsv110.md") + (include "thunderx3t110.md") ++(include "hip09.md") + + ;; ------------------------------------------------------------------- + ;; Jumps and other miscellaneous insns +diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md +new file mode 100644 +index 000000000..25428de9a +--- /dev/null ++++ b/gcc/config/aarch64/hip09.md +@@ -0,0 +1,558 @@ ++;; hip09 pipeline description ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++;; ++;;Contributed by Yushuai Xing ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_automaton "hip09") ++(define_automaton "hip09_ldst") ++(define_automaton "hip09_fsu") ++ ++(define_attr "hip09_type" ++ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla, ++ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt, ++ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes, ++ hip09_neon_load2and4, hip09_neon_load3_3reg, ++ hip09_neon_load4_4reg, hip09_neon_store1and2, ++ hip09_neon_store1_1reg, hip09_neon_store1_2reg, ++ hip09_neon_store1_3reg, hip09_neon_store1_4reg, ++ hip09_neon_store3and4_lane, hip09_neon_store3_3reg, ++ hip09_neon_store4_4reg, unknown" ++ (cond ++ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\ ++ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\ ++ neon_sub_widen,neon_qadd,neon_qadd_q,\ ++ neon_add_long,neon_sub_long,\ ++ neon_qabs,neon_qabs_q,neon_qneg,\ ++ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\ ++ neon_compare_q,neon_compare_zero,\ ++ neon_compare_zero_q,neon_logic,neon_logic_q,\ ++ neon_minmax,neon_minmax_q,neon_tst,\ ++ neon_tst_q,neon_bsl,neon_bsl_q,\ ++ neon_cls,neon_cls_q,neon_ext,\ ++ neon_ext_q,neon_rev,neon_rev_q,\ ++ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\ ++ neon_fp_abs_s_q,neon_fp_abs_d,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,\ ++ neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_shift_imm_narrow_q,neon_move,neon_move_q") ++ (const_string "hip09_neon_abs") ++ (eq_attr "type" "neon_abd,neon_abd_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_add_halve_narrow_q,\ ++ neon_sub_halve_narrow_q,neon_reduc_add,\ ++ neon_reduc_add_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,\ ++ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\ ++ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\ ++ neon_mla_b_long,neon_sat_mla_b_long,\ ++ neon_sat_shift_imm,\ ++ neon_sat_shift_imm_q,neon_shift_imm_long,\ ++ neon_shift_imm,neon_shift_imm_q,neon_cnt,\ ++ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ ++ 
neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\ ++ neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,\ ++ neon_tbl2,neon_tbl2_q,neon_to_gp,\ ++ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_cvt_widen_s,neon_fp_to_int_s,\ ++ neon_fp_to_int_s_q,neon_fp_to_int_d,\ ++ neon_fp_to_int_d_q,neon_fp_minmax_s,\ ++ neon_fp_minmax_s_q,neon_fp_minmax_d,\ ++ neon_fp_minmax_d_q,neon_fp_round_s,\ ++ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\ ++ neon_fp_round_d,neon_fp_round_d_q,\ ++ neon_fp_cvt_narrow_s_q") ++ (const_string "hip09_neon_fp_arith") ++ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\ ++ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\ ++ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\ ++ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\ ++ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\ ++ neon_mul_s_long,neon_mul_h_scalar_long,\ ++ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\ ++ neon_mla_s,neon_mla_h_scalar,\ ++ neon_mla_h_scalar_q,neon_mla_s_scalar,\ ++ neon_mla_h_long,\ ++ neon_mla_s_long,neon_sat_mla_h_long,\ ++ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\ ++ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\ ++ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\ ++ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\ ++ neon_shift_reg_q,neon_sat_shift_reg,\ ++ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\ ++ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\ ++ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\ ++ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d_q,\ ++ neon_fp_mul_s_q,\ ++ neon_fp_mul_d,neon_fp_mul_d_q,\ ++ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\ ++ neon_fp_mul_s_scalar_q") ++ (const_string "hip09_neon_mul") ++ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\ ++ neon_reduc_minmax_q,neon_fp_recps_s,\ ++ neon_fp_recps_s_q,neon_fp_recps_d,\ ++ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\ ++ neon_fp_mla_s,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mla_d_scalar_q") ++ (const_string "hip09_neon_mla") ++ (eq_attr "type" "neon_dot,neon_dot_q") ++ (const_string "hip09_neon_dot") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\ ++ neon_fp_div_d,neon_fp_div_d_q") ++ (const_string "hip09_neon_fp_div") ++ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ ++ neon_fp_sqrt_d,neon_fp_sqrt_d_q") ++ (const_string "hip09_neon_fp_sqrt") ++ (eq_attr "type" "neon_dup,neon_dup_q,\ ++ neon_ins,neon_ins_q") ++ (const_string "hip09_neon_ins") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ ++ neon_load1_2reg,neon_load1_2reg_q,\ ++ neon_load1_3reg,neon_load1_3reg_q,\ ++ neon_load1_4reg,neon_load1_4reg_q") ++ (const_string "hip09_neon_load1") ++ (eq_attr "type" "neon_load1_one_lane,\ ++ neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q") ++ (const_string "hip09_neon_load1_lanes") ++ (eq_attr "type" "neon_load2_all_lanes,\ ++ neon_load2_all_lanes_q,\ ++ neon_load2_one_lane,neon_load2_2reg,\ ++ neon_load2_2reg_q,neon_load3_one_lane,\ ++ neon_load3_all_lanes,neon_load3_all_lanes_q,\ ++ neon_load4_one_lane,neon_load4_all_lanes,\ ++ neon_load4_all_lanes_q") ++ (const_string "hip09_neon_load2and4") ++ 
(eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q") ++ (const_string "hip09_neon_load3_3reg") ++ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q") ++ (const_string "hip09_neon_load4_4reg") ++ (eq_attr "type" "neon_store1_one_lane,\ ++ neon_store1_one_lane_q,neon_store2_one_lane,\ ++ neon_store2_one_lane_q,neon_store2_2reg,\ ++ neon_store2_2reg_q") ++ (const_string "hip09_neon_store1and2") ++ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q") ++ (const_string "hip09_neon_store1_1reg") ++ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q") ++ (const_string "hip09_neon_store1_2reg") ++ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q") ++ (const_string "hip09_neon_store1_3reg") ++ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q") ++ (const_string "hip09_neon_store1_4reg") ++ (eq_attr "type" "neon_store3_one_lane,\ ++ neon_store3_one_lane_q,neon_store4_one_lane,\ ++ neon_store4_one_lane_q") ++ (const_string "hip09_neon_store3and4_lane") ++ (eq_attr "type" "neon_store3_3reg,\ ++ neon_store3_3reg_q") ++ (const_string "hip09_neon_store3_3reg") ++ (eq_attr "type" "neon_store4_4reg,\ ++ neon_store4_4reg_q") ++ (const_string "hip09_neon_store4_4reg") ++ (const_string "unknown"))) ++ ++; The hip09 core is modelled as issues pipeline that has ++; the following functional units. ++; 1. Two pipelines for branch micro operations: BRU1, BRU2 ++ ++(define_cpu_unit "hip09_bru0" "hip09") ++(define_cpu_unit "hip09_bru1" "hip09") ++ ++(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1") ++ ++; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4 ++ ++(define_cpu_unit "hip09_alus0" "hip09") ++(define_cpu_unit "hip09_alus1" "hip09") ++(define_cpu_unit "hip09_alus2" "hip09") ++(define_cpu_unit "hip09_alus3" "hip09") ++ ++(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3") ++(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1") ++(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3") ++ ++; 3. Two pipelines for multi cycles integer micro operations: ALUm1, ALUm2 ++ ++(define_cpu_unit "hip09_alum0" "hip09") ++(define_cpu_unit "hip09_alum1" "hip09") ++ ++(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1") ++ ++; 4. Two pipelines for load micro opetations: Load1, Load2 ++ ++(define_cpu_unit "hip09_load0" "hip09_ldst") ++(define_cpu_unit "hip09_load1" "hip09_ldst") ++ ++(define_reservation "hip09_ld01" "hip09_load0|hip09_load1") ++ ++; 5. Two pipelines for store micro operations: Store1, Store2 ++ ++(define_cpu_unit "hip09_store0" "hip09_ldst") ++(define_cpu_unit "hip09_store1" "hip09_ldst") ++ ++(define_reservation "hip09_st01" "hip09_store0|hip09_store1") ++ ++; 6. Two pipelines for store data micro operations: STD0,STD1 ++ ++(define_cpu_unit "hip09_store_data0" "hip09_ldst") ++(define_cpu_unit "hip09_store_data1" "hip09_ldst") ++ ++(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1") ++ ++; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4 ++ ++(define_cpu_unit "hip09_fsu0" "hip09_fsu") ++(define_cpu_unit "hip09_fsu1" "hip09_fsu") ++(define_cpu_unit "hip09_fsu2" "hip09_fsu") ++(define_cpu_unit "hip09_fsu3" "hip09_fsu") ++ ++(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3") ++(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2") ++ ++ ++; 8. 
Two pipelines for sve operations but same with fsu1 and fsu3: SVE1, SVE2 ++ ++;; Simple Execution Unit: ++; ++;; Simple ALU without shift ++(define_insn_reservation "hip09_alu" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_imm,logic_imm,\ ++ adc_imm,adc_reg,\ ++ alu_sreg,logic_reg,\ ++ mov_imm,mov_reg,\ ++ csel,rotate_imm,bfm,mov_imm,\ ++ clz,rbit,rev")) ++ "hip09_alus0123") ++ ++(define_insn_reservation "hip09_alus" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alus_sreg,alus_imm,\ ++ adcs_reg,adcs_imm,\ ++ logics_imm,logics_reg,adr")) ++ "hip09_alus23") ++ ++;; ALU ops with shift and extend ++(define_insn_reservation "hip09_alu_ext_shift" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_ext,alus_ext,\ ++ logics_shift_imm,logics_shift_reg,\ ++ logic_shift_reg,logic_shift_imm,\ ++ ")) ++ "hip09_alum01") ++ ++;; Multiplies instructions ++(define_insn_reservation "hip09_mult" 3 ++ (and (eq_attr "tune" "hip09") ++ (ior (eq_attr "mul32" "yes") ++ (eq_attr "widen_mul64" "yes"))) ++ "hip09_alum01") ++ ++;; Integer divide ++(define_insn_reservation "hip09_div" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip09_alum0") ++ ++;; Branch execution Unit ++; ++; Branches take two issue slot. ++; No latency as there is no result ++(define_insn_reservation "hip09_branch" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "branch,call")) ++ "hip09_bru01 + hip09_alus23") ++ ++;; Load execution Unit ++; ++; Loads of up to two words. ++(define_insn_reservation "hip09_load1" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "load_4,load_8")) ++ "hip09_ld01") ++ ++; Stores of up to two words. ++(define_insn_reservation "hip09_store1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "store_4,store_8")) ++ "hip09_st01") ++ ++;; FP data processing instructions. ++ ++(define_insn_reservation "hip09_fp_arith" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\ ++ f_mrc")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_cmp" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcmps,fcmpd")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_ccmp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fccmps,fccmpd")) ++ "hip09_alus01+hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_csel" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcsel,f_mcr")) ++ "hip09_alus01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divs" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divd" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrts")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrtd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mul" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmuls,fmuld")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_add" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\ ++ f_rints,f_rintd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mac" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmacs,fmacd")) ++ "hip09_fsu0123") ++ ++;; FP miscellaneous instructions. 
++ ++(define_insn_reservation "hip09_fp_cvt" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvtf2i")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_cvt2" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvti2f")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; FP Load Instructions ++ ++(define_insn_reservation "hip09_fp_load" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_fp_load2" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_ldp_q,neon_ldp")) ++ "hip09_ld01+hip09_alus01") ++ ++;; FP store instructions ++ ++(define_insn_reservation "hip09_fp_store" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_stores,f_stored")) ++ "hip09_st01+hip09_std01") ++ ++;; ASIMD integer instructions ++ ++(define_insn_reservation "hip09_asimd_base1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_abs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base2" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_arith")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base3" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mul")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base4" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mla")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base5" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mul_s")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_dot" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_dot")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_bfmmla" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mla_s_q")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fdiv" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_div")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fsqrt" 25 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_sqrt")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_pmull" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_pmull")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_dup" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_ins")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; ASIMD load instructions ++ ++(define_insn_reservation "hip09_asimd_ld1_reg" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_asimd_ld1_lane" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1_lanes")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld23" 8 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load2and4")) ++"hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld3_mtp" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load3_3reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld4_mtp" 13 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load4_4reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++;; ASIMD store instructions ++ ++(define_insn_reservation "hip09_asimd_st12" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1and2")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation 
"hip09_asimd_st1_1reg" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_1reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_2reg" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_2reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_3reg" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_3reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_4reg" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_4reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st34_lane" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3and4_lane")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st3_mtp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3_3reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st4_mtp" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store4_4reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++;; Cryptography extensions ++ ++(define_insn_reservation "hip09_asimd_aes" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_aese,crypto_aesmc")) ++ "hip09_fsu02") ++ ++(define_insn_reservation "hip09_asimd_sha3" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha256_fast,crypto_sha512,\ ++ crypto_sm3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1_and256" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\ ++ crypto_sm4")) ++ "hip09_fsu2") ++ ++;; CRC extension. ++ ++(define_insn_reservation "hip09_crc" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crc")) ++ "hip09_alum01") +-- +2.33.0 +
View file
_service:tar_scm:0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
Added
@@ -0,0 +1,206 @@ +From 90db6906a92b685403d9220e94f779737d2dd100 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jan 2024 10:37:53 +0800 +Subject: [PATCH 100/188] LoongArch: Fixed the problem of incorrect judgment of + the immediate field of the [x]vld/[x]vst instruction. + +The [x]vld/[x]vst directive is defined as follows: + [x]vld/[x]vst {x/v}d, rj, si12 + +When not modified, the immediate field of [x]vld/[x]vst is between 10 and +14 bits depending on the type. However, in loongarch_valid_offset_p, the +immediate field is restricted first, so there is no error. However, in +some cases redundant instructions will be generated, see test cases. +Now modify it according to the description in the instruction manual. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_mxld_<lasxfmt_f>): + Modify the method of determining the memory offset of [x]vld/[x]vst. + (lasx_mxst_<lasxfmt_f>): Likewise. + * config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete. + (loongarch_address_insns): Likewise. + * config/loongarch/lsx.md (lsx_ld_<lsxfmt_f>): Likewise. + (lsx_st_<lsxfmt_f>): Likewise. + * config/loongarch/predicates.md (aq10b_operand): Likewise. + (aq10h_operand): Likewise. + (aq10w_operand): Likewise. + (aq10d_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ld-st-imm12.c: New test. +--- + gcc/config/loongarch/lasx.md | 26 ------------------- + gcc/config/loongarch/loongarch.cc | 19 +++----------- + gcc/config/loongarch/lsx.md | 26 ------------------- + gcc/config/loongarch/predicates.md | 16 ------------ + .../gcc.target/loongarch/vect-ld-st-imm12.c | 15 +++++++++++ + 5 files changed, 19 insertions(+), 83 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index dbbf5a136..95c6bae20 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -846,32 +846,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lasx_mxld_<lasxfmt_f>" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lasxfmt>_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lasx_mxst_<lasxfmt_f>" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lasxfmt>_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]); +- DONE; +-}) +- + ;; LASX + (define_insn "add<mode>3" + [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9d2374a46..ddb32cea2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2123,21 +2123,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + + /* We may need to split multiword moves, so make sure that every word + is accessible. */ +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) ++ && GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + +- /* LSX LD.* and ST.* supports 10-bit signed offsets. 
*/ +- if (LSX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, +- loongarch_ldst_scaled_shift (mode))) +- return false; +- +- /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift. */ +- if (LASX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, 0)) +- return false; +- + return true; + } + +@@ -2372,9 +2362,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + case ADDRESS_REG: + if (lsx_p) + { +- /* LSX LD.* and ST.* supports 10-bit signed offsets. */ +- if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10, +- loongarch_ldst_scaled_shift (mode))) ++ /* LSX LD.* and ST.* supports 12-bit signed offsets. */ ++ if (IMM12_OPERAND (INTVAL (addr.offset))) + return 1; + else + return 0; +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 3e3248ef4..02e89247b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -812,32 +812,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lsx_ld_<lsxfmt_f>" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lsxfmt>_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lsx_st_<lsxfmt_f>" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10<lsxfmt>_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]); +- DONE; +-}) +- + ;; Integer operations + (define_insn "add<mode>3" + [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 3698b9103..824a85b36 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -167,22 +167,6 @@ + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) + +-(define_predicate "aq10b_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)"))) +- +-(define_predicate "aq10h_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)"))) +- +-(define_predicate "aq10w_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) +- +-(define_predicate "aq10d_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) +- + (define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +new file mode 100644 +index 000000000..bfc208e4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -mlasx -O2" } */ ++/* { dg-final { scan-assembler-not "addi.d" } } */ ++ ++extern short a[1000]; ++extern short b[1000]; ++extern short c[1000]; ++ ++void ++test (void) ++{ ++ for (int i = 501; i < 517; i++) ++ ((int *)(c + 1))[i] = ((int *)(a + 1))[i] + ((int *)(b + 1))[i]; ++} ++ +-- +2.43.0 +
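For reference, a small C sketch of the offset test the patch switches to (an illustration, not code from the patch; the helper name is invented). si12 denotes a signed 12-bit immediate with no scaling, which is what IMM12_OPERAND checks, whereas the removed aq10* predicates accepted only 10 significant bits scaled by the access size:

  /* Sketch of a si12 offset check: a signed 12-bit field, unscaled. */
  static inline int
  si12_offset_p (long long offset)
  {
    return offset >= -2048 && offset <= 2047;
  }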
View file
_service:tar_scm:0100-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch
Deleted
@@ -1,233 +0,0 @@ -From 49c2eb7616756c323b7f6b18d8616ec945eb1263 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:49 +0100 -Subject: PATCH aarch64: Add bytes_below_saved_regs to frame info - -The frame layout code currently hard-codes the assumption that -the number of bytes below the saved registers is equal to the -size of the outgoing arguments. This patch abstracts that -value into a new field of aarch64_frame. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New - field. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, - and use it instead of crtl->outgoing_args_size. - (aarch64_get_separate_components): Use bytes_below_saved_regs instead - of outgoing_args_size. - (aarch64_process_components): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- - gcc/config/aarch64/aarch64.h | 5 +++ - 2 files changed, 41 insertions(+), 35 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e1f21230c15e..94e1b6865849 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8217,6 +8217,8 @@ aarch64_layout_frame (void) - gcc_assert (crtl->is_leaf - || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); - -+ frame.bytes_below_saved_regs = crtl->outgoing_args_size; -+ - /* Now assign stack slots for the registers. Start with the predicate - registers, since predicate LDR and STR have a relatively small - offset range. These saves happen below the hard frame pointer. */ -@@ -8321,18 +8323,18 @@ aarch64_layout_frame (void) - - poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; - -- poly_int64 above_outgoing_args -+ poly_int64 saved_regs_and_above - = aligned_upper_bound (varargs_and_saved_regs_size - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - - frame.hard_fp_offset -- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; -+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; - - /* Both these values are already aligned. */ -- gcc_assert (multiple_p (crtl->outgoing_args_size, -+ gcc_assert (multiple_p (frame.bytes_below_saved_regs, - STACK_BOUNDARY / BITS_PER_UNIT)); -- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; -+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; - - frame.locals_offset = frame.saved_varargs_size; - -@@ -8376,7 +8378,7 @@ aarch64_layout_frame (void) - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; -+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; - HOST_WIDE_INT const_saved_regs_size; - if (known_eq (frame.saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; -@@ -8384,31 +8386,31 @@ aarch64_layout_frame (void) - && const_size < max_push_offset - && known_eq (frame.hard_fp_offset, const_size)) - { -- /* Simple, small frame with no outgoing arguments: -+ /* Simple, small frame with no data below the saved registers. - - stp reg1, reg2, sp, -frame_size! 
- stp reg3, reg4, sp, 16 */ - frame.callee_adjust = const_size; - } -- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) -+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) - && frame.saved_regs_size.is_constant (&const_saved_regs_size) -- && const_outgoing_args_size + const_saved_regs_size < 512 -- /* We could handle this case even with outgoing args, provided -- that the number of args left us with valid offsets for all -- predicate and vector save slots. It's such a rare case that -- it hardly seems worth the effort though. */ -- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) -+ && const_below_saved_regs + const_saved_regs_size < 512 -+ /* We could handle this case even with data below the saved -+ registers, provided that that data left us with valid offsets -+ for all predicate and vector save slots. It's such a rare -+ case that it hardly seems worth the effort though. */ -+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) - && !(cfun->calls_alloca - && frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset)) - { -- /* Frame with small outgoing arguments: -+ /* Frame with small area below the saved registers: - - sub sp, sp, frame_size -- stp reg1, reg2, sp, outgoing_args_size -- stp reg3, reg4, sp, outgoing_args_size + 16 */ -+ stp reg1, reg2, sp, bytes_below_saved_regs -+ stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ - frame.initial_adjust = frame.frame_size; -- frame.callee_offset = const_outgoing_args_size; -+ frame.callee_offset = const_below_saved_regs; - } - else if (saves_below_hard_fp_p - && known_eq (frame.saved_regs_size, -@@ -8418,30 +8420,29 @@ aarch64_layout_frame (void) - - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = (frame.hard_fp_offset - + frame.below_hard_fp_saved_regs_size); -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - else if (frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset) - { -- /* Frame with large outgoing arguments or SVE saves, but with -- a small local area: -+ /* Frame with large area below the saved registers, or with SVE saves, -+ but with a small area above: - - stp reg1, reg2, sp, -hard_fp_offset! 
- stp reg3, reg4, sp, 16 - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.callee_adjust = const_fp_offset; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - else - { -- /* Frame with large local area and outgoing arguments or SVE saves, -- using frame pointer: -+ /* General case: - - sub sp, sp, hard_fp_offset - stp x29, x30, sp, 0 -@@ -8449,10 +8450,10 @@ aarch64_layout_frame (void) - stp reg3, reg4, sp, 16 - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP -- sub sp, sp, outgoing_args_size */ -+ sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = frame.hard_fp_offset; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -- frame.final_adjust = crtl->outgoing_args_size; -+ frame.final_adjust = frame.bytes_below_saved_regs; - } - - /* Make sure the individual adjustments add up to the full frame size. */ -@@ -9043,7 +9044,7 @@ aarch64_get_separate_components (void) - if (frame_pointer_needed) - offset -= frame.below_hard_fp_saved_regs_size; - else -- offset += crtl->outgoing_args_size; -+ offset += frame.bytes_below_saved_regs; - - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -@@ -9192,7 +9193,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (frame_pointer_needed) - offset -= frame.below_hard_fp_saved_regs_size; - else -- offset += crtl->outgoing_args_size; -+ offset += frame.bytes_below_saved_regs; - - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); -@@ -9246,7 +9247,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (frame_pointer_needed) - offset2 -= frame.below_hard_fp_saved_regs_size; - else -- offset2 += crtl->outgoing_args_size; -+ offset2 += frame.bytes_below_saved_regs; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9320,10 +9321,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) - registers. If POLY_SIZE is not large enough to require a probe this function - will only adjust the stack. When allocating the stack space - FRAME_RELATED_P is then used to indicate if the allocation is frame related. -- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing -- arguments. If we are then we ensure that any allocation larger than the ABI -- defined buffer needs a probe so that the invariant of having a 1KB buffer is -- maintained. -+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below -+ the saved registers. If we are then we ensure that any allocation -+ larger than the ABI defined buffer needs a probe so that the -+ invariant of having a 1KB buffer is maintained. - - We emit barriers after each stack adjustment to prevent optimizations from - breaking the invariant that we never drop the stack more than a page. This -@@ -9532,7 +9533,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to - be probed. This maintains the requirement that each page is probed at - least once. 
For initial probing we probe only if the allocation is -- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe -+ more than GUARD_SIZE - buffer, and below the saved registers we probe - if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == - GUARD_SIZE. This works that for any allocation that is large enough to - trigger a probe here, we'll have at least one, and if they're not large -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 6834c3e99226..1e105e12db8d 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -871,6 +871,11 @@ struct GTY (()) aarch64_frame - /* The size of the callee-save registers with a slot in REG_OFFSET. */ - poly_int64 saved_regs_size; - -+ /* The number of bytes between the bottom of the static frame (the bottom -+ of the outgoing arguments) and the bottom of the register save area. -+ This value is always a multiple of STACK_BOUNDARY. */ -+ poly_int64 bytes_below_saved_regs; -+ - /* The size of the callee-save registers with a slot in REG_OFFSET that - are saved below the hard frame pointer. */ - poly_int64 below_hard_fp_saved_regs_size; --- -2.43.5 -
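The patch above reads most easily with a concrete frame in mind. A minimal sketch (plain C, hypothetical sizes, not GCC code; field names borrowed from the aarch64.h hunk above) of how bytes_below_saved_regs composes with the other aarch64_frame adjustments in the "small area below the saved registers" case:

```c
#include <assert.h>
#include <stdio.h>

int main (void)
{
  /* Hypothetical frame: 48 bytes of locals and frame chain above the
     saves, 64 bytes of callee saves, and 32 bytes of outgoing
     arguments (the "bytes below the saved registers") underneath.  */
  long bytes_below_saved_regs = 32;
  long saved_regs_size = 64;
  long bytes_above_saved_regs = 48;
  long frame_size = bytes_above_saved_regs + saved_regs_size
                    + bytes_below_saved_regs;

  /* "Frame with small area below the saved registers": one initial
     adjustment, saves addressed at bytes_below_saved_regs from SP.  */
  long initial_adjust = frame_size;
  long callee_offset = bytes_below_saved_regs;
  long final_adjust = 0;

  /* The invariant the patch keeps: the individual adjustments still
     add up to the full frame size.  */
  assert (initial_adjust + final_adjust == frame_size);
  printf ("stp reg1, reg2, [sp, #%ld]\n", callee_offset);  /* #32 */
  return 0;
}
```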
View file
_service:tar_scm:0101-Add-hip11-CPU-pipeline-scheduling.patch
Added
@@ -0,0 +1,755 @@
+From 824fccdab1d3c5e87fb88b31f0eeb7abd1b35c1f Mon Sep 17 00:00:00 2001
+From: XingYuShuai <1150775134@qq.com>
+Date: Mon, 26 Feb 2024 20:34:06 +0800
+Subject: [PATCH 002/157] Add hip11 CPU pipeline scheduling
+
+This patch adds a new -mcpu option: hip11. It has been tested on aarch64
+with no regressions from this patch.
+---
+ gcc/config/aarch64/aarch64-cores.def | 1 +
+ gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++
+ gcc/config/aarch64/aarch64-tune.md | 2 +-
+ gcc/config/aarch64/aarch64.cc | 108 ++++++
+ gcc/config/aarch64/aarch64.md | 1 +
+ gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++
+ gcc/doc/invoke.texi | 2 +-
+ 7 files changed, 634 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/config/aarch64/hip11.md
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index a854bdb24..601b72abb 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -173,6 +173,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 |
+ AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
+
+ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
++AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
+
+ AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+ AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
+index fc5a3cbe4..0ee427b61 100644
+--- a/gcc/config/aarch64/aarch64-cost-tables.h
++++ b/gcc/config/aarch64/aarch64-cost-tables.h
+@@ -561,6 +561,110 @@ const struct cpu_cost_table tsv110_extra_costs =
+ }
+ };
+
++const struct cpu_cost_table hip11_extra_costs =
++{
++ /* ALU */
++ {
++ 0, /* arith. */
++ 0, /* logical. */
++ 0, /* shift. */
++ 0, /* shift_reg. */
++ COSTS_N_INSNS (1), /* arith_shift. */
++ COSTS_N_INSNS (1), /* arith_shift_reg. */
++ COSTS_N_INSNS (1), /* log_shift. */
++ COSTS_N_INSNS (1), /* log_shift_reg. */
++ 0, /* extend. */
++ COSTS_N_INSNS (1), /* extend_arith. */
++ 0, /* bfi. */
++ 0, /* bfx. */
++ 0, /* clz. */
++ 0, /* rev. */
++ 0, /* non_exec. */
++ true /* non_exec_costs_exec. */
++ },
++
++ {
++ /* MULT SImode */
++ {
++ COSTS_N_INSNS (2), /* simple. */
++ COSTS_N_INSNS (2), /* flag_setting. */
++ COSTS_N_INSNS (2), /* extend. */
++ COSTS_N_INSNS (2), /* add. */
++ COSTS_N_INSNS (2), /* extend_add. */
++ COSTS_N_INSNS (11) /* idiv. */
++ },
++ /* MULT DImode */
++ {
++ COSTS_N_INSNS (3), /* simple. */
++ 0, /* flag_setting (N/A). */
++ COSTS_N_INSNS (3), /* extend. */
++ COSTS_N_INSNS (3), /* add. */
++ COSTS_N_INSNS (3), /* extend_add. */
++ COSTS_N_INSNS (19) /* idiv. */
++ }
++ },
++ /* LD/ST */
++ {
++ COSTS_N_INSNS (3), /* load. */
++ COSTS_N_INSNS (4), /* load_sign_extend. */
++ COSTS_N_INSNS (3), /* ldrd.
*/ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table a64fx_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 238bb6e31..511422081 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) 
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e9b3980c4..7c62ddb2a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -481,6 +481,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip11_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -666,6 +682,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip11_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost a64fx_regmove_cost = + { + 1, /* GP2GP */ +@@ -1010,6 +1036,43 @@ static const struct cpu_vector_cost hip09_vector_cost = + nullptr /* issue_info */ + }; + ++static const advsimd_vec_cost hip11_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 0, /* ld2_st2_permute_cost */ ++ 0, /* ld3_st3_permute_cost */ ++ 0, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 3, /* reduc_i8_cost */ ++ 3, /* reduc_i16_cost */ ++ 3, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 5, /* vec_to_scalar_cost */ ++ 5, /* scalar_to_vec_cost */ ++ 5, /* align_load_cost */ ++ 5, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const struct cpu_vector_cost hip11_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip11_advsimd_vector_cost, /* advsimd */ ++ nullptr, /* sve */ ++ nullptr /* issue_info */ ++}; ++ + static const advsimd_vec_cost cortexa57_advsimd_vector_cost = + { + 2, /* int_stmt_cost */ +@@ -1368,6 +1431,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune = + -1 /* default_opt_level */ + }; + ++static const cpu_prefetch_tune hip11_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune xgene1_prefetch_tune = + { + 8, /* num_slots */ +@@ -1767,6 +1841,40 @@ static const struct tune_params hip09_tunings = + &hip09_prefetch_tune + }; + ++static const struct tune_params hip11_tunings = ++{ ++ &hip11_extra_costs, ++ &hip11_addrcost_table, ++ &hip11_regmove_cost, ++ &hip11_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_512, /* sve_width */ ++ { 4, /* load_int. */ ++ 4, /* store_int. */ ++ 4, /* load_fp. */ ++ 4, /* store_fp. */ ++ 4, /* load_pred. */ ++ 4 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 4, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. 
*/ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS ++ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ ++ &hip11_prefetch_tune ++}; ++ + static const struct tune_params xgene1_tunings = + { + &xgene1_extra_costs, +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index cf699e4c7..c0c64a798 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -478,6 +478,7 @@ + (include "tsv110.md") + (include "thunderx3t110.md") + (include "hip09.md") ++(include "hip11.md") + + ;; ------------------------------------------------------------------- + ;; Jumps and other miscellaneous insns +diff --git a/gcc/config/aarch64/hip11.md b/gcc/config/aarch64/hip11.md +new file mode 100644 +index 000000000..45f91e65b +--- /dev/null ++++ b/gcc/config/aarch64/hip11.md +@@ -0,0 +1,418 @@ ++;; hip11 pipeline description ++;; Copyright (C) 2018-2024 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_automaton "hip11") ++ ++;; The hip11 core is modelled as issues pipeline that has ++;; the following functional units. ++;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3 ++ ++(define_cpu_unit "hip11_alu1_issue" "hip11") ++(define_reservation "hip11_alu1" "hip11_alu1_issue") ++ ++(define_cpu_unit "hip11_alu2_issue" "hip11") ++(define_reservation "hip11_alu2" "hip11_alu2_issue") ++ ++(define_cpu_unit "hip11_alu3_issue" "hip11") ++(define_reservation "hip11_alu3" "hip11_alu3_issue") ++ ++(define_reservation "hip11alu" "hip11_alu1|hip11_alu2|hip11_alu3") ++ ++;; 2. One pipeline for complex integer operations: MDU ++ ++(define_cpu_unit "hip11_mdu_issue" "hip11") ++(define_reservation "hip11_mdu" "hip11_mdu_issue") ++ ++;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2 ++(define_automaton "hip11_fsu") ++ ++(define_cpu_unit "hip11_fsu1_issue" ++ "hip11_fsu") ++(define_cpu_unit "hip11_fsu2_issue" ++ "hip11_fsu") ++ ++(define_reservation "hip11_fsu1" "hip11_fsu1_issue") ++(define_reservation "hip11_fsu2" "hip11_fsu2_issue") ++(define_reservation "hip11_fsu_pipe" "hip11_fsu1|hip11_fsu2") ++ ++;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2 ++ ++;; 5. Two pipelines for load and store operations: LS1, LS2. ++ ++(define_cpu_unit "hip11_ls1_issue" "hip11") ++(define_cpu_unit "hip11_ls2_issue" "hip11") ++(define_reservation "hip11_ls1" "hip11_ls1_issue") ++(define_reservation "hip11_ls2" "hip11_ls2_issue") ++ ++;; Block all issue queues. 
++ ++(define_reservation "hip11_block" "hip11_fsu1_issue + hip11_fsu2_issue ++ + hip11_mdu_issue + hip11_alu1_issue ++ + hip11_alu2_issue + hip11_alu3_issue + hip11_ls1_issue + hip11_ls2_issue") ++ ++;; Branch execution Unit ++;; ++(define_insn_reservation "hip11_branch" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "branch")) ++ "hip11_alu2|hip11_alu3") ++ ++(define_insn_reservation "hip11_return_from_subroutine" 6 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "branch") ++ (eq_attr "sls_length" "retbr")) ++ "hip11_mdu,(hip11_alu2|hip11_alu3)") ++ ++ ;; Simple Execution Unit: ++;; ++;; Simple ALU without shift ++(define_insn_reservation "hip11_alu" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "alu_imm,logic_imm,\ ++ alu_sreg,logic_reg,\ ++ adc_imm,adc_reg,\ ++ adr,bfm,clz,rbit,rev,\ ++ shift_imm,shift_reg,\ ++ mov_imm,mov_reg,\ ++ mvn_imm,mvn_reg,\ ++ mrs,multiple,csel,\ ++ rotate_imm")) ++ "hip11_alu1|hip11_alu2|hip11_alu3") ++ ++(define_insn_reservation "hip11_alus" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "alus_imm,logics_imm,\ ++ alus_sreg,logics_reg,\ ++ adcs_imm,adcs_reg")) ++ "hip11_alu2|hip11_alu3") ++ ++;; ALU ops with shift ++(define_insn_reservation "hip11_alu_shift" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "extend,\ ++ alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,\ ++ crc,logic_shift_imm,logic_shift_reg,\ ++ mov_shift,mvn_shift,\ ++ mov_shift_reg,mvn_shift_reg")) ++ "hip11_mdu") ++ ++(define_insn_reservation "hip11_alus_shift" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\ ++ logics_shift_imm,logics_shift_reg")) ++ "hip11_alu2|hip11_alu3") ++ ++;; Multiplies instructions ++(define_insn_reservation "hip11_mult" 3 ++ (and (eq_attr "tune" "hip11") ++ (ior (eq_attr "mul32" "yes") ++ (eq_attr "widen_mul64" "yes"))) ++ "hip11_mdu") ++ ++;; Integer divide ++(define_insn_reservation "hip11_div" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip11_mdu") ++ ++(define_insn_reservation "hip11_mla" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "mla,smlal,umlal,smull,umull")) ++ "hip11_mdu") ++ ++;; Block all issue pipes for a cycle ++(define_insn_reservation "hip11_block" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "block")) ++ "hip11_block") ++ ++;; Load-store execution Unit ++;; ++(define_insn_reservation "hip11_load1" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "load_4,load_8,load_16")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_fp_load" 5 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_single" 7 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld1_1reg" 5 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_2reg" 6 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_3reg" 7 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_4reg" 8 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q")) 
++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld2" 8 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load2_one_lane,neon_load2_one_lane_q,\ ++ neon_load2_all_lanes,neon_load2_all_lanes_q,\ ++ neon_load2_2reg,neon_load2_2reg_q,\ ++ neon_load2_4reg,neon_load2_4reg_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld3_single" 9 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\ ++ neon_load3_all_lanes,neon_load3_all_lanes_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld3_multiple" 13 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld4_single" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\ ++ neon_load4_all_lanes,neon_load4_all_lanes_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld4_multiple" 11 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++;; Stores of up to two words. ++(define_insn_reservation "hip11_store1" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "store_4,store_8,store_16,\ ++ f_stored,f_stores")) ++ "hip11_ls1|hip11_ls2") ++ ++;; Floating-Point Operations. ++(define_insn_reservation "hip11_fp_arith" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,\ ++ f_minmaxd,fadds,faddd,neon_fcadd")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_mul" 3 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,\ ++ neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\ ++ neon_fp_mul_d_scalar_q,fmuld,fmuls")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_cmp" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fccmpd,fccmps")) ++ "hip11alu,hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_csel" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fcsel")) ++ "hip11alu,hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_fcmp" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fcmpd,fcmps")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_divs" 7 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fdivs")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_divd" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fdivd")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fsqrts")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fsqrtd")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_mac" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_mov" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fmov,neon_dup,neon_dup_q,\ ++ neon_from_gp,neon_from_gp_q,\ ++ neon_ins,neon_ins_q,\ ++ neon_to_gp,neon_to_gp_q,\ ++ neon_move,neon_move_q,\ ++ neon_rev,neon_rev_q,\ ++ neon_permute,neon_permute_q,\ ++ neon_shift_imm_narrow_q,\ ++ neon_ext,neon_ext_q,\ ++ neon_rbit,\ ++ crypto_sha3,neon_tbl1,neon_tbl1_q,\ ++ neon_tbl2_q,f_mcr,neon_tst,neon_tst_q,\ ++ neon_move_narrow_q")) ++ "hip11_fsu1") ++ ++;; ASIMD instructions ++(define_insn_reservation "hip11_asimd_simple_arithmetic" 2 ++ (and (eq_attr 
"tune" "hip11") ++ (eq_attr "type" "neon_abs,neon_abs_q,neon_neg,neon_neg_q,\ ++ neon_abd,neon_abd_q,\ ++ neon_add_long,neon_sub_long,neon_sub_widen,neon_add_widen,\ ++ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_compare,neon_compare_q,\ ++ neon_compare_zero,neon_compare_zero_q,\ ++ neon_minmax,neon_minmax_q,\ ++ neon_logic,neon_logic_q,\ ++ neon_reduc_add,neon_reduc_add_q,\ ++ neon_reduc_minmax,neon_reduc_minmax_q,\ ++ neon_fp_to_int_s,neon_fp_to_int_s_q,\ ++ neon_fp_to_int_d,neon_fp_to_int_d_q,\ ++ neon_fp_cvt_widen_s,\ ++ neon_fp_cvt_narrow_d_q,\ ++ neon_cls,neon_cls_q,\ ++ neon_cnt,neon_cnt_q,\ ++ f_rints,f_rintd,f_cvtf2i,f_cvt,\ ++ neon_tbl3,neon_fp_round_s,neon_fp_round_s_q,\ ++ neon_fp_round_d,neon_fp_round_d_q,\ ++ neon_int_to_fp_s,neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ ++ neon_fp_cvt_narrow_s_q,\ ++ crypto_aese,crypto_aesmc,\ ++ crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha1_slow,\ ++ crypto_sha256_fast,\ ++ crypto_sha512,crypto_sm3,\ ++ neon_qabs,neon_qabs_q,\ ++ neon_qneg,neon_qneg_q,\ ++ neon_qadd,neon_qadd_q,\ ++ neon_qsub,neon_qsub_q,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\ ++ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_complex_arithmetic" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_mul_b,neon_mul_b_q,\ ++ neon_mul_h,neon_mul_h_q,\ ++ neon_mul_s,neon_mul_s_q,\ ++ neon_mla_b,neon_mla_b_q,\ ++ neon_mla_h,neon_mla_h_q,\ ++ neon_mla_s,\ ++ neon_mla_h_scalar,neon_mla_h_scalar_q,\ ++ neon_mla_s_scalar,neon_mla_s_scalar_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_h_scalar_q,\ ++ neon_sat_mul_s_scalar,neon_sat_mul_s_scalar_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,\ ++ neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\ ++ neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,\ ++ neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,\ ++ neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,\ ++ neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,\ ++ neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,\ ++ crypto_pmull,\ ++ neon_sat_shift_reg,neon_sat_shift_reg_q,\ ++ neon_shift_reg,neon_shift_reg_q,\ ++ neon_shift_imm,neon_shift_imm_q,\ ++ neon_shift_imm_long,\ ++ neon_sat_shift_imm,neon_sat_shift_imm_q,\ ++ neon_sat_shift_imm_narrow_q,\ ++ neon_shift_acc,neon_shift_acc_q,\ ++ crypto_sha256_slow")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_fp_compare" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\ ++ neon_fp_abs_d,neon_fp_abs_d_q,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,\ ++ neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_minmax_s,neon_fp_minmax_s_q,\ ++ neon_fp_minmax_d,neon_fp_minmax_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\ ++ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\ ++ neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_asimd_fdiv" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\ ++ neon_fp_div_d,neon_fp_div_d_q")) ++ 
"hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_fsqrt" 15 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ ++ neon_fp_sqrt_d,neon_fp_sqrt_d_q")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_fp_multiply_add" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mul_s,neon_fp_mul_s_q,neon_fcmla,\ ++ neon_fp_recps_s,neon_fp_recps_s_q,\ ++ neon_fp_recps_d,neon_fp_recps_d_q,\ ++ neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\ ++ neon_fp_rsqrts_d,neon_fp_rsqrts_d_q")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_asimd_frecpx" 3 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,neon_tbl4,\ ++ neon_dot,neon_dot_q")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_mmla" 6 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_mla_s_q")) ++ "hip11_fsu1") +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7ca60dd64..17d9e4126 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19212,7 +19212,7 @@ performance of the code. Permissible values for this option are: + @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} + @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n}, + @samp{octeontx2f95mm}, +-@samp{a64fx}, ++@samp{a64fx},@samp{hip11} + @samp{thunderx}, @samp{thunderxt88}, + @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110}, + @samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus}, +-- +2.33.0 +
View file
_service:tar_scm:0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch
Added
@@ -0,0 +1,150 @@
+From f5355c67104cb5d150e1fd3b58807b2ad4e67b7c Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Fri, 5 Jan 2024 15:37:13 +0800
+Subject: [PATCH 101/188] LoongArch: Improve lasx_xvpermi_q_<LASX:mode> insn
+ pattern
+
+For instruction xvpermi.q, unused bits in operands[3] need to be set to 0
+to avoid causing undefined behavior on LA464.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md: Set the unused bits in operand[3] to 0.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-xvpremi.c: Removed.
+	* gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c: New test.
+---
+ gcc/config/loongarch/lasx.md | 9 ++-
+ .../loongarch/vector/lasx/lasx-xvpermi_q.c | 64 +++++++++++++++++++
+ .../loongarch/vector/lasx/lasx-xvpremi.c | 19 ------
+ 3 files changed, 72 insertions(+), 20 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+ delete mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 95c6bae20..b4aa8e261 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -635,6 +635,8 @@
+ (set_attr "mode" "<MODE>"))
+
+ ;; xvpermi.q
++;; Unused bits in operands[3] need to be set to 0 to avoid
++;; causing undefined behavior on LA464.
+ (define_insn "lasx_xvpermi_q_<LASX:mode>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (unspec:LASX
+ [(match_operand:LASX 1 "register_operand" "0")
+ (match_operand:LASX 2 "register_operand" "f")
+ (match_operand 3 "const_uimm8_operand")]
+ UNSPEC_LASX_XVPERMI_Q))]
+ "ISA_HAS_LASX"
+- "xvpermi.q\t%u0,%u2,%3"
++{
++ int mask = 0x33;
++ mask &= INTVAL (operands[3]);
++ operands[3] = GEN_INT (mask);
++ return "xvpermi.q\t%u0,%u2,%3";
++}
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+new file mode 100644
+index 000000000..dbc29d2fb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c
+@@ -0,0 +1,64 @@
++/* { dg-options "-mlasx -w -fno-strict-aliasing" } */
++#include "../simd_correctness_check.h"
++#include <lasxintrin.h>
++
++int
++main ()
++{
++ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
++ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
++ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
++
++ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
++ long int long_op0, long_op1, long_op2, lont_out, lont_result;
++ long int long_int_out, long_int_result;
++ unsigned int unsigned_int_out, unsigned_int_result;
++ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
++
++ *((unsigned long*)& __m256i_op0[3]) = 0x7fe37fe3001d001d;
++ *((unsigned long*)& __m256i_op0[2]) = 0x7fff7fff7fff0000;
++ *((unsigned long*)& __m256i_op0[1]) = 0x7fe37fe3001d001d;
++ *((unsigned long*)& __m256i_op0[0]) = 0x7fff7fff7fff0000;
++ *((unsigned long*)& __m256i_op1[3]) = 0x7575757575757575;
++ *((unsigned long*)& __m256i_op1[2]) = 0x7575757575757575;
++ *((unsigned long*)& __m256i_op1[1]) = 0x7575757575757575;
++ *((unsigned long*)& __m256i_op1[0]) = 0x7575757575757575;
++ *((unsigned long*)& __m256i_result[3]) = 0x7fe37fe3001d001d;
++ *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000;
++ *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d;
++ *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000;
++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x2a);
++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
++
++ *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000;
++ *((unsigned long*)& __m256i_op0[2]) = 0x000000000019001c;
++ *((unsigned long*)& __m256i_op0[1]) = 0x0000000000000000;
++ *((unsigned long*)& __m256i_op0[0]) = 0x000000000019001c;
++ *((unsigned long*)& __m256i_op1[3]) = 0x0000000000000000;
++ *((unsigned long*)& __m256i_op1[2]) = 0x00000000000001fe;
++ *((unsigned long*)& __m256i_op1[1]) = 0x0000000000000000;
++ *((unsigned long*)& __m256i_op1[0]) = 0x00000000000001fe;
++ *((unsigned long*)& __m256i_result[3]) = 0x0000000000000000;
++ *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c;
++ *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000;
++ *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe;
++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9);
++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
++
++ *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff;
++ *((unsigned long*)& __m256i_op0[2]) = 0x00ff00ff00ff00ff;
++ *((unsigned long*)& __m256i_op0[1]) = 0x00ff00ff00ff00ff;
++ *((unsigned long*)& __m256i_op0[0]) = 0x00ff00ff00ff00ff;
++ *((unsigned long*)& __m256i_op1[3]) = 0xffffffffffffffff;
++ *((unsigned long*)& __m256i_op1[2]) = 0xffff0000ffff0000;
++ *((unsigned long*)& __m256i_op1[1]) = 0xffffffffffffffff;
++ *((unsigned long*)& __m256i_op1[0]) = 0xffff0000ffff0000;
++ *((unsigned long*)& __m256i_result[3]) = 0xffffffffffffffff;
++ *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000;
++ *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff;
++ *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff;
++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca);
++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
++
++ return 0;
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c
+deleted file mode 100644
+index e9fc1d7d3..000000000
+--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c
++++ /dev/null
+@@ -1,19 +0,0 @@
+-/* { dg-options "-mlasx -w -fno-strict-aliasing" } */
+-#include "../simd_correctness_check.h"
+-#include <lasxintrin.h>
+-
+-int
+-main ()
+-{
+- __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
+- __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
+- __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
+-
+- int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+- long int long_op0, long_op1, long_op2, lont_out, lont_result;
+- long int long_int_out, long_int_result;
+- unsigned int unsigned_int_out, unsigned_int_result;
+- unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+-
+- return 0;
+-}
+--
+2.43.0
+
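The 0x33 mask in the insn pattern above keeps only bits [1:0] and [5:4] of the xvpermi.q immediate; on my reading of the fix these are the two 128-bit-lane selectors, and the remaining bits are what triggers the undefined behavior on LA464, hence the clearing. A standalone sketch of that sanitisation (plain C, hypothetical function name), using the immediates from the new test:

```c
#include <stdio.h>

/* Mirrors the mask applied in lasx_xvpermi_q_<LASX:mode>: drop the
   immediate bits the hardware does not define.  */
static unsigned
sanitize_xvpermi_q_imm (unsigned imm8)
{
  return imm8 & 0x33;   /* keep the low-half and high-half selectors */
}

int main (void)
{
  printf ("0x2a -> 0x%02x\n", sanitize_xvpermi_q_imm (0x2a));  /* 0x22 */
  printf ("0xb9 -> 0x%02x\n", sanitize_xvpermi_q_imm (0xb9));  /* 0x31 */
  printf ("0xca -> 0x%02x\n", sanitize_xvpermi_q_imm (0xca));  /* 0x02 */
  return 0;
}
```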
View file
_service:tar_scm:0101-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch
Deleted
@@ -1,84 +0,0 @@ -From 34081079ea4de0c98331843f574b5f6f94d7b234 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:50 +0100 -Subject: PATCH aarch64: Add bytes_below_hard_fp to frame info - -Following on from the previous bytes_below_saved_regs patch, this one -records the number of bytes that are below the hard frame pointer. -This eventually replaces below_hard_fp_saved_regs_size. - -If a frame pointer is not needed, the epilogue adds final_adjust -to the stack pointer before restoring registers: - - aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); - -Therefore, if the epilogue needs to restore the stack pointer from -the hard frame pointer, the directly corresponding offset is: - - -bytes_below_hard_fp + final_adjust - -i.e. go from the hard frame pointer to the bottom of the frame, -then add the same amount as if we were using the stack pointer -from the outset. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New - field. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. - (aarch64_expand_epilogue): Use it instead of - below_hard_fp_saved_regs_size. ---- - gcc/config/aarch64/aarch64.cc | 6 +++--- - gcc/config/aarch64/aarch64.h | 5 +++++ - 2 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 94e1b6865849..c7d84245fbfc 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8269,6 +8269,7 @@ aarch64_layout_frame (void) - of the callee save area. */ - bool saves_below_hard_fp_p = maybe_ne (offset, 0); - frame.below_hard_fp_saved_regs_size = offset; -+ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -@@ -9856,8 +9857,7 @@ aarch64_expand_epilogue (bool for_sibcall) - poly_int64 final_adjust = frame.final_adjust; - poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; -- poly_int64 below_hard_fp_saved_regs_size -- = frame.below_hard_fp_saved_regs_size; -+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; - unsigned reg2 = frame.wb_pop_candidate2; - unsigned int last_gpr = (frame.is_scs_enabled -@@ -9915,7 +9915,7 @@ aarch64_expand_epilogue (bool for_sibcall) - is restored on the instruction doing the writeback. */ - aarch64_add_offset (Pmode, stack_pointer_rtx, - hard_frame_pointer_rtx, -- -callee_offset - below_hard_fp_saved_regs_size, -+ -bytes_below_hard_fp + final_adjust, - tmp1_rtx, tmp0_rtx, callee_adjust == 0); - else - /* The case where we need to re-use the register here is very rare, so -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 1e105e12db8d..de68ff7202fc 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -880,6 +880,11 @@ struct GTY (()) aarch64_frame - are saved below the hard frame pointer. */ - poly_int64 below_hard_fp_saved_regs_size; - -+ /* The number of bytes between the bottom of the static frame (the bottom -+ of the outgoing arguments) and the hard frame pointer. This value is -+ always a multiple of STACK_BOUNDARY. */ -+ poly_int64 bytes_below_hard_fp; -+ - /* Offset from the base of the frame (incomming SP) to the - top of the locals area. This value is always a multiple of - STACK_BOUNDARY. */ --- -2.43.5 -
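The heart of the deleted patch's commit message is the identity between the old and new epilogue offsets. A worked check (plain C, hypothetical sizes, not GCC code) for the frame-pointer case it describes, assuming callee_offset is zero and final_adjust equals bytes_below_saved_regs:

```c
#include <assert.h>

int main (void)
{
  long bytes_below_saved_regs = 32;    /* outgoing arguments           */
  long below_hard_fp_saved_regs = 16;  /* e.g. SVE saves below hard FP */
  long callee_offset = 0;
  long final_adjust = bytes_below_saved_regs;

  /* The new field: bottom of the frame up to the hard frame pointer. */
  long bytes_below_hard_fp = below_hard_fp_saved_regs
                             + bytes_below_saved_regs;       /* 48 */

  /* Old: -callee_offset - below_hard_fp_saved_regs_size  = -16
     New: -bytes_below_hard_fp + final_adjust             = -16  */
  assert (-callee_offset - below_hard_fp_saved_regs
          == -bytes_below_hard_fp + final_adjust);
  return 0;
}
```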
View file
_service:tar_scm:0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch
Added
@@ -0,0 +1,2164 @@
+From 8fa9788ac64a9ea5dc92c61c8f2ec11075cd17ec Mon Sep 17 00:00:00 2001
+From: XingYushuai <xingyushuai@huawei.com>
+Date: Thu, 15 Dec 2022 14:34:16 +0800
+Subject: [PATCH 003/157] Add Crc32 Optimization in Gzip For crc32 algorithm in
+ APBC int_gzip.
+
+Match the crc32 lookup table algorithm. An example of a crc32 lookup table
+loop, e.g.: ```c do { c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); } while (--n); ```
+
+Usage: `gcc -O3 -march=armv8.1-a -floop-crc yourfile.c`
+Note: the CPU you use needs to support the crc32 instructions.
+---
+ gcc/Makefile.in | 1 +
+ gcc/common.opt | 4 +
+ gcc/config/aarch64/aarch64-builtins.cc | 30 +
+ gcc/config/aarch64/aarch64-protos.h | 1 +
+ gcc/config/aarch64/aarch64.cc | 12 +
+ gcc/doc/invoke.texi | 6 +-
+ gcc/doc/tm.texi | 9 +
+ gcc/doc/tm.texi.in | 2 +
+ gcc/match.pd | 23 +
+ gcc/passes.def | 1 +
+ gcc/target.def | 14 +
+ .../tree-ssa/loop-crc-loop-condition-fail.c | 85 ++
+ .../tree-ssa/loop-crc-loop-form-fail-2.c | 90 ++
+ .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 112 ++
+ .../gcc.dg/tree-ssa/loop-crc-sucess.c | 83 +
+ .../tree-ssa/loop-crc-table-check-fail.c | 114 ++
+ gcc/timevar.def | 1 +
+ gcc/tree-pass.h | 1 +
+ gcc/tree-ssa-loop-crc.cc | 1333 +++++++++++++++++
+ 19 files changed, 1921 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c
+ create mode 100644 gcc/tree-ssa-loop-crc.cc
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 5cd838270..2b9f025dc 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1649,6 +1649,7 @@ OBJS = \
+ tree-ssa-ifcombine.o \
+ tree-ssa-live.o \
+ tree-ssa-loop-ch.o \
++ tree-ssa-loop-crc.o \
+ tree-ssa-loop-im.o \
+ tree-ssa-loop-ivcanon.o \
+ tree-ssa-loop-ivopts.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..42fb2fc19 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1119,6 +1119,10 @@ fcrypto-accel-aes
+ Common Var(flag_crypto_accel_aes) Init(0) Optimization
+ Perform crypto acceleration AES pattern matching.
+
++floop-crc
++Common Var(flag_loop_crc) Optimization
++Do the loop crc conversion.
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
+index 42276e7ca..3b952ef39 100644
+--- a/gcc/config/aarch64/aarch64-builtins.cc
++++ b/gcc/config/aarch64/aarch64-builtins.cc
+@@ -551,6 +551,12 @@ typedef struct
+ #define VAR1(T, N, MAP, FLAG, A) \
+ AARCH64_SIMD_BUILTIN_##T##_##N##A,
+
++enum aarch64_crc_builtins{
++ AARCH64_BUILTIN_CRC32B,
++ AARCH64_BUILTIN_CRC32H,
++ AARCH64_BUILTIN_CRC32W,
++};
++
+ enum aarch64_builtins
+ {
+ AARCH64_BUILTIN_MIN,
+@@ -1812,6 +1818,30 @@ aarch64_general_builtin_decl (unsigned code, bool)
+ return aarch64_builtin_decls[code];
+ }
+
++/* Implement TARGET_GET_CRC_BUILTIN_CODE */
++unsigned
++get_crc_builtin_code(unsigned code, bool)
++{
++ if (code > AARCH64_BUILTIN_CRC32W)
++ return AARCH64_BUILTIN_MIN;
++
++ unsigned res = AARCH64_BUILTIN_MIN;
++ switch (code) {
++ case AARCH64_BUILTIN_CRC32B:
++ res = AARCH64_BUILTIN_crc32b;
++ break;
++ case AARCH64_BUILTIN_CRC32H:
++ res = AARCH64_BUILTIN_crc32h;
++ break;
++ case AARCH64_BUILTIN_CRC32W:
++ res = AARCH64_BUILTIN_crc32w;
++ break;
++ default:
++ break;
++ }
++ return res;
++}
++
+ typedef enum
+ {
+ SIMD_ARG_COPY_TO_REG,
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 475d174dd..853197ee9 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -994,6 +994,7 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
+ gimple_stmt_iterator *);
+ rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
+ tree aarch64_general_builtin_decl (unsigned, bool);
++unsigned get_crc_builtin_code(unsigned , bool);
+ tree aarch64_general_builtin_rsqrt (unsigned int);
+ tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
+ void handle_arm_acle_h (void);
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 5537a537c..280e0b618 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -15210,6 +15210,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p)
+ gcc_unreachable ();
+ }
+
++/* Implement TARGET_GET_CRC_BUILTIN_CODE. */
++static unsigned
++aarch64_get_crc_builtin_code(unsigned code, bool initialize_p)
++{
++ unsigned subcode = get_crc_builtin_code(code,initialize_p);
++ unsigned res = subcode << AARCH64_BUILTIN_SHIFT;
++ return res;
++}
++
+ /* Return true if it is safe and beneficial to use the approximate rsqrt optabs
+ to optimize 1.0/sqrt. */
+
+@@ -27677,6 +27686,9 @@ aarch64_get_v16qi_mode ()
+ #undef TARGET_BUILTIN_DECL
+ #define TARGET_BUILTIN_DECL aarch64_builtin_decl
+
++#undef TARGET_GET_CRC_BUILTIN_CODE
++#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code
++
+ #undef TARGET_BUILTIN_RECIPROCAL
+ #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 7ca60dd64..c3ce148b0 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -537,7 +537,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute @gol + -fivopts -fkeep-inline-functions -fkeep-static-functions @gol + -fkeep-static-consts -flimit-function-alignment -flive-range-shrinkage @gol +--floop-block -floop-interchange -floop-strip-mine @gol ++-floop-block -floop-crc -floop-interchange -floop-strip-mine @gol + -floop-unroll-and-jam -floop-nest-optimize @gol + -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol + -flto-partition=@var{alg} -fmerge-all-constants @gol +@@ -12159,6 +12159,10 @@ GIMPLE -> GRAPHITE -> GIMPLE transformation. Some minimal optimizations + are also performed by the code generator isl, like index splitting and + dead code elimination in loops. + ++@item -floop-crc ++@opindex floop-crc ++Do the loop crc conversion ++ + @item -floop-nest-optimize + @opindex floop-nest-optimize + Enable the isl based loop nest optimizer. This is a generic loop nest +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 851d31c18..5a1e0fe43 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11658,6 +11658,15 @@ If @var{code} is out of range the function should return + @code{error_mark_node}. + @end deftypefn + ++@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p}) ++Define this hook to get crc32 builtin code. It should be a function that ++returns the crc32 builtin function code @var{code}. ++If there is no such builtin and it cannot be initialized at this time ++if @var{initialize_p} is true the function should return @code{NULL_TREE}. ++If @var{code} is out of range the function should return ++@code{error_mark_node}. ++@end deftypefn ++ + @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore}) + + Expand a call to a machine specific built-in function that was set up by +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index ac95cdf7a..6ff0eff66 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7704,6 +7704,8 @@ to by @var{ce_info}. + + @hook TARGET_BUILTIN_DECL + ++@hook TARGET_GET_CRC_BUILTIN_CODE ++ + @hook TARGET_EXPAND_BUILTIN + + @hook TARGET_RESOLVE_OVERLOADED_BUILTIN +diff --git a/gcc/match.pd b/gcc/match.pd +index aee58e47b..1f42090a2 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4409,6 +4409,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* Try to match ++ _4 = (int) _3; NOP_EXPR (SSA_NAME @2) ++ _5 = _4 ^ c_10; BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME) ++ _6 = _5 & 255; BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3) ++*/ ++(match (crc_match_index @1 @2 @3) ++ (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) INTEGER_CST@3) ++ (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@3) == 255)) ++) ++#endif ++ ++#if GIMPLE ++/* Try to match ++ _8 = c_12 >> 8; RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) ++ c_19 = _7 ^ _8; BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) ++*/ ++(match (crc_match_res @1 @2 @3) ++ (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2)) ++ (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@2) == 8)) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/passes.def b/gcc/passes.def +index cdc600298..89d6889e5 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3. 
If not see + NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */); + NEXT_PASS (pass_phiopt, true /* early_p */); + NEXT_PASS (pass_array_widen_compare); ++ NEXT_PASS (pass_loop_crc); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_if_to_switch); + NEXT_PASS (pass_convert_switch); +diff --git a/gcc/target.def b/gcc/target.def +index c9bb2b4c2..8abf49f0a 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -2413,6 +2413,20 @@ If @var{code} is out of range the function should return\n\ + @code{error_mark_node}.", + tree, (unsigned code, bool initialize_p), NULL) + ++/* Initialize (if INITIALIZE_P is true) and return the real code of ++ target-specific built-in function. ++ Return NULL if that is not possible. Return error_mark_node if CODE ++ is outside of the range of valid crc32 codes. */ ++DEFHOOK ++(get_crc_builtin_code, ++ "Define this hook to get crc32 builtin code. It should be a function that\n\ ++returns the crc32 builtin function code @var{code}.\n\ ++If there is no such builtin and it cannot be initialized at this time\n\ ++if @var{initialize_p} is true the function should return @code{NULL_TREE}.\n\ ++If @var{code} is out of range the function should return\n\ ++@code{error_mark_node}.", ++ unsigned , (unsigned code, bool initialize_p), NULL) ++ + /* Expand a target-specific builtin. */ + DEFHOOK + (expand_builtin, +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +new file mode 100644 +index 000000000..3620e92f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include <stdint.h> ++#include <stddef.h> ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 
0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc (s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ if (n) do { ++ c = crc_32_tab((int)c ^ (*s++)) & 0xff ^ (c >> 8); ++ } while (--n || c != 0) ; ++ } ++ crc = c; ++exit1: ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +new file mode 100644 +index 000000000..fac759c67 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include <stdint.h> ++#include <stddef.h> ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 
0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++int test[5] = {0};
++
++ulg updcrc (s, n)
++    uch *s;                /* pointer to bytes to pump through */
++    unsigned n;            /* number of bytes in s */
++{
++  register ulg c;          /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL; /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8);
++      } while (--n) ;
++  }
++  do {
++    c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++    test[c%5] = c;
++  } while (--n) ;
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 1 "loop_crc"} } */
++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 1 "loop_crc"} } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c
+new file mode 100644
+index 000000000..ba9e5bb95
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c
+@@ -0,0 +1,112 @@
++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */
++
++#include <stdint.h>
++#include <stddef.h>
++typedef unsigned long ulg;
++typedef unsigned char uch;
++
++static const ulg crc_32_tab[] = {
++  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
++  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
++  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
++  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
++  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
++  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
++  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
++  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
++  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
++  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
++  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
++  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
++  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
++  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
++  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
++  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
++  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
++  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
++  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
++  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
++  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
++  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
++  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
++  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
++  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
++  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
++  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
++  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
++  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
++  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
++  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
++  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
++  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
++  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
++  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
++  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
++  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
++  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
++  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
++  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
++  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
++  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
++  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
++  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++
++/* check when the loop has an inner loop, should fail.  */
++ulg updcrc (s, n)
++    uch *s;                /* pointer to bytes to pump through */
++    unsigned n;            /* number of bytes in s */
++{
++  register ulg c;          /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL; /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++        for (int i = 0; i < 5; i++) {
++          c++;
++        }
++
++      } while (--n);
++  }
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++
++/* check when the loop has a second backedge, should fail.  */
++ulg updcrc1(s, n)
++    uch *s;                /* pointer to bytes to pump through */
++    unsigned n;            /* number of bytes in s */
++{
++  register ulg c;          /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL; /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++      } while (--n || c != 0) ;
++  }
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 1 "loop_crc"} } */
++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 1 "loop_crc"} } */
1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +new file mode 100644 +index 000000000..dad7bdbfc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +@@ -0,0 +1,83 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include <stdint.h> ++#include <stddef.h> ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 
0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc (s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) do { ++ c = crc_32_tab((int)c ^ (*s++)) & 0xff ^ (c >> 8); ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "The 1th loop form is success matched,and the loop can be optimized." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +new file mode 100644 +index 000000000..523a7740c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +@@ -0,0 +1,114 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include <stdint.h> ++#include <stddef.h> ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, 
++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf1L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++int test5 = {0}; ++ ++/* check when the loop is doing more then 1 array read or writing an array, both should fail. */ ++ulg updcrc (s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab(c ^ (*s++)) & 0xff ^ (c >> 8) * testc%5; ++ } while (--n) ; ++ } ++ do { ++ c = crc_32_tab(c ^ (*s++)) & 0xff ^ (c >> 8); ++ testc%5 = c; ++ } while (--n) ; ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++ ++/* check when the loop is not working on a correct crc_table. should fail. */ ++ulg updcrc1(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab(c ^ (*s++)) & 0xff ^ (c >> 8); ++ } while (--n) ; ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 3 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 
1 "loop_crc"} } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 8e7510eb3..8341b9ffd 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -220,6 +220,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") + DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") ++DEFTIMEVAR (TV_TREE_LOOP_CRC , "tree loop crc") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 34e60bc38..6cd679e10 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -454,6 +454,7 @@ extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_loop_crc (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-crc.cc b/gcc/tree-ssa-loop-crc.cc +new file mode 100644 +index 000000000..b9c2f71ca +--- /dev/null ++++ b/gcc/tree-ssa-loop-crc.cc +@@ -0,0 +1,1333 @@ ++/* This pass converts special loops where do CRC algorithms to ++ simple CRC instructions in AArch64. ++ Copyright (C) 2023-2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "target.h"
++#include "tree.h"
++#include "gimple.h"
++#include "tree-pass.h"
++#include "gimple-ssa.h"
++#include "tree-pretty-print.h"
++#include "fold-const.h"
++#include "gimplify.h"
++#include "gimple-iterator.h"
++#include "tree-ssa-loop-manip.h"
++#include "tree-ssa-loop.h"
++#include "ssa.h"
++#include "tree-into-ssa.h"
++#include "cfganal.h"
++#include "cfgloop.h"
++#include "gimple-pretty-print.h"
++#include "tree-cfg.h"
++#include "cgraph.h"
++#include "print-tree.h"
++#include "cfghooks.h"
++#include "gimple-fold.h"
++#include "diagnostic-core.h"
++
++/* This pass handles scenarios similar to the following:
++
++ulg updcrc (s, n)
++    uch *s;
++    unsigned n;
++{
++  register ulg c;
++
++  static ulg crc = (ulg)0xffffffffL;
++
++  if (s == NULL)
++    {
++      c = 0xffffffffL;
++    }
++  else
++    {
++      c = crc;
++      if (n)
++        do
++          {
++            c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8);
++          } while (--n);
++    }
++  crc = c;
++  return c ^ 0xffffffffL;
++}
++
++If the hardware supports the crc instruction, then the pass completes the
++conversion of the above scenario into:
++
++#define SIZE_U32 sizeof(uint32_t)
++unsigned long updcrc(s, n)
++    unsigned char *s;
++    unsigned n;
++{
++  register unsigned long c;
++
++  static unsigned long crc = (unsigned long)0xffffffffL;
++
++  if (s == NULL)
++    {
++      c = 0xffffffffL;
++    }
++  else
++    {
++      c = crc;
++      if (n)
++        {
++          uint32_t nn = n/SIZE_U32;
++          do
++            {
++              c = __crc32w (c, *((uint32_t *)s));
++              s += SIZE_U32;
++            } while(--nn);
++        }
++    }
++  if (n & sizeof (uint16_t))
++    {
++      c = __crc32h (c, *((uint16_t *)s));
++      s += sizeof (uint16_t);
++    }
++  if (n & sizeof (uint8_t))
++    c = __crc32b (c, *s);
++  crc = c;
++  return c ^ 0xffffffffL;
++}
++
++This pass completes the conversion of such scenarios from the internal
++perspective of the compiler:
++1) match_crc_loop: The function completes the screening of such
++   scenarios;
++2) convert_to_new_loop: The function completes the conversion of
++   origin_loop to new loops, and removes origin_loop;
++3) origin_loop_info: The structure is used to record important
++   information about origin_loop, such as the loop exit, the initial
++   values of the induction variables, etc;
++4) create_new_loops: The function is used as the key content of the
++   pass to complete the creation of the new loops.  */
++
++extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree));
++extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree));
++
++static gimple *crc_table_read_stmt = NULL;
++
++static gphi *phi_s = NULL;
++static gphi *phi_c = NULL;
++static tree nn_tree = NULL;
++
++enum aarch64_crc_builtins
++{
++  AARCH64_BUILTIN_CRC32B,
++  AARCH64_BUILTIN_CRC32H,
++  AARCH64_BUILTIN_CRC32W,
++};
++
++/* The useful information of the origin loop.  */
++struct origin_loop_info
++{
++  tree limit;       /* The limit index of the array in the old loop.  */
++  tree base_n;      /* The initial value of the old loop.  */
++  tree base_s;      /* The initial value of the old loop.  */
++  tree base_c;      /* The initial value of the old loop.  */
++  edge entry_edge;  /* The edge into the old loop.  */
++  edge exit_edge;   /* The edge out of the old loop.  */
++  basic_block exit_bb;
++};
++
++typedef struct origin_loop_info origin_loop_info;
++
++static origin_loop_info origin_loop;
++hash_map <basic_block, tree> n_map;
++hash_map <basic_block, tree> nn_map;
++hash_map <basic_block, tree> s_map;
++hash_map <basic_block, tree> c_map;
++hash_map <basic_block, tree> crc_map;
++
++/* Initialize the origin_loop structure.  */
++static void
++init_origin_loop_structure ()
++{
++  origin_loop.entry_edge = NULL;
++  origin_loop.exit_edge = NULL;
++  origin_loop.exit_bb = NULL;
++  origin_loop.limit = NULL;
++  origin_loop.base_n = NULL;
++  origin_loop.base_s = NULL;
++  origin_loop.base_c = NULL;
++}
++
++/* Get the edge that first entered the loop.  */
++static edge
++get_loop_preheader_edge (class loop *loop)
++{
++  edge e;
++  edge_iterator ei;
++
++  FOR_EACH_EDGE (e, ei, loop->header->preds)
++    if (e->src != loop->latch)
++      break;
++
++  return e;
++}
++
++/* Returns true if t is an SSA_NAME and a user variable exists.  */
++
++static bool
++ssa_name_var_p (tree t)
++{
++  if (!t || TREE_CODE (t) != SSA_NAME)
++    return false;
++  if (SSA_NAME_VAR (t))
++    return true;
++  return false;
++}
++
++/* Returns true if t1 and t2 are SSA_NAMEs and belong to the same variable.  */
++
++static bool
++same_ssa_name_var_p (tree t1, tree t2)
++{
++  if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2))
++    return false;
++  if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2))
++    return true;
++  return false;
++}
++
++/* Get the origin loop induction variable upper bound.  */
++
++static bool
++get_iv_upper_bound (gimple *stmt)
++{
++  if (origin_loop.limit != NULL || origin_loop.base_n != NULL)
++    return false;
++
++  tree lhs = gimple_cond_lhs (stmt);
++  tree rhs = gimple_cond_rhs (stmt);
++
++  if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
++      || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE)
++    return false;
++
++  /* TODO: Currently, the input restrictions on lhs and rhs are implemented
++     through PARM_DECL.  We may consider relaxing the restrictions later; we
++     would need to consider the overall adaptation scenario and add test
++     cases.  */
++  if (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) == PARM_DECL)
++    {
++      origin_loop.limit = rhs;
++      origin_loop.base_n = lhs;
++    }
++  else
++    return false;
++
++  if (origin_loop.limit != NULL && origin_loop.base_n != NULL)
++    return true;
++
++  return false;
++}
++
++/* Get origin loop info.  */
++static bool
++get_origin_loop_info (class loop *loop)
++{
++  auto_vec<edge> edges = get_loop_exit_edges (loop);
++  origin_loop.exit_edge = edges[0];
++  origin_loop.exit_bb = origin_loop.exit_edge->dest;
++  origin_loop.entry_edge = get_loop_preheader_edge (loop);
++  origin_loop.base_s = PHI_ARG_DEF_FROM_EDGE (phi_s, origin_loop.entry_edge);
++  origin_loop.base_c = PHI_ARG_DEF_FROM_EDGE (phi_c, origin_loop.entry_edge);
++
++  basic_block preheader_bb = origin_loop.entry_edge->src;
++
++  if (preheader_bb->preds->length () != 1)
++    return false;
++
++  edge entry_pre_bb_edge = EDGE_PRED (preheader_bb, 0);
++
++  basic_block pre_preheader_bb = entry_pre_bb_edge->src;
++
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++  bool get_upper_bound = false;
++  for (gsi = gsi_start_bb (pre_preheader_bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt && gimple_code (stmt) == GIMPLE_COND
++          && get_iv_upper_bound (stmt))
++        {
++          get_upper_bound = true;
++          break;
++        }
++    }
++
++  return get_upper_bound;
++}
++
++/* The loop form check will check the entire loop control flow.
++   It should be a loop that:
++   1. is a do-while loop with header and latch only, with no other control
++      flow inside the loop;
++   2. has only one exiting edge;
++   3. has only one back edge and one entry edge.
++*/
++static bool
++crc_loop_form_check (class loop *loop)
++{
++  if (loop->num_nodes > 2 || loop->inner)
++    return false;
++  // Should only have 1 exit edge
++  auto_vec<edge> edges = get_loop_exit_edges (loop);
++  if (edges.length () != 1)
++    return false;
++
++  // The header should have only 2 incoming edges
++  // One of them is the preheader edge and the other is the backedge from the
++  // latch
++  if (EDGE_COUNT (loop->header->preds) != 2)
++    return false;
++  edge e1 = EDGE_PRED (loop->header, 0);
++  edge e2 = EDGE_PRED (loop->header, 1);
++
++  if ((e1->src == loop->latch && e2->src->loop_father != loop)
++      || (e2->src == loop->latch && e1->src->loop_father != loop))
++    return true;
++
++  return false;
++}
++
++/* Check that only one array is read in the loop.
++   Return the only array as crc_table.  */
++static bool
++only_one_array_read (class loop *loop, tree &crc_table)
++{
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++  bool res = false;
++  for (gsi = gsi_start_bb (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt == NULL)
++        return false;
++
++      if (gimple_code (stmt) == GIMPLE_ASSIGN
++          && TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF)
++        return false;
++
++      /* Only one-dimensional integer arrays meet the condition.  */
++      if (gimple_code (stmt) == GIMPLE_ASSIGN
++          && TREE_CODE (gimple_assign_rhs1 (stmt)) == ARRAY_REF
++          && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) == VAR_DECL
++          && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) == INTEGER_TYPE)
++        {
++          if (crc_table == NULL
++              && TREE_READONLY (gimple_assign_rhs1 (stmt)))
++            {
++              crc_table = gimple_assign_rhs1 (stmt);
++              crc_table_read_stmt = stmt;
++              res = true;
++            }
++          else
++            return false;
++        }
++    }
++  return res;
++}
++
++static const unsigned HOST_WIDE_INT crc_32_tab[] = {
++  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
++  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
++  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
++  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
++  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
++  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
++  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
++  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
++  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
++  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
++  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
++  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
++  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
++  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
++  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
++  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
++  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
++  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
++  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
++  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
++  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
++  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
++  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
++  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
++  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
++  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
++  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
++  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
++  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
++  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
++  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
++  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
++  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
++  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
++  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
++  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
++  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
++  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
++  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
++  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
++  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
++  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
++  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
++  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++
++/* Check the content of the array.  */
++static bool
++match_crc_table (tree crc_table)
++{
++  const unsigned LOW_BOUND = 0;
++  const unsigned UP_BOUND = 255;
++  const unsigned ELEMENT_SIZE = 8;
++  tree low_bound = array_ref_low_bound (crc_table);
++  tree up_bound = array_ref_up_bound (crc_table);
++  tree element_size = array_ref_element_size (crc_table);
++  if (!tree_fits_uhwi_p (low_bound) || !tree_fits_uhwi_p (up_bound)
++      || !tree_fits_uhwi_p (element_size))
++    return false;
++  unsigned HOST_WIDE_INT lb = tree_to_uhwi (low_bound);
++  unsigned HOST_WIDE_INT ub = tree_to_uhwi (up_bound);
++  unsigned HOST_WIDE_INT es = tree_to_uhwi (element_size);
++  if (lb != LOW_BOUND || ub != UP_BOUND || es != ELEMENT_SIZE)
++    return false;
++
++  tree decl = TREE_OPERAND (crc_table, 0);
++  tree ctor = ctor_for_folding (decl);
++  for (int i = lb; i <= ub; i++)
++    {
++      unsigned HOST_WIDE_INT val = tree_to_uhwi (CONSTRUCTOR_ELT (ctor,
++                                                                  i)->value);
++      if (crc_32_tab[i] != val)
++        return false;
++    }
++  return true;
++}
++
++/* Check the crc table.  The loop should have only one data reference.
++   And match the data reference with the predefined array.  */
++static bool
++crc_table_check (class loop *loop)
++{
++  tree crc_table = NULL;
++  if (!only_one_array_read (loop, crc_table))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nTable check fail. not only single array "
++                            "is read.\n");
++      return false;
++    }
++  if (!match_crc_table (crc_table))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nTable check fail. Table not matching.\n");
++      return false;
++    }
++  return true;
++}
++
++/* Check whether the evolution pattern of phi is phi = SSA_NAME + target.  */
++static bool
++evolution_pattern_plus_with_p (class loop *loop, gphi *phi,
++                               unsigned HOST_WIDE_INT target)
++{
++  edge backedge = find_edge (loop->latch, loop->header);
++  if (backedge == NULL)
++    return false;
++  tree evolution_node = PHI_ARG_DEF_FROM_EDGE (phi, backedge);
++  gimple *evolution_expr = SSA_NAME_DEF_STMT (evolution_node);
++
++  if (evolution_expr
++      && (gimple_assign_rhs_code (evolution_expr) == PLUS_EXPR
++          || gimple_assign_rhs_code (evolution_expr) == POINTER_PLUS_EXPR))
++    {
++      tree rhs1 = gimple_assign_rhs1 (evolution_expr);
++      tree rhs2 = gimple_assign_rhs2 (evolution_expr);
++      if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST
++          && tree_to_uhwi (rhs2) == target)
++        return true;
++    }
++  return false;
++}
++
++/* Check whether there are only 3 phi nodes in the header block.
++   Return the 3 phi nodes in the capture.  */
++static bool
++check_num_of_phi (basic_block header, gphi *capture[])
++{
++  gphi *phi;
++  gphi_iterator gsi;
++  int num_of_phi = 0;
++
++  for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      phi = gsi.phi ();
++      if (phi)
++        num_of_phi++;
++      if (num_of_phi > 3)
++        return false;
++      capture[num_of_phi - 1] = phi;
++    }
++  /* There should be exactly 3 phi nodes.  */
++  return num_of_phi == 3;
++}
++
++/* Check the evolution pattern of the three phi nodes.
++   One of the nodes should be +1 every time (s), one of the nodes -1
++   every time (n), and a 3rd one neither (c).  Return the 3 phi nodes in
++   the capture in the order s, n, c.  */
++static bool
++check_evolution_pattern (class loop *loop, gphi *capture[])
++{
++  gphi *s = NULL;
++  gphi *n = NULL;
++  gphi *c = NULL;
++
++  for (int i = 0; i < 3; i++)
++    {
++      if (evolution_pattern_plus_with_p (loop, capture[i], 1))
++        {
++          if (s != NULL)
++            return false;
++          s = capture[i];
++          phi_s = s;
++        }
++      else if (evolution_pattern_plus_with_p (loop, capture[i], 4294967295))
++        {
++          if (n != NULL)
++            return false;
++          n = capture[i];
++        }
++      else
++        {
++          if (c != NULL)
++            return false;
++          c = capture[i];
++          phi_c = c;
++        }
++    }
++
++  // Some evolution pattern cannot be found
++  if (!n || !s || !c)
++    return false;
++
++  capture[0] = s;
++  capture[1] = n;
++  capture[2] = c;
++  return true;
++}
++
++/* Check the calculation pattern before and after the crc_table array read
++   stmt:
++     _7 = crc_32_tab[_6];
++   The calculation of the index _6 should be the result of a sequence of
++   calculations on s and c.
++   The result of the array read _7 should be used to calculate the new c.  */
++static bool
++check_calculation_pattern (class loop *loop, gphi *capture[])
++{
++  gphi *s = capture[0];
++  gphi *c = capture[2];
++  tree res_ops[3];
++  tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1);
++
++  /* Try to match
++     _4 = (int) _3;   // NOP_EXPR (SSA_NAME @2)
++     _5 = _4 ^ c_10;  // BIT_XOR_EXPR (SSA_NAME, PHI @1)
++     _6 = _5 & 255;   // BIT_AND_EXPR (SSA_NAME, INTEGER_CST @3)
++  */
++  if (!gimple_crc_match_index (index, res_ops, NULL))
++    return false;
++  gimple *s_res_stmt = SSA_NAME_DEF_STMT (res_ops[0]);
++  if (!s_res_stmt)
++    return false;
++  gimple *s_def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (s_res_stmt));
++  if (!s_def_stmt)
++    return false;
++  tree s_res = TREE_OPERAND (gimple_assign_rhs1 (s_def_stmt), 0);
++  if (res_ops[1] != gimple_phi_result (c) || s_res != gimple_phi_result (s))
++    return false;
++
++  /* Try to match
++     _8 = c_12 >> 8;  // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2)
++     c_19 = _7 ^ _8;  // BIT_XOR_EXPR (SSA_NAME @3, SSA_NAME)
++  */
++  edge backedge = find_edge (loop->latch, loop->header);
++  tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge);
++  if (!gimple_crc_match_res (updated_c, res_ops, NULL))
++    return false;
++  if (res_ops[0] != gimple_phi_result (c)
++      || res_ops[2] != gimple_assign_lhs (crc_table_read_stmt))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\n gimple_crc_match_res pattern check failed.\n");
++      return false;
++    }
++
++  return true;
++}
++
++/* Check the exit condition is n != 0.  */
++static bool
++check_exit_condition (class loop *loop, gphi *n)
++{
++  edge backedge = find_edge (loop->latch, loop->header);
++  gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header));
++  if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND
++      || gimple_cond_code (cond_stmt) != NE_EXPR
++      || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge)
++      || tree_to_uhwi (gimple_cond_rhs (cond_stmt)) != 0)
++    return false;
++
++  return true;
++}
++
++/* Check the loop body.  The loop body we are trying to match is
++
++# s_10 = PHI <s_14(D)(6), s_18(7)>
++# n_11 = PHI <n_17(D)(6), n_20(7)>
++# c_12 = PHI <c_16(6), c_19(7)>
++_1 = (int) c_12;
++s_18 = s_10 + 1;
++_3 = *s_10;
++_4 = (int) _3;
++_5 = _1 ^ _4;
++_6 = _5 & 255;
++_7 = crc_32_tab[_6];
++_8 = c_12 >> 8;
++c_19 = _7 ^ _8;
++n_20 = n_11 + 4294967295;
++if (n_20 != 0)
++  goto <bb 7>; [INV]
++else
++  goto <bb 5>; [INV]
++
++which is doing a very simple calculation
++do {
++  c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++} while (--n);
++
++In this case, we don't want this loop to have any other operation inside,
++so the matching conditions are:
++1. There are only 3 loop variants during each iteration, namely s, c, n,
++   which is limited by the condition that the loop has exactly 3 phi nodes.
++2. The 3 loop variants should have evolution patterns where one of the 3
++   nodes is increased by 1 every iteration, one is decreased by 1 every
++   iteration, and the 3rd one is neither.  These three SSA values will be
++   captured for the later arithmetic pattern matching.
++3. Pattern matching for the index of crc_table.
++4. Pattern matching for the result of the c calculation after the read from
++   crc_table.
++5. The exit condition matching.
++*/
++static bool
++crc_loop_body_check (class loop *loop)
++{
++  basic_block header = loop->header;
++  gphi *capture[3];
++  if (!check_num_of_phi (header, capture))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\n num of phi nodes check failed.\n");
++      return false;
++    }
++  if (!check_evolution_pattern (loop, capture))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\n evolution pattern check failed.\n");
++      return false;
++    }
++  if (!check_calculation_pattern (loop, capture))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\n calculation pattern check failed.\n");
++      return false;
++    }
++  if (!check_exit_condition (loop, capture[1]))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\n exit condition check failed.\n");
++      return false;
++    }
++  return true;
++}
++
++static bool
++check_prev_bb (basic_block prev_bb, enum tree_code code)
++{
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++  for (gsi = gsi_start_bb (prev_bb); !gsi_end_p (gsi);
++       gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt == NULL)
++        return false;
++
++      if (gimple_code (stmt) == GIMPLE_COND
++          && gimple_cond_code (stmt) == code
++          && TREE_CODE (gimple_cond_rhs (stmt)) == INTEGER_CST
++          && tree_int_cst_sgn (gimple_cond_rhs (stmt)) == 0)
++        return true;
++    }
++  return false;
++}
++
++/* Check the prev_bb of the prev_bb of the loop header.  The prev_bb we are
++trying to match is
++
++c_15 = crc;
++if (n_16(D) != 0)
++  goto <bb 6>; [INV]
++else
++  goto <bb 5>; [INV]
++
++  In this case, we must be sure that n is not zero,
++  so the match condition is
++  1. n is not zero.
++
++  <bb 2> :
++if (s_13(D) == 0B)
++  goto <bb 5>; [INV]
++else
++  goto <bb 3>; [INV]
++
++  In this case, we must be sure that s is not NULL,
++  so the match condition is
++  1. s is not NULL.
++*/
++static bool
++crc_prev_bb_of_loop_header_check (class loop *loop)
++{
++  basic_block header = loop->header;
++  basic_block prev_header_bb = header->prev_bb;
++  if (NULL == prev_header_bb)
++    return false;
++
++  basic_block prev_prev_header_bb = prev_header_bb->prev_bb;
++  if (NULL == prev_prev_header_bb)
++    return false;
++
++  if (!check_prev_bb (prev_prev_header_bb, NE_EXPR))
++    return false;
++
++  basic_block first_bb = prev_prev_header_bb->prev_bb;
++  if (NULL == first_bb)
++    return false;
++
++  if (!check_prev_bb (first_bb, EQ_EXPR))
++    return false;
++
++  return true;
++}
++
++static bool
++match_crc_loop (class loop *loop)
++{
++  if (!crc_loop_form_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nWrong loop form for crc matching.\n");
++      return false;
++    }
++  if (!crc_table_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nWrong crc table for crc matching.\n");
++      return false;
++    }
++  if (!crc_loop_body_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nWrong loop body for crc matching.\n");
++      return false;
++    }
++  if (!crc_prev_bb_of_loop_header_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nWrong prev basic_blocks of loop header for"
++                            " crc matching.\n");
++      return false;
++    }
++
++  init_origin_loop_structure ();
++  if (!get_origin_loop_info (loop))
++    return false;
++
++  return true;
++}
++
++static void
++create_new_bb (basic_block &new_bb, basic_block after_bb,
++               basic_block dominator_bb, class loop *outer)
++{
++  new_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (new_bb, outer);
++  set_immediate_dominator (CDI_DOMINATORS, new_bb, dominator_bb);
++}
++
++static void
++change_preheader_bb (edge entry_edge)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  tree lhs1;
++
++  lhs1 = create_tmp_var (TREE_TYPE (origin_loop.base_n), "nn");
++  lhs1 = make_ssa_name (lhs1);
++  gsi = gsi_last_bb (entry_edge->src);
++  /* nn = n >> 2, the number of whole 32-bit words.  */
++  g = gimple_build_assign (lhs1, RSHIFT_EXPR, origin_loop.base_n,
++                           build_int_cst (TREE_TYPE (origin_loop.base_n), 2));
++  gimple_seq_add_stmt (&stmts, g);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++  nn_tree = lhs1;
++  set_current_def (nn_tree, lhs1);
++  nn_map.put (entry_edge->src, lhs1);
++}
++
++static gphi *
++create_phi_node_for_bb (tree old_name, basic_block bb)
++{
++  gphi *phi = create_phi_node (NULL_TREE, bb);
++  create_new_def_for (old_name, phi, gimple_phi_result_ptr (phi));
++  return phi;
++}
++
++static gimple *
++call_builtin_fun (int code, tree &lhs, tree arg1, tree arg2)
++{
++  unsigned int builtin_code = targetm.get_crc_builtin_code (code, true);
++  // Get the decl of the target CRC builtin, e.g. __builtin_aarch64_crc32w
++  tree fn = targetm.builtin_decl (builtin_code, true);
++  if (!fn || fn == error_mark_node)
++    fatal_error (input_location,
++                 "target specific builtin not available");
++  gimple *call_builtin = gimple_build_call (fn, 2, arg1, arg2);
++  lhs = make_ssa_name (unsigned_type_node);
++  gimple_call_set_lhs (call_builtin, lhs);
++
++  return call_builtin;
++}
++
++/* Create loop_header and loop_latch for the new loop
++   <bb 5> :
++   # s_14 = PHI <s_23(D)(4), s_30(5)>
++   # c_16 = PHI <c_25(4), c_29(5)>
++   # nn_19 = PHI <nn_27(4), nn_31(5)>
++   _1 = (unsigned int) c_16;
++   _2 = MEM[(uint32_t *)s_14];
++   _40 = __builtin_aarch64_crc32w (_1, _2);
++   c_29 = (long unsigned int) _40;
++   s_30 = s_14 + 4;
++   nn_31 = nn_19 + 4294967295;
++   if (nn_31 != 0)
++   The IR of the bb is as above.  */
++static void
++create_loop_bb (basic_block &loop_bb, basic_block after_bb,
++                basic_block dominator_bb, class loop *outer, edge entry_edge)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  gphi *phi_s_loop;
++  gphi *phi_c_loop;
++  gphi *phi_nn_loop;
++
++  create_new_bb (loop_bb, after_bb, dominator_bb, outer);
++  redirect_edge_and_branch (entry_edge, loop_bb);
++  gsi = gsi_last_bb (loop_bb);
++  tree entry_nn = get_current_def (nn_tree);
++  phi_s_loop = create_phi_node_for_bb (origin_loop.base_s, loop_bb);
++  phi_c_loop = create_phi_node_for_bb (origin_loop.base_c, loop_bb);
++  phi_nn_loop = create_phi_node_for_bb (entry_nn, loop_bb);
++
++  tree res_s = gimple_phi_result (phi_s_loop);
++  tree res_nn = gimple_phi_result (phi_nn_loop);
++  tree lhs1 = gimple_build (&stmts, NOP_EXPR, unsigned_type_node,
++                            gimple_phi_result (phi_c_loop));
++  g = gimple_build_assign (make_ssa_name (unsigned_type_node),
++                           fold_build2 (MEM_REF, unsigned_type_node, res_s,
++                                        build_int_cst (build_pointer_type
++                                                       (unsigned_type_node), 0)));
++  gimple_seq_add_stmt (&stmts, g);
++  tree lhs2 = gimple_assign_lhs (g);  // _2 = MEM[(uint32_t *)s_14];
++  unsigned int code = AARCH64_BUILTIN_CRC32W;
++  tree lhs3;
++  gimple *build_crc32w = call_builtin_fun (code, lhs3, lhs1, lhs2);
++  crc_map.put (loop_bb, lhs3);
++  gimple_seq_add_stmt (&stmts, build_crc32w);
++
++  tree lhs4 = copy_ssa_name (origin_loop.base_c);
++  g = gimple_build_assign (lhs4, NOP_EXPR, lhs3);
++  gimple_seq_add_stmt (&stmts, g);
++  c_map.put (loop_bb, lhs4);
++
++  tree lhs5 = copy_ssa_name (origin_loop.base_s);
++  g = gimple_build_assign (lhs5, POINTER_PLUS_EXPR, res_s,
++                           build_int_cst (sizetype, 4));
++  gimple_seq_add_stmt (&stmts, g);
++  s_map.put (loop_bb, lhs5);
++
++  tree lhs6 = copy_ssa_name (nn_tree);
++  g = gimple_build_assign (lhs6, PLUS_EXPR, res_nn,
++                           build_int_cst (TREE_TYPE (res_nn), 4294967295));
++  gimple_seq_add_stmt (&stmts, g);
++  nn_map.put (loop_bb, lhs6);
++
++  gcond *cond_stmt = gimple_build_cond (NE_EXPR, lhs6, origin_loop.limit,
++                                        NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* <bb 6> :
++   # c_6 = PHI <c_29(5)>
++   # s_46 = PHI <s_30(5)>
++   _44 = n_26(D) & 2;
++   if (_44 != 0)
++   The IR of the bb is as above.  */
++static void
++create_cond_bb (basic_block &cond_bb, basic_block after_bb,
++                basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gphi *phi_s_loop;
++  gphi *phi_c_loop;
++
++  create_new_bb (cond_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb (cond_bb);
++  tree entry_nn = get_current_def (nn_tree);
++  phi_s_loop = create_phi_node_for_bb (origin_loop.base_s, cond_bb);
++  phi_c_loop = create_phi_node_for_bb (origin_loop.base_c, cond_bb);
++  tree res_s = gimple_phi_result (phi_s_loop);
++  set_current_def (origin_loop.base_s, res_s);
++  s_map.put (cond_bb, res_s);
++  tree res_c = gimple_phi_result (phi_c_loop);
++  set_current_def (origin_loop.base_c, res_c);
++  c_map.put (cond_bb, res_c);
++
++  tree lhs1 = gimple_build (&stmts, BIT_AND_EXPR,
++                            TREE_TYPE (origin_loop.base_n), origin_loop.base_n,
++                            build_int_cst (TREE_TYPE (origin_loop.base_n), 2));
++  gcond *cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit,
++                                        NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* <bb 7> :
++   _7 = MEM[(uint16_t *)s_46];
++   _41 = __builtin_aarch64_crc32h (_8, _7);
++   c_33 = (long unsigned int) _41;
++   s_34 = s_30 + 2;
++   The IR of the bb is as above.  */
++static void
++create_cond_true_bb (basic_block &cond_true_bb, basic_block after_bb,
++                     basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple *g;
++  gimple_stmt_iterator gsi;
++
++  create_new_bb (cond_true_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb (cond_true_bb);
++  tree s_46 = *(s_map.get (after_bb));
++  tree type = build_pointer_type (short_unsigned_type_node);
++  g = gimple_build_assign (make_ssa_name (short_unsigned_type_node),
++                           fold_build2 (MEM_REF, short_unsigned_type_node, s_46,
++                                        build_int_cst (type, 0)));
++  gimple_seq_add_stmt (&stmts, g);
++  tree lhs1 = gimple_assign_lhs (g);  // _7 = MEM[(uint16_t *)s_46];
++  unsigned int code = AARCH64_BUILTIN_CRC32H;
++  tree lhs2;
++  gimple *call_builtin = call_builtin_fun (code, lhs2,
++                                           *(crc_map.get
++                                             (cond_true_bb->prev_bb->prev_bb)),
++                                           lhs1);
++  crc_map.put (cond_true_bb, lhs2);
++  gimple_seq_add_stmt (&stmts, call_builtin);
++
++  tree lhs3 = copy_ssa_name (origin_loop.base_c);
++  g = gimple_build_assign (lhs3, NOP_EXPR, lhs2);
++  gimple_seq_add_stmt (&stmts, g);
++  c_map.put (cond_true_bb, lhs3);
++
++  tree lhs5 = copy_ssa_name (s_46);
++  g = gimple_build_assign (lhs5, POINTER_PLUS_EXPR, s_46,
++                           build_int_cst (sizetype, 2));  // s_30 + 2;
++  gimple_seq_add_stmt (&stmts, g);
++  s_map.put (cond_true_bb, lhs5);
++
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* <bb 8> :
++   # s_15 = PHI <s_46(6), s_34(7)>
++   # c_17 = PHI <c_6(6), c_33(7)>
++   _3 = n_26(D) & 1;
++   if (_3 != 0)
++   The IR of the bb is as above.  */
++static void
++create_cond_false_bb (basic_block &cond_false_bb, basic_block after_bb,
++                      basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gphi *phi_s_cond_true_bb;
++  gphi *phi_c_cond_true_bb;
++
++  create_new_bb (cond_false_bb, after_bb, dominator_bb, outer);
++  make_single_succ_edge (after_bb, cond_false_bb, EDGE_FALLTHRU);
++
++  tree entry_s = get_current_def (origin_loop.base_s);
++  phi_s_cond_true_bb = create_phi_node_for_bb (entry_s, cond_false_bb);
++  tree entry_c = get_current_def (origin_loop.base_c);
++  phi_c_cond_true_bb = create_phi_node_for_bb (entry_c, cond_false_bb);
++  tree res_s = gimple_phi_result (phi_s_cond_true_bb);
++  set_current_def (origin_loop.base_s, res_s);
++  s_map.put (cond_false_bb, res_s);
++  tree res_c = gimple_phi_result (phi_c_cond_true_bb);
++  set_current_def (origin_loop.base_c, res_c);
++  c_map.put (cond_false_bb, res_c);
++
++  gsi = gsi_last_bb (cond_false_bb);
++  tree lhs1 = gimple_build (&stmts, BIT_AND_EXPR,
++                            TREE_TYPE (origin_loop.base_n), origin_loop.base_n,
++                            build_int_cst (TREE_TYPE (origin_loop.base_n), 1));
++  gcond *cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit,
++                                        NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* <bb 9> :
++   _11 = (unsigned int) c_17;
++   _12 = *s_15;
++   _42 = __builtin_aarch64_crc32b (_11, _12);
++   c_36 = (long unsigned int) _42;
++   The IR of the bb is as above.  */
++static void
++create_lastcond_true_bb (basic_block &new_bb, basic_block after_bb,
++                         basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++
++  create_new_bb (new_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb (new_bb);
++
++  tree lhs1 = gimple_build (&stmts, NOP_EXPR, unsigned_type_node,
++                            get_current_def (origin_loop.base_c));
++  tree lhs2;
++  tree s_15 = get_current_def (origin_loop.base_s);
++  g = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
++                           fold_build2 (MEM_REF, unsigned_char_type_node, s_15,
++                                        build_int_cst (TREE_TYPE (s_15), 0)));
++  gimple_seq_add_stmt (&stmts, g);
++  lhs2 = gimple_assign_lhs (g);
++
++  unsigned int code = AARCH64_BUILTIN_CRC32B;
++  tree lhs3;
++  gimple *call_builtin = call_builtin_fun (code, lhs3, lhs1, lhs2);
++  crc_map.put (new_bb, lhs3);
++  gimple_seq_add_stmt (&stmts, call_builtin);
++
++  tree lhs4 = copy_ssa_name (origin_loop.base_c);
++  g = gimple_build_assign (lhs4, NOP_EXPR, lhs3);
++  gimple_seq_add_stmt (&stmts, g);
++  c_map.put (new_bb, lhs4);
++
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++static bool
++optional_add_phi_arg (gphi *phi, tree phi_res, tree phi_arg, edge e)
++{
++  location_t loc;
++  if (same_ssa_name_var_p (phi_arg, phi_res))
++    {
++      if (virtual_operand_p (phi_arg))
++        loc = UNKNOWN_LOCATION;
++      else
++        loc = gimple_location (SSA_NAME_DEF_STMT (phi_arg));
++      add_phi_arg (phi, phi_arg, e, loc);
++
++      return true;
++    }
++
++  return false;
++}
++
++/* Add phi_arg for each bb with a phi node.  */
++static void
++update_phi_nodes (basic_block bb)
++{
++  edge e;
++  edge_iterator ei;
++  gphi *phi;
++  gphi_iterator gsi;
++  tree res;
++
++  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      phi = gsi.phi ();
++      res = gimple_phi_result (phi);
++
++      FOR_EACH_EDGE (e, ei, bb->preds)
++        {
++          if (PHI_ARG_DEF_FROM_EDGE (phi, e))
++            continue;
++          tree var_c;
++          tree *ptr_var_c = c_map.get (e->src);
++          if (ptr_var_c == NULL)
++            var_c = origin_loop.base_c;
++          else
++            var_c = *ptr_var_c;
++          if (optional_add_phi_arg (phi, res, var_c, e))
++            continue;
++
++          tree var_nn;
++          tree *ptr_var_nn = nn_map.get (e->src);
++          if (ptr_var_nn == NULL)
++            var_nn = nn_tree;
++          else
++            var_nn = *ptr_var_nn;
++          if (optional_add_phi_arg (phi, res, var_nn, e))
++            continue;
++
++          tree var_s;
++          tree *ptr_var_s = s_map.get (e->src);
++          if (ptr_var_s == NULL)
++            var_s = origin_loop.base_s;
++          else
++            var_s = *ptr_var_s;
++          if (optional_add_phi_arg (phi, res, var_s, e))
++            continue;
++        }
++    }
++}
++
++static void
++create_new_loops (edge entry_edge)
++{
++  class loop *new_loop = NULL;
++  basic_block loop_bb, cond_bb, cond_true_bb, cond_false_bb, lastcond_true_bb;
++  class loop *outer = entry_edge->src->loop_father;
++  change_preheader_bb (entry_edge);
++
++  create_loop_bb (loop_bb, entry_edge->src, entry_edge->src, outer, entry_edge);
++  create_cond_bb (cond_bb, loop_bb, loop_bb, outer);
++  make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
++  make_edge (loop_bb, cond_bb, EDGE_FALSE_VALUE);
++  update_phi_nodes (loop_bb);
++
++  new_loop = alloc_loop ();
++  new_loop->header = loop_bb;
++  new_loop->latch = loop_bb;
++  add_loop (new_loop, outer);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nPrint byte new loop %d:\n", new_loop->num);
++      flow_loop_dump (new_loop, dump_file, NULL, 1);
++      fprintf (dump_file, "\n\n");
++    }
++
++  create_cond_true_bb (cond_true_bb, cond_bb, cond_bb, outer);
++  make_edge (cond_bb, cond_true_bb, EDGE_TRUE_VALUE);
++  create_cond_false_bb (cond_false_bb, cond_true_bb, cond_bb, outer);
++  make_edge (cond_bb, cond_false_bb, EDGE_FALSE_VALUE);
++  update_phi_nodes (cond_bb);
++  update_phi_nodes (cond_false_bb);
++  create_lastcond_true_bb (lastcond_true_bb, cond_false_bb,
++                           cond_false_bb, outer);
++  make_edge (cond_false_bb, lastcond_true_bb, EDGE_TRUE_VALUE);
++  make_edge (cond_false_bb, origin_loop.exit_bb, EDGE_FALSE_VALUE);
++  make_single_succ_edge (lastcond_true_bb, origin_loop.exit_bb, EDGE_FALLTHRU);
++
++  update_phi_nodes (origin_loop.exit_bb);
++  remove_edge (origin_loop.exit_edge);
++}
++
++/* Clear information about the original loop.  */
++static void
++remove_origin_loop (class loop *loop)
++{
++  basic_block *body = get_loop_body_in_dom_order (loop);
++  unsigned n = loop->num_nodes;
++  for (unsigned i = 0; i < n; ++i)
++    delete_basic_block (body[i]);
++  free (body);
++  delete_loop (loop);
++}
++
++/* Make sure that the dominance relationship of the newly inserted cfg
++   is not missing.  */
++static void
++update_loop_dominator (cdi_direction dir)
++{
++  gcc_assert (dom_info_available_p (dir));
++
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      basic_block imm_bb = get_immediate_dominator (dir, bb);
++      if (!imm_bb || bb == origin_loop.exit_bb)
++        {
++          set_immediate_dominator (CDI_DOMINATORS, bb,
++                                   recompute_dominator (CDI_DOMINATORS, bb));
++          continue;
++        }
++    }
++}
++
++/* Perform the conversion of origin_loop to new_loop.  */
++static void
++convert_to_new_loop (class loop *loop)
++{
++  create_new_loops (origin_loop.entry_edge);
++  remove_origin_loop (loop);
++  update_loop_dominator (CDI_DOMINATORS);
++  update_ssa (TODO_update_ssa);
++}
++
++/* The main entry of the loop crc optimization.  */
++static unsigned int
++tree_ssa_loop_crc ()
++{
++  if (TARGET_CRC32 == false)
++    {
++      warning (OPT____, "The loop-crc optimization is not working. " \
++               "You should make sure that the specified architecture " \
++               "supports crc: -march=armv8.1-a");
++      return 0;
++    }
++  unsigned int todo = 0;
++  class loop *loop;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      flow_loops_dump (dump_file, NULL, 1);
++      fprintf (dump_file, "\nStarting the loop_crc pass\n");
++    }
++
++  enum li_flags LI = LI_FROM_INNERMOST;
++  for (auto loop : loops_list (cfun, LI))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "======================================\n");
++          fprintf (dump_file, "Processing loop %d:\n", loop->num);
++          fprintf (dump_file, "======================================\n");
++          flow_loop_dump (loop, dump_file, NULL, 1);
++          fprintf (dump_file, "\n\n");
++        }
++
++      if (match_crc_loop (loop))
++        {
++          if (dump_file && (dump_flags & TDF_DETAILS))
++            {
++              fprintf (dump_file, "The %dth loop form is success matched,"
++                                  "and the loop can be optimized.\n",
++                       loop->num);
++            }
++
++          convert_to_new_loop (loop);
++          todo |= (TODO_update_ssa);
++        }
++    }
++  return todo;
++}
++
++/* Loop crc.  */
++
++namespace {
++const pass_data pass_data_tree_loop_crc =
++{
++  GIMPLE_PASS,
++  "loop_crc",
++  OPTGROUP_LOOP,
++  TV_TREE_LOOP_CRC,
++  (PROP_cfg | PROP_ssa),
++  0,
++  0,
++  0,
++  (TODO_update_ssa | TODO_verify_all)
++};
++
++class pass_loop_crc : public gimple_opt_pass
++{
++public:
++  pass_loop_crc (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_tree_loop_crc, ctxt)
++  {}
++
++  /* Opt_pass methods: */
++  virtual bool gate (function *);
++  virtual unsigned int execute (function *);
++}; // Class pass_loop_crc
++
++bool
++pass_loop_crc::gate (function *)
++{
++  return (flag_loop_crc > 0 && optimize >= 3);
++}
++
++unsigned int
++pass_loop_crc::execute (function *fun)
++{
++  if (number_of_loops (fun) <= 1)
++    return 0;
++
++  /* Only the LP64 data model is supported.  */
++  if (TYPE_PRECISION (long_integer_type_node) != 64
++      || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "The current data mode is not supported, "
++                            "only the LP64 data mode is supported.\n");
++      return 0;
++    }
++
++  return tree_ssa_loop_crc ();
++}
++
++} // Anon namespace
++
++gimple_opt_pass *
++make_pass_loop_crc (gcc::context *ctxt)
++{
++  return new pass_loop_crc (ctxt);
++}
+--
+2.33.0
+
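For reference, the transformation this pass performs corresponds to what can be written by hand with the AArch64 ACLE CRC32 intrinsics from <arm_acle.h>. The sketch below is an illustrative equivalent of the word/halfword/byte stepping described in the pass comment, not the pass's generated code; the function name and the memcpy-based unaligned loads are our own choices, and it assumes an AArch64 compiler invoked with -march=armv8-a+crc (or armv8.1-a, which implies CRC).

#include <arm_acle.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Byte-reflected CRC-32 over the gzip polynomial, processing whole
   32-bit words with __crc32w and then the 2-byte and 1-byte tails,
   mirroring the nn = n / 4, n & 2, n & 1 structure the pass emits.  */
uint32_t
crc32_update (uint32_t crc, const unsigned char *s, size_t n)
{
  crc = ~crc;                      /* same role as the 0xffffffff seed */
  for (size_t nn = n / 4; nn; --nn)
    {
      uint32_t w;
      memcpy (&w, s, 4);           /* safe unaligned 32-bit load */
      crc = __crc32w (crc, w);     /* equals 4 table-driven byte steps */
      s += 4;
    }
  if (n & 2)
    {
      uint16_t h;
      memcpy (&h, s, 2);
      crc = __crc32h (crc, h);
      s += 2;
    }
  if (n & 1)
    crc = __crc32b (crc, *s);
  return ~crc;                     /* same role as c ^ 0xffffffff */
}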
View file
_service:tar_scm:0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch
Added
@@ -0,0 +1,253 @@
+From a321a294407781b2694fe9a3be0099fe38ccf13a Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Fri, 5 Jan 2024 15:38:25 +0800
+Subject: [PATCH 102/188] LoongArch: Implement vec_init<M><N> where N is a LSX
+ vector mode
+
+This patch implements more vec_init optabs that can handle two LSX vectors
+producing a LASX vector by concatenating them.  When an LSX vector is
+concatenated with an LSX const_vector of zeroes, the vec_concatz pattern can
+be used effectively.  For example as below:
+
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+v8hi a, b;
+
+v16hi vec_initv16hiv8hi ()
+{
+  return __builtin_shufflevector (a, b, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15);
+}
+
+Before this patch:
+
+vec_initv16hiv8hi:
+	addi.d	$r3,$r3,-64
+	.cfi_def_cfa_offset 64
+	xvrepli.h	$xr0,0
+	la.local	$r12,.LANCHOR0
+	xvst	$xr0,$r3,0
+	xvst	$xr0,$r3,32
+	vld	$vr0,$r12,0
+	vst	$vr0,$r3,0
+	vld	$vr0,$r12,16
+	vst	$vr0,$r3,32
+	xvld	$xr1,$r3,32
+	xvld	$xr2,$r3,32
+	xvld	$xr0,$r3,0
+	xvilvh.h	$xr0,$xr1,$xr0
+	xvld	$xr1,$r3,0
+	xvilvl.h	$xr1,$xr2,$xr1
+	addi.d	$r3,$r3,64
+	.cfi_def_cfa_offset 0
+	xvpermi.q	$xr0,$xr1,32
+	jr	$r1
+
+After this patch:
+
+vec_initv16hiv8hi:
+	la.local	$r12,.LANCHOR0
+	vld	$vr0,$r12,32
+	vld	$vr2,$r12,48
+	xvilvh.h	$xr1,$xr2,$xr0
+	xvilvl.h	$xr0,$xr2,$xr0
+	xvpermi.q	$xr1,$xr0,32
+	xvst	$xr1,$r4,0
+	jr	$r1
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md (vec_initv32qiv16qi): Rename to ..
+	(vec_init<mode><lasxhalf>): .. this, and extend to mode.
+	(@vec_concatz<mode>): New insn pattern.
+	* config/loongarch/loongarch.cc (loongarch_expand_vector_group_init):
+	Handle VALS containing two vectors.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 26 +++++++-
+ gcc/config/loongarch/loongarch.cc             | 44 +++++++++++--
+ .../loongarch/vector/lasx/lasx-vec-init-2.c   | 65 +++++++++++++++++++
+ 3 files changed, 128 insertions(+), 7 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index b4aa8e261..803c5dd93 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -465,6 +465,11 @@
+    (V16HI "w")
+    (V32QI "w")])
+ 
++;; Half modes of all LASX vector modes, in lower-case.
++(define_mode_attr lasxhalf (V32QI "v16qi") (V16HI "v8hi") ++ (V8SI "v4si") (V4DI "v2di") ++ (V8SF "v4sf") (V4DF "v2df")) ++ + (define_expand "vec_init<mode><unitmode>" + (match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "") +@@ -474,9 +479,9 @@ + DONE; + }) + +-(define_expand "vec_initv32qiv16qi" +- (match_operand:V32QI 0 "register_operand") +- (match_operand:V16QI 1 "") ++(define_expand "vec_init<mode><lasxhalf>" ++ (match_operand:LASX 0 "register_operand") ++ (match_operand:<VHMODE256_ALL> 1 "") + "ISA_HAS_LASX" + { + loongarch_expand_vector_group_init (operands0, operands1); +@@ -577,6 +582,21 @@ + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + ++(define_insn "@vec_concatz<mode>" ++ (set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_concat:LASX ++ (match_operand:<VHMODE256_ALL> 1 "nonimmediate_operand") ++ (match_operand:<VHMODE256_ALL> 2 "const_0_operand"))) ++ "ISA_HAS_LASX" ++{ ++ if (MEM_P (operands1)) ++ return "vld\t%w0,%1"; ++ else ++ return "vori.b\t%w0,%w1,0"; ++} ++ (set_attr "type" "simd_splat") ++ (set_attr "mode" "<MODE>")) ++ + (define_insn "vec_concat<mode>" + (set (match_operand:LASX 0 "register_operand" "=f") + (vec_concat:LASX +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ddb32cea2..fccdc21a8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9842,10 +9842,46 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val) + void + loongarch_expand_vector_group_init (rtx target, rtx vals) + { +- rtx ops2 = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)), +- force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) }; +- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops0, +- ops1))); ++ machine_mode vmode = GET_MODE (target); ++ machine_mode half_mode = VOIDmode; ++ rtx low = XVECEXP (vals, 0, 0); ++ rtx high = XVECEXP (vals, 0, 1); ++ ++ switch (vmode) ++ { ++ case E_V32QImode: ++ half_mode = V16QImode; ++ break; ++ case E_V16HImode: ++ half_mode = V8HImode; ++ break; ++ case E_V8SImode: ++ half_mode = V4SImode; ++ break; ++ case E_V4DImode: ++ half_mode = V2DImode; ++ break; ++ case E_V8SFmode: ++ half_mode = V4SFmode; ++ break; ++ case E_V4DFmode: ++ half_mode = V2DFmode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (high == CONST0_RTX (half_mode)) ++ emit_insn (gen_vec_concatz (vmode, target, low, high)); ++ else ++ { ++ if (!register_operand (low, half_mode)) ++ low = force_reg (half_mode, low); ++ if (!register_operand (high, half_mode)) ++ high = force_reg (half_mode, high); ++ emit_insn (gen_rtx_SET (target, ++ gen_rtx_VEC_CONCAT (vmode, low, high))); ++ } + } + + /* Expand initialization of a vector which has all same elements. 
*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +new file mode 100644 +index 000000000..7592198c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +@@ -0,0 +1,65 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fno-vect-cost-model -mlasx" } */ ++/* { dg-final { scan-assembler-times "vld" 12 } } */ ++ ++ ++typedef char v16qi __attribute__ ((vector_size (16))); ++typedef char v32qi __attribute__ ((vector_size (32))); ++ ++typedef short v8hi __attribute__ ((vector_size (16))); ++typedef short v16hi __attribute__ ((vector_size (32))); ++ ++typedef int v4si __attribute__ ((vector_size (16))); ++typedef int v8si __attribute__ ((vector_size (32))); ++ ++typedef long v2di __attribute__ ((vector_size (16))); ++typedef long v4di __attribute__ ((vector_size (32))); ++ ++typedef float v4sf __attribute__ ((vector_size (16))); ++typedef float v8sf __attribute__ ((vector_size (32))); ++ ++typedef double v2df __attribute__ ((vector_size (16))); ++typedef double v4df __attribute__ ((vector_size (32))); ++ ++v16qi a_qi, b_qi; ++v8hi a_hi, b_hi; ++v4si a_si, b_si; ++v2di a_di, b_di; ++v4sf a_sf, b_sf; ++v2df a_df, b_df; ++ ++v32qi ++foo_v32qi () ++{ ++ return __builtin_shufflevector (a_qi, b_qi, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); ++} ++ ++v16hi ++foo_v16qi () ++{ ++ return __builtin_shufflevector (a_hi, b_hi, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); ++} ++ ++v8si ++foo_v8si () ++{ ++ return __builtin_shufflevector (a_si, b_si, 0, 4, 1, 5, 2, 6, 3, 7); ++} ++ ++v4di ++foo_v4di () ++{ ++ return __builtin_shufflevector (a_di, b_di, 0, 2, 1, 3); ++} ++ ++v8sf ++foo_v8sf () ++{ ++ return __builtin_shufflevector (a_sf, b_sf, 0, 4, 1, 5, 2, 6, 3, 7); ++} ++ ++v4df ++foo_v4df () ++{ ++ return __builtin_shufflevector (a_df, b_df, 0, 2, 1, 3); ++} +-- +2.43.0 +
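The ChangeLog example above shows the interleaving case; the other case the patch singles out is concatenation with an all-zero LSX vector, which the new @vec_concatz<mode> pattern reduces to a single vld or vori.b. A hedged illustration of that shape (the function name and the claim about codegen are ours, not the patch's), compiled with something like -O3 -mlasx:

typedef short v8hi  __attribute__ ((vector_size (16)));
typedef short v16hi __attribute__ ((vector_size (32)));

v8hi a;

/* Widen a 128-bit LSX vector to 256 bits with a zero upper half.  The
   expander should then see a vec_concat of a register and a const-0
   vector, the shape matched by the new @vec_concatz<mode> insn.  */
v16hi
widen_with_zero_high (void)
{
  v8hi zero = { 0 };
  return __builtin_shufflevector (a, zero, 0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
}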
_service:tar_scm:0102-aarch64-Tweak-aarch64-save-restore-callee-saves.patch
Deleted
@@ -1,225 +0,0 @@ -From 187861af7c51db9eddc6f954b589c121b210fc74 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:50 +0100 -Subject: PATCH aarch64: Tweak aarch64_save/restore_callee_saves - -aarch64_save_callee_saves and aarch64_restore_callee_saves took -a parameter called start_offset that gives the offset of the -bottom of the saved register area from the current stack pointer. -However, it's more convenient for later patches if we use the -bottom of the entire frame as the reference point, rather than -the bottom of the saved registers. - -Doing that removes the need for the callee_offset field. -Other than that, this is not a win on its own. It only really -makes sense in combination with the follow-on patches. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove - callee_offset handling. - (aarch64_save_callee_saves): Replace the start_offset parameter - with a bytes_below_sp parameter. - (aarch64_restore_callee_saves): Likewise. - (aarch64_expand_prologue): Update accordingly. - (aarch64_expand_epilogue): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ - gcc/config/aarch64/aarch64.h | 4 --- - 2 files changed, 28 insertions(+), 32 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index c7d84245fbfc..e79551af41df 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8343,7 +8343,6 @@ aarch64_layout_frame (void) - frame.final_adjust = 0; - frame.callee_adjust = 0; - frame.sve_callee_adjust = 0; -- frame.callee_offset = 0; - - frame.wb_pop_candidate1 = frame.wb_push_candidate1; - frame.wb_pop_candidate2 = frame.wb_push_candidate2; -@@ -8411,7 +8410,6 @@ aarch64_layout_frame (void) - stp reg1, reg2, sp, bytes_below_saved_regs - stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ - frame.initial_adjust = frame.frame_size; -- frame.callee_offset = const_below_saved_regs; - } - else if (saves_below_hard_fp_p - && known_eq (frame.saved_regs_size, -@@ -8758,12 +8756,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, - } - - /* Emit code to save the callee-saved registers from register number START -- to LIMIT to the stack at the location starting at offset START_OFFSET, -- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P -- is true if the hard frame pointer has been set up. */ -+ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP -+ bytes above the bottom of the static frame. Skip any write-back -+ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard -+ frame pointer has been set up. 
*/ - - static void --aarch64_save_callee_saves (poly_int64 start_offset, -+aarch64_save_callee_saves (poly_int64 bytes_below_sp, - unsigned start, unsigned limit, bool skip_wb, - bool hard_fp_valid_p) - { -@@ -8791,7 +8790,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = start_offset + frame.reg_offsetregno; -+ offset = (frame.reg_offsetregno -+ + frame.bytes_below_saved_regs -+ - bytes_below_sp); - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -8802,9 +8803,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, - else if (GP_REGNUM_P (regno) - && (!offset.is_constant (&const_offset) || const_offset >= 512)) - { -- gcc_assert (known_eq (start_offset, 0)); -- poly_int64 fp_offset -- = frame.below_hard_fp_saved_regs_size; -+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; - if (hard_fp_valid_p) - base_rtx = hard_frame_pointer_rtx; - else -@@ -8868,12 +8867,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, - } - - /* Emit code to restore the callee registers from register number START -- up to and including LIMIT. Restore from the stack offset START_OFFSET, -- skipping any write-back candidates if SKIP_WB is true. Write the -- appropriate REG_CFA_RESTORE notes into CFI_OPS. */ -+ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP -+ bytes above the bottom of the static frame. Skip any write-back -+ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE -+ notes into CFI_OPS. */ - - static void --aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, -+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, - unsigned limit, bool skip_wb, rtx *cfi_ops) - { - aarch64_frame &frame = cfun->machine->frame; -@@ -8899,7 +8899,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = start_offset + frame.reg_offsetregno; -+ offset = (frame.reg_offsetregno -+ + frame.bytes_below_saved_regs -+ - bytes_below_sp); - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -9675,8 +9677,6 @@ aarch64_expand_prologue (void) - HOST_WIDE_INT callee_adjust = frame.callee_adjust; - poly_int64 final_adjust = frame.final_adjust; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; -- poly_int64 below_hard_fp_saved_regs_size -- = frame.below_hard_fp_saved_regs_size; - unsigned reg1 = frame.wb_push_candidate1; - unsigned reg2 = frame.wb_push_candidate2; - bool emit_frame_chain = frame.emit_frame_chain; -@@ -9752,8 +9752,8 @@ aarch64_expand_prologue (void) - - frame.hard_fp_offset); - gcc_assert (known_ge (chain_offset, 0)); - -- /* The offset of the bottom of the save area from the current SP. */ -- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; -+ /* The offset of the current SP from the bottom of the static frame. 
*/ -+ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; - - if (emit_frame_chain) - { -@@ -9761,7 +9761,7 @@ aarch64_expand_prologue (void) - { - reg1 = R29_REGNUM; - reg2 = R30_REGNUM; -- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, -+ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, - false, false); - } - else -@@ -9801,7 +9801,7 @@ aarch64_expand_prologue (void) - emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - -- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || emit_frame_chain, - emit_frame_chain); - if (maybe_ne (sve_callee_adjust, 0)) -@@ -9811,16 +9811,17 @@ aarch64_expand_prologue (void) - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, - sve_callee_adjust, - !frame_pointer_needed, false); -- saved_regs_offset += sve_callee_adjust; -+ bytes_below_sp -= sve_callee_adjust; - } -- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, - false, emit_frame_chain); -- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, -+ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, - callee_adjust != 0 || emit_frame_chain, - emit_frame_chain); - - /* We may need to probe the final adjustment if it is larger than the guard - that is assumed by the called. */ -+ gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - !frame_pointer_needed, true); - } -@@ -9855,7 +9856,6 @@ aarch64_expand_epilogue (bool for_sibcall) - poly_int64 initial_adjust = frame.initial_adjust; - HOST_WIDE_INT callee_adjust = frame.callee_adjust; - poly_int64 final_adjust = frame.final_adjust; -- poly_int64 callee_offset = frame.callee_offset; - poly_int64 sve_callee_adjust = frame.sve_callee_adjust; - poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; -@@ -9925,9 +9925,9 @@ aarch64_expand_epilogue (bool for_sibcall) - - /* Restore the vector registers before the predicate registers, - so that we can use P4 as a temporary for big-endian SVE frames. */ -- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, -+ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, - callee_adjust != 0, &cfi_ops); -- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, -+ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, - false, &cfi_ops); - if (maybe_ne (sve_callee_adjust, 0)) - aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); -@@ -9935,7 +9935,7 @@ aarch64_expand_epilogue (bool for_sibcall) - /* When shadow call stack is enabled, the scs_pop in the epilogue will - restore x30, we don't need to restore x30 again in the traditional - way. */ -- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, -+ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust, - R0_REGNUM, last_gpr, - callee_adjust != 0, &cfi_ops); - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index de68ff7202fc..94fca4b94716 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -907,10 +907,6 @@ struct GTY (()) aarch64_frame - It is zero when no push is used. */ - HOST_WIDE_INT callee_adjust; - -- /* The offset from SP to the callee-save registers after initial_adjust. 
-- It may be non-zero if no push is used (ie. callee_adjust == 0). */ -- poly_int64 callee_offset; -- - /* The size of the stack adjustment before saving or after restoring - SVE registers. */ - poly_int64 sve_callee_adjust; --- -2.43.5 -
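The hunk above is dropped from the spec in this revision (the aarch64 frame series it belonged to is superseded by the renumbered SME backport set). Its central change, re-basing a register save slot from "offset from the bottom of the saved-register area" to "offset from the current SP via the frame bottom", reduces to one line of arithmetic; a standalone sketch using the field names from the diff, with the function itself being ours:

/* reg_offset is measured from the bottom of the saved-register area,
   bytes_below_saved_regs from the bottom of the frame, and bytes_below_sp
   is the current SP's height above the frame bottom.  */
long
sp_relative_offset (long reg_offset, long bytes_below_saved_regs,
                    long bytes_below_sp)
{
  /* Replaces the old "start_offset + reg_offset" computation.  */
  return reg_offset + bytes_below_saved_regs - bytes_below_sp;
}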
_service:tar_scm:0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch
Added
@@ -0,0 +1,533 @@ +From 901663758281d4ce87a75e4d6e45de621b65f0cb Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:07 +0800 +Subject: PATCH 103/188 LoongArch: Handle ISA evolution switches along with + other options + +gcc/ChangeLog: + + * config/loongarch/genopts/genstr.sh: Prepend the isa_evolution + variable with the common la_ prefix. + * config/loongarch/genopts/loongarch.opt.in: Mark ISA evolution + flags as saved using TargetVariable. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-def.h: Define evolution_set to + mark changes to the -march default. + * config/loongarch/loongarch-driver.cc: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Define and use ISA evolution + conditions around the la_target structure. + * config/loongarch/loongarch.cc: Same. + * config/loongarch/loongarch.md: Same. + * config/loongarch/loongarch-builtins.cc: Same. + * config/loongarch/loongarch-c.cc: Same. + * config/loongarch/lasx.md: Same. + * config/loongarch/lsx.md: Same. + * config/loongarch/sync.md: Same. +--- + gcc/config/loongarch/genopts/genstr.sh | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 6 ++--- + gcc/config/loongarch/lasx.md | 4 ++-- + gcc/config/loongarch/loongarch-builtins.cc | 6 ++--- + gcc/config/loongarch/loongarch-c.cc | 2 +- + gcc/config/loongarch/loongarch-def.h | 5 +++- + gcc/config/loongarch/loongarch-driver.cc | 5 ++-- + gcc/config/loongarch/loongarch-opts.cc | 17 ++++++++++++- + gcc/config/loongarch/loongarch-opts.h | 24 +++++++++++++++---- + gcc/config/loongarch/loongarch.cc | 24 ++++++++----------- + gcc/config/loongarch/loongarch.md | 12 +++++----- + gcc/config/loongarch/loongarch.opt | 16 ++++++------- + gcc/config/loongarch/lsx.md | 4 ++-- + gcc/config/loongarch/sync.md | 22 ++++++++--------- + 14 files changed, 90 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index bcc616e98..391eca121 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -107,7 +107,7 @@ EOF + print("") + print("m"$3) + gsub(/-/, "_", $3) +- print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ print("Target Mask(ISA_"toupper($3)") Var(la_isa_evolution)") + $1=""; $2=""; $3="" + sub(/^ */, "", $0) + print($0) +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 102202b03..a866dab84 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -259,6 +259,6 @@ default value is 4. + ; Features added during ISA evolution. This concept is different from ISA + ; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the + ; explanation. These features may be implemented and enumerated with +-; CPUCFG independantly, so we use bit flags to specify them. +-Variable +-HOST_WIDE_INT isa_evolution = 0 ++; CPUCFG independently, so we use bit flags to specify them. 
++TargetVariable ++HOST_WIDE_INT la_isa_evolution = 0 +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 803c5dd93..fdfd65e4a 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1540,7 +1540,7 @@ + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRECIPE)) +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrecipe.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +@@ -1573,7 +1573,7 @@ + (set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX (match_operand:FLASX 1 "register_operand" "f") + UNSPEC_LASX_XVFRSQRTE)) +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrsqrte.<flasxfmt>\t%u0,%u1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 85849ed29..e3b4dbc52 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -120,9 +120,9 @@ struct loongarch_builtin_description + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) + AVAIL_ALL (lsx, ISA_HAS_LSX) + AVAIL_ALL (lasx, ISA_HAS_LASX) +-AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) +-AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) +-AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) ++AVAIL_ALL (frecipe, ISA_HAS_FRECIPE && TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && ISA_HAS_FRECIPE) ++AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + + /* Construct a loongarch_builtin_description from the given arguments. + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index a89477a74..df2a482ad 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -102,7 +102,7 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + +- if (TARGET_HARD_FLOAT && TARGET_FRECIPE) ++ if (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE) + builtin_define ("__loongarch_frecipe"); + + if (ISA_HAS_LSX) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index f8f36f0e2..9e5eee0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -132,8 +132,11 @@ struct loongarch_isa + + Using int64_t instead of HOST_WIDE_INT for C compatibility. */ + int64_t evolution; ++ int64_t evolution_set; + +- loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} ++ loongarch_isa () : ++ base (0), fpu (0), simd (0), evolution (0), evolution_set (0) ++ {} + loongarch_isa base_ (int _base) { base = _base; return *this; } + loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } + loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index b3626984d..b84a6eaf7 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -42,9 +42,10 @@ extern struct obstack opts_obstack; + const char* + la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) + { +- /* Initialize all fields of la_target to -1 */ ++ /* Initialize all fields of la_target. 
*/ + loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, +- M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET); ++ M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, ++ 0, 0); + return ""; + } + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index d31becc67..935d09f45 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -140,7 +140,9 @@ static int with_default_simd = 0; + void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, +- int abi_base, int abi_ext, int cmodel) ++ int abi_base, int abi_ext, int cmodel, ++ HOST_WIDE_INT isa_evolution, ++ HOST_WIDE_INT isa_evolution_set) + { + if (!target) + return; +@@ -148,6 +150,8 @@ loongarch_init_target (struct loongarch_target *target, + target->cpu_tune = cpu_tune; + target->isa.fpu = fpu; + target->isa.simd = simd; ++ target->isa.evolution = isa_evolution; ++ target->isa.evolution_set = isa_evolution_set; + target->abi.base = abi_base; + target->abi.ext = abi_ext; + target->cmodel = cmodel; +@@ -184,6 +188,9 @@ loongarch_config_target (struct loongarch_target *target, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + }; + ++ int64_t isa_evolution = target->isa.evolution; ++ int64_t isa_evolution_set = target->isa.evolution_set; ++ + /* 1. Target ABI */ + if (constrained.abi_base) + t.abi.base = target->abi.base; +@@ -394,6 +401,13 @@ config_target_isa: + } + } + ++ /* Apply the ISA evolution feature switches from the user. */ ++ HOST_WIDE_INT isa_evolution_orig = t.isa.evolution; ++ t.isa.evolution &= ~(~isa_evolution & isa_evolution_set); ++ t.isa.evolution |= isa_evolution & isa_evolution_set; ++ ++ /* evolution_set means "what's different from the -march default". */ ++ t.isa.evolution_set = isa_evolution_orig ^ t.isa.evolution; + + /* 4. 
ABI-ISA compatibility */ + /* Note: +@@ -774,4 +788,5 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; + opts->x_la_opt_simd = target->isa.simd; ++ opts->x_la_isa_evolution = target->isa.evolution; + } +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 8491bee0d..204338553 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -34,7 +34,9 @@ extern struct loongarch_target la_target; + void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, +- int abi_base, int abi_ext, int cmodel); ++ int abi_base, int abi_ext, int cmodel, ++ HOST_WIDE_INT isa_evolutions, ++ HOST_WIDE_INT isa_evolutions_set); + + + /* Handler for "-m" option combinations, +@@ -82,9 +84,23 @@ struct loongarch_flags { + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) + +-#define ISA_HAS_LSX (la_target.isa.simd == ISA_EXT_SIMD_LSX \ +- || la_target.isa.simd == ISA_EXT_SIMD_LASX) +-#define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) ++#define ISA_HAS_LSX \ ++ (la_target.isa.simd == ISA_EXT_SIMD_LSX \ ++ || la_target.isa.simd == ISA_EXT_SIMD_LASX) ++ ++#define ISA_HAS_LASX \ ++ (la_target.isa.simd == ISA_EXT_SIMD_LASX) ++ ++#define ISA_HAS_FRECIPE \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_FRECIPE) ++#define ISA_HAS_DIV32 \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_DIV32) ++#define ISA_HAS_LAM_BH \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) ++#define ISA_HAS_LAMCAS \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) ++#define ISA_HAS_LD_SEQ_SA \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) + + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index fccdc21a8..b0bb67d60 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3859,7 +3859,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + else + { + *total = loongarch_cost->int_div_si; +- if (TARGET_64BIT && !TARGET_DIV32) ++ if (TARGET_64BIT && !ISA_HAS_DIV32) + *total += COSTS_N_INSNS (2); + } + +@@ -6107,7 +6107,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + if (loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op)))) + fputs ("dbar\t0b10100", file); +- else if (!TARGET_LD_SEQ_SA) ++ else if (!ISA_HAS_LD_SEQ_SA) + fputs ("dbar\t0x700", file); + break; + +@@ -7509,7 +7509,8 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_init_target (&la_target, + la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu, + la_opt_simd, la_opt_abi_base, la_opt_abi_ext, +- la_opt_cmodel); ++ la_opt_cmodel, opts->x_la_isa_evolution, ++ opts_set->x_la_isa_evolution); + + /* Handle target-specific options: compute defaults/conflicts etc. */ + loongarch_config_target (&la_target, NULL, 0); +@@ -7550,11 +7551,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + if (loongarch_branch_cost == 0) + loongarch_branch_cost = loongarch_cost->branch_cost; + +- /* If the user hasn't disabled a feature added during ISA evolution, +- use the processor's default. */ +- isa_evolution |= (la_target.isa.evolution & +- ~global_options_set.x_isa_evolution); +- + /* Enable sw prefetching at -O3 and higher. 
*/ + if (opts->x_flag_prefetch_loop_arrays < 0 + && (opts->x_optimize >= 3 || opts->x_flag_profile_use) +@@ -7685,7 +7681,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + } + if (loongarch_recip) + recip_mask |= RECIP_MASK_ALL; +- if (!TARGET_FRECIPE) ++ if (!ISA_HAS_FRECIPE) + recip_mask = RECIP_MASK_NONE; + } + +@@ -10875,11 +10871,11 @@ loongarch_asm_code_end (void) + loongarch_cpu_strings la_target.cpu_tune); + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings la_target.isa.base); +- DUMP_FEATURE (TARGET_FRECIPE); +- DUMP_FEATURE (TARGET_DIV32); +- DUMP_FEATURE (TARGET_LAM_BH); +- DUMP_FEATURE (TARGET_LAMCAS); +- DUMP_FEATURE (TARGET_LD_SEQ_SA); ++ DUMP_FEATURE (ISA_HAS_FRECIPE); ++ DUMP_FEATURE (ISA_HAS_DIV32); ++ DUMP_FEATURE (ISA_HAS_LAM_BH); ++ DUMP_FEATURE (ISA_HAS_LAMCAS); ++ DUMP_FEATURE (ISA_HAS_LD_SEQ_SA); + } + + fputs ("\n\n", asm_out_file); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 02c537d4c..23653a2b0 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -425,7 +425,7 @@ + + ;; A mode for anything legal as a input of a div or mod instruction. + (define_mode_iterator DIV (DI "TARGET_64BIT") +- (SI "!TARGET_64BIT || TARGET_DIV32")) ++ (SI "!TARGET_64BIT || ISA_HAS_DIV32")) + + ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. +@@ -941,7 +941,7 @@ + (set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") + UNSPEC_RECIPE)) +- "TARGET_FRECIPE" ++ "ISA_HAS_FRECIPE" + "frecipe.<fmt>\t%0,%1" + (set_attr "type" "frecipe") + (set_attr "mode" "<UNITMODE>") +@@ -954,7 +954,7 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT && !TARGET_DIV32) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) + { + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); +@@ -994,7 +994,7 @@ + (sign_extend + (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") + (match_operand:SI 2 "register_operand" "r,r,r")))) +- "TARGET_64BIT && TARGET_DIV32" ++ "TARGET_64BIT && ISA_HAS_DIV32" + { + return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); + } +@@ -1014,7 +1014,7 @@ + (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0") + (match_operand:DI 2 "register_operand" "r,r,r")) 0) + UNSPEC_FAKE_ANY_DIV))) +- "TARGET_64BIT && !TARGET_DIV32" ++ "TARGET_64BIT && !ISA_HAS_DIV32" + { + return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands); + } +@@ -1197,7 +1197,7 @@ + (set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") + UNSPEC_RSQRTE)) +- "TARGET_FRECIPE" ++ "ISA_HAS_FRECIPE" + "frsqrte.<fmt>\t%0,%1" + (set_attr "type" "frsqrte") + (set_attr "mode" "<UNITMODE>")) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 56f6a9564..b5a46df4e 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -267,26 +267,26 @@ default value is 4. + ; Features added during ISA evolution. This concept is different from ISA + ; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the + ; explanation. These features may be implemented and enumerated with +-; CPUCFG independantly, so we use bit flags to specify them. 
+-Variable +-HOST_WIDE_INT isa_evolution = 0 ++; CPUCFG independently, so we use bit flags to specify them. ++TargetVariable ++HOST_WIDE_INT la_isa_evolution = 0 + + mfrecipe +-Target Mask(ISA_FRECIPE) Var(isa_evolution) ++Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. + + mdiv32 +-Target Mask(ISA_DIV32) Var(isa_evolution) ++Target Mask(ISA_DIV32) Var(la_isa_evolution) + Support div.wu and mod.wu instructions with inputs not sign-extended. + + mlam-bh +-Target Mask(ISA_LAM_BH) Var(isa_evolution) ++Target Mask(ISA_LAM_BH) Var(la_isa_evolution) + Support am{swap/add}_db.{b/h} instructions. + + mlamcas +-Target Mask(ISA_LAMCAS) Var(isa_evolution) ++Target Mask(ISA_LAMCAS) Var(la_isa_evolution) + Support amcas_db.{b/h/w/d} instructions. + + mld-seq-sa +-Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) ++Target Mask(ISA_LD_SEQ_SA) Var(la_isa_evolution) + Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 02e89247b..612377436 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1479,7 +1479,7 @@ + (set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") + UNSPEC_LSX_VFRECIPE)) +- "ISA_HAS_LSX && TARGET_FRECIPE" ++ "ISA_HAS_LSX && ISA_HAS_FRECIPE" + "vfrecipe.<flsxfmt>\t%w0,%w1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +@@ -1512,7 +1512,7 @@ + (set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") + UNSPEC_LSX_VFRSQRTE)) +- "ISA_HAS_LSX && TARGET_FRECIPE" ++ "ISA_HAS_LSX && ISA_HAS_FRECIPE" + "vfrsqrte.<flsxfmt>\t%w0,%w1" + (set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")) +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index a678e7131..5da5c2780 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -124,9 +124,9 @@ + return "ld.<size>\t%0,%1\\n\\t" + "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1" +- : "ld.<size>\t%0,%1\\n\\t" +- "dbar\t0x700"; ++ return ISA_HAS_LD_SEQ_SA ? 
"ld.<size>\t%0,%1" ++ : "ld.<size>\t%0,%1\\n\\t" ++ "dbar\t0x700"; + + default: + /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, +@@ -193,7 +193,7 @@ + (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) +- "TARGET_LAM_BH" ++ "ISA_HAS_LAM_BH" + "amadd%A2.<amo>\t$zero,%z1,%0" + (set (attr "length") (const_int 4))) + +@@ -230,7 +230,7 @@ + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:SHORT 2 "register_operand" "r")) +- "TARGET_LAM_BH" ++ "ISA_HAS_LAM_BH" + "amswap%A3.<amo>\t%0,%z2,%1" + (set (attr "length") (const_int 4))) + +@@ -266,7 +266,7 @@ + (match_operand:QHWD 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand") ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- "TARGET_LAMCAS" ++ "ISA_HAS_LAMCAS" + "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1" + (set (attr "length") (const_int 8))) + +@@ -296,7 +296,7 @@ + + operands6 = mod_s; + +- if (TARGET_LAMCAS) ++ if (ISA_HAS_LAMCAS) + emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, + operands3, operands4, + operands6)); +@@ -422,7 +422,7 @@ + + operands6 = mod_s; + +- if (TARGET_LAMCAS) ++ if (ISA_HAS_LAMCAS) + emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands1, operands2, + operands3, operands4, + operands6)); +@@ -642,7 +642,7 @@ + (match_operand:SHORT 2 "register_operand")) + "" + { +- if (TARGET_LAM_BH) ++ if (ISA_HAS_LAM_BH) + emit_insn (gen_atomic_exchange<mode>_short (operands0, operands1, operands2, operands3)); + else + { +@@ -663,7 +663,7 @@ + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand") ;; model + UNSPEC_SYNC_OLD_OP)) +- "TARGET_LAM_BH" ++ "ISA_HAS_LAM_BH" + "amadd%A3.<amo>\t%0,%z2,%1" + (set (attr "length") (const_int 4))) + +@@ -678,7 +678,7 @@ + UNSPEC_SYNC_OLD_OP)) + "" + { +- if (TARGET_LAM_BH) ++ if (ISA_HAS_LAM_BH) + emit_insn (gen_atomic_fetch_add<mode>_short (operands0, operands1, + operands2, operands3)); + else +-- +2.43.0 +
_service:tar_scm:0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
Added
@@ -0,0 +1,34 @@ +From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Fri, 23 Aug 2024 15:14:04 +0800 +Subject: PATCH 004/157 SME Remove hip09 and hip11 in aarch64-cores.def to + backport SME + +Will apply it in the end. +--- + gcc/config/aarch64/aarch64-cores.def | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 601b72abb..70b11eb80 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) +-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) +-AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-- +2.33.0 +
_service:tar_scm:0103-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch
Deleted
@@ -1,44 +0,0 @@ -From 2b983f9064d808daf909bde1d4a13980934a7e6e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:51 +0100 -Subject: PATCH aarch64: Only calculate chain_offset if there is a chain - -After previous patches, it is no longer necessary to calculate -a chain_offset in cases where there is no chain record. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the - calculation of chain_offset into the emit_frame_chain block. ---- - gcc/config/aarch64/aarch64.cc | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e79551af41df..d71a042d6112 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9747,16 +9747,16 @@ aarch64_expand_prologue (void) - if (callee_adjust != 0) - aarch64_push_regs (reg1, reg2, callee_adjust); - -- /* The offset of the frame chain record (if any) from the current SP. */ -- poly_int64 chain_offset = (initial_adjust + callee_adjust -- - frame.hard_fp_offset); -- gcc_assert (known_ge (chain_offset, 0)); -- - /* The offset of the current SP from the bottom of the static frame. */ - poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; - - if (emit_frame_chain) - { -+ /* The offset of the frame chain record (if any) from the current SP. */ -+ poly_int64 chain_offset = (initial_adjust + callee_adjust -+ - frame.hard_fp_offset); -+ gcc_assert (known_ge (chain_offset, 0)); -+ - if (callee_adjust == 0) - { - reg1 = R29_REGNUM; --- -2.43.5 -
_service:tar_scm:0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
Added
@@ -0,0 +1,336 @@ +From 9a36ca4e9188ee402327ec908d4f6860f2ee67eb Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 18 May 2022 16:02:12 +0100 +Subject: PATCH 005/157 BackportSME AArch64: Cleanup CPU option + processing code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1be715f31605976d8e4336973d3b81c5b7cea79f + +The --with-cpu/--with-arch configure option processing not only checks valid +arguments but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask. +This isn't used however since a --with-cpu is translated into a -mcpu option +which is processed as if written on the command-line (so TARGET_CPU_DEFAULT +is never accessed). + +So remove all the complex processing and bitmask, and just validate the +option. Fix a bug that always reports valid architecture extensions as invalid. +As a result the CPU processing in aarch64.c can be simplified. + +gcc/ + * config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch + processing. Add support for architectural extensions. + * config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove + AARCH64_CPU_DEFAULT_FLAGS. + (TARGET_CPU_NBITS): Remove. + (TARGET_CPU_MASK): Remove. + * config/aarch64/aarch64.cc (AARCH64_CPU_DEFAULT_FLAGS): Remove define. + (get_tune_cpu): Assert CPU is always valid. + (get_arch): Assert architecture is always valid. + (aarch64_override_options): Cleanup CPU selection code and simplify logic. + (aarch64_option_restore): Remove unnecessary checks on tune. +--- + gcc/config.gcc | 43 +------------ + gcc/config/aarch64/aarch64.cc | 115 +++++++++------------------------- + gcc/config/aarch64/aarch64.h | 9 +-- + 3 files changed, 32 insertions(+), 135 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 8fdde1576..3be450471 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4190,8 +4190,6 @@ case "${target}" in + pattern=AARCH64_CORE + fi + +- ext_mask=AARCH64_CPU_DEFAULT_FLAGS +- + # Find the base CPU or ARCH id in aarch64-cores.def or + # aarch64-arches.def + if x"$base_val" = x \ +@@ -4199,23 +4197,6 @@ case "${target}" in + ${srcdir}/config/aarch64/$def \ + > /dev/null; then + +- if $which = arch ; then +- base_id=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/^^,*, *//' | \ +- sed -e 's/,.*$//'` +- # Extract the architecture flags from aarch64-arches.def +- ext_mask=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/)$//' | \ +- sed -e 's/^.*,//'` +- else +- base_id=`grep "^$pattern(\"$base_val\"," \ +- ${srcdir}/config/aarch64/$def | \ +- sed -e 's/^^,*, *//' | \ +- sed -e 's/,.*$//'` +- fi +- + # Disallow extensions in --with-tune=cortex-a53+crc. + if $which = tune && x"$ext_val" != x ; then + echo "Architecture extensions not supported in --with-$which=$val" 1>&2 +@@ -4246,25 +4227,7 @@ case "${target}" in + grep "^\"$base_ext\""` + + if x"$base_ext" = x \ +- || -n $opt_line ; then +- +- # These regexp extract the elements based on +- # their group match index in the regexp. 
+- ext_canon=`echo -e "$opt_line" | \ +- sed -e "s/$sed_patt/\2/"` +- ext_on=`echo -e "$opt_line" | \ +- sed -e "s/$sed_patt/\3/"` +- ext_off=`echo -e "$opt_line" | \ +- sed -e "s/$sed_patt/\4/"` +- +- if $ext = $base_ext ; then +- # Adding extension +- ext_mask="("$ext_mask") | ("$ext_on" | "$ext_canon")" +- else +- # Removing extension +- ext_mask="("$ext_mask") & ~("$ext_off" | "$ext_canon")" +- fi +- ++ || x"$opt_line" != x ; then + true + else + echo "Unknown extension used in --with-$which=$val" 1>&2 +@@ -4273,10 +4236,6 @@ case "${target}" in + ext_val=`echo $ext_val | sed -e 's/a-z0-9\+//'` + done + +- ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)" +- if x"$base_id" != x ; then +- target_cpu_cname="TARGET_CPU_$base_id | $ext_mask" +- fi + true + else + # Allow --with-$which=native. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 7c62ddb2a..ba888beb0 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -3014,8 +3014,6 @@ static const struct attribute_spec aarch64_attribute_table = + { NULL, 0, 0, false, false, false, false, NULL, NULL } + }; + +-#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) +- + /* An ISA extension in the co-processor and main instruction set space. */ + struct aarch64_option_extension + { +@@ -18411,39 +18409,24 @@ aarch64_validate_mtune (const char *str, const struct processor **res) + return false; + } + +-static_assert (TARGET_CPU_generic < TARGET_CPU_MASK, +- "TARGET_CPU_NBITS is big enough"); +- +-/* Return the CPU corresponding to the enum CPU. +- If it doesn't specify a cpu, return the default. */ ++/* Return the CPU corresponding to the enum CPU. */ + + static const struct processor * + aarch64_get_tune_cpu (enum aarch64_processor cpu) + { +- if (cpu != aarch64_none) +- return &all_corescpu; ++ gcc_assert (cpu != aarch64_none); + +- /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that +- encode the default cpu as selected by the --with-cpu GCC configure option +- in config.gcc. +- ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS +- flags mechanism should be reworked to make it more sane. */ +- return &all_coresTARGET_CPU_DEFAULT & TARGET_CPU_MASK; ++ return &all_corescpu; + } + +-/* Return the architecture corresponding to the enum ARCH. +- If it doesn't specify a valid architecture, return the default. */ ++/* Return the architecture corresponding to the enum ARCH. */ + + static const struct processor * + aarch64_get_arch (enum aarch64_arch arch) + { +- if (arch != aarch64_no_arch) +- return &all_architecturesarch; +- +- const struct processor *cpu +- = &all_coresTARGET_CPU_DEFAULT & TARGET_CPU_MASK; ++ gcc_assert (arch != aarch64_no_arch); + +- return &all_architecturescpu->arch; ++ return &all_architecturesarch; + } + + /* Return the VG value associated with -msve-vector-bits= value VALUE. */ +@@ -18481,10 +18464,6 @@ aarch64_override_options (void) + uint64_t arch_isa = 0; + aarch64_isa_flags = 0; + +- bool valid_cpu = true; +- bool valid_tune = true; +- bool valid_arch = true; +- + selected_cpu = NULL; + selected_arch = NULL; + selected_tune = NULL; +@@ -18499,77 +18478,56 @@ aarch64_override_options (void) + If either of -march or -mtune is given, they override their + respective component of -mcpu. 
*/ + if (aarch64_cpu_string) +- valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, +- &cpu_isa); ++ aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa); + + if (aarch64_arch_string) +- valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch, +- &arch_isa); ++ aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa); + + if (aarch64_tune_string) +- valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune); ++ aarch64_validate_mtune (aarch64_tune_string, &selected_tune); + + #ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; + #endif + +- /* If the user did not specify a processor, choose the default +- one for them. This will be the CPU set during configuration using +- --with-cpu, otherwise it is "generic". */ +- if (!selected_cpu) +- { +- if (selected_arch) +- { +- selected_cpu = &all_coresselected_arch->ident; +- aarch64_isa_flags = arch_isa; +- explicit_arch = selected_arch->arch; +- } +- else +- { +- /* Get default configure-time CPU. */ +- selected_cpu = aarch64_get_tune_cpu (aarch64_none); +- aarch64_isa_flags = TARGET_CPU_DEFAULT >> TARGET_CPU_NBITS; +- } +- +- if (selected_tune) +- explicit_tune_core = selected_tune->ident; +- } +- /* If both -mcpu and -march are specified check that they are architecturally +- compatible, warn if they're not and prefer the -march ISA flags. */ +- else if (selected_arch) ++ if (selected_cpu && selected_arch) + { ++ /* If both -mcpu and -march are specified, warn if they are not ++ architecturally compatible and prefer the -march ISA flags. */ + if (selected_arch->arch != selected_cpu->arch) + { + warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch", + aarch64_cpu_string, + aarch64_arch_string); + } ++ + aarch64_isa_flags = arch_isa; +- explicit_arch = selected_arch->arch; +- explicit_tune_core = selected_tune ? selected_tune->ident +- : selected_cpu->ident; + } +- else ++ else if (selected_cpu) + { +- /* -mcpu but no -march. */ +- aarch64_isa_flags = cpu_isa; +- explicit_tune_core = selected_tune ? selected_tune->ident +- : selected_cpu->ident; +- gcc_assert (selected_cpu); + selected_arch = &all_architecturesselected_cpu->arch; +- explicit_arch = selected_arch->arch; ++ aarch64_isa_flags = cpu_isa; + } +- +- /* Set the arch as well as we will need it when outputing +- the .arch directive in assembly. */ +- if (!selected_arch) ++ else if (selected_arch) + { +- gcc_assert (selected_cpu); ++ selected_cpu = &all_coresselected_arch->ident; ++ aarch64_isa_flags = arch_isa; ++ } ++ else ++ { ++ /* No -mcpu or -march specified, so use the default CPU. */ ++ selected_cpu = &all_coresTARGET_CPU_DEFAULT; + selected_arch = &all_architecturesselected_cpu->arch; ++ aarch64_isa_flags = selected_cpu->flags; + } + ++ explicit_arch = selected_arch->arch; + if (!selected_tune) + selected_tune = selected_cpu; ++ explicit_tune_core = selected_tune->ident; ++ ++ gcc_assert (explicit_tune_core != aarch64_none); ++ gcc_assert (explicit_arch != aarch64_no_arch); + + if (aarch64_enable_bti == 2) + { +@@ -18605,15 +18563,6 @@ aarch64_override_options (void) + if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32) + sorry ("return address signing is only supported for %<-mabi=lp64%>"); + +- /* Make sure we properly set up the explicit options. 
*/ +- if ((aarch64_cpu_string && valid_cpu) +- || (aarch64_tune_string && valid_tune)) +- gcc_assert (explicit_tune_core != aarch64_none); +- +- if ((aarch64_cpu_string && valid_cpu) +- || (aarch64_arch_string && valid_arch)) +- gcc_assert (explicit_arch != aarch64_no_arch); +- + /* The pass to insert speculation tracking runs before + shrink-wrapping and the latter does not know how to update the + tracking status. So disable it in this case. */ +@@ -18719,11 +18668,7 @@ aarch64_option_restore (struct gcc_options *opts, + opts->x_explicit_arch = ptr->x_explicit_arch; + selected_arch = aarch64_get_arch (ptr->x_explicit_arch); + opts->x_explicit_tune_core = ptr->x_explicit_tune_core; +- if (opts->x_explicit_tune_core == aarch64_none +- && opts->x_explicit_arch != aarch64_no_arch) +- selected_tune = &all_coresselected_arch->ident; +- else +- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); ++ selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); + opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; + opts->x_aarch64_branch_protection_string + = ptr->x_aarch64_branch_protection_string; +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 6834c3e99..14e2af054 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -811,16 +811,9 @@ enum target_cpus + TARGET_CPU_generic + }; + +-/* Define how many bits are used to represent the CPU in TARGET_CPU_DEFAULT. +- This needs to be big enough to fit the value of TARGET_CPU_generic. +- All bits after this are used to represent the AARCH64_CPU_DEFAULT_FLAGS. */ +-#define TARGET_CPU_NBITS 8 +-#define TARGET_CPU_MASK ((1 << TARGET_CPU_NBITS) - 1) +- + /* If there is no CPU defined at configure, use generic as default. */ + #ifndef TARGET_CPU_DEFAULT +-#define TARGET_CPU_DEFAULT \ +- (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << TARGET_CPU_NBITS)) ++# define TARGET_CPU_DEFAULT TARGET_CPU_generic + #endif + + /* If inserting NOP before a mult-accumulate insn remember to adjust the +-- +2.33.0 +
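After this cleanup the precedence rules are easy to state: with both -mcpu and -march given, warn on an architecture mismatch and let the -march ISA flags win; with only one given, derive the other from the same table entry; with neither, fall back to the configure-time default CPU. A compact sketch of that flow (names are ours; the real code indexes all_cores[] and all_architectures[]):

#include <stdio.h>

struct processor { int ident; int arch; unsigned long flags; };

/* Resolve the effective ISA flags from optional -mcpu/-march selections,
   mirroring the simplified aarch64_override_options logic.  NULL means
   the option was not given on the command line.  */
unsigned long
resolve_isa (const struct processor *cpu, unsigned long cpu_isa,
             const struct processor *arch, unsigned long arch_isa,
             const struct processor *configure_default)
{
  if (cpu && arch)
    {
      if (cpu->arch != arch->arch)
        fprintf (stderr, "-mcpu conflicts with -march; -march wins\n");
      return arch_isa;              /* prefer the -march ISA flags */
    }
  if (cpu)
    return cpu_isa;                 /* arch derived from the CPU entry */
  if (arch)
    return arch_isa;                /* CPU derived from the arch entry */
  return configure_default->flags;  /* configure-time TARGET_CPU_DEFAULT */
}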
_service:tar_scm:0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch
Added
@@ -0,0 +1,220 @@ +From 282b0847a86fab49fb3582371647fa4cb2d941ed Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:08 +0800 +Subject: PATCH 104/188 LoongArch: Rename ISA_BASE_LA64V100 to ISA_BASE_LA64 + +LoongArch ISA manual v1.10 suggests that software should not depend on +the ISA version number for marking processor features. The ISA version +number is now defined as a collective name of individual ISA evolutions. +Since there is a independent ISA evolution mask now, we can drop the +version information from the base ISA. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Rename. + * config/loongarch/genopts/loongarch.opt.in: Same. + * config/loongarch/loongarch-cpu.cc: Same. + * config/loongarch/loongarch-def.cc: Same. + * config/loongarch/loongarch-def.h: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Same. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/loongarch.opt: Same. +--- + gcc/config/loongarch/genopts/loongarch-strings | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 2 +- + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++------- + gcc/config/loongarch/loongarch-def.h | 6 +++--- + gcc/config/loongarch/loongarch-opts.cc | 10 +++++----- + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch-str.h | 2 +- + gcc/config/loongarch/loongarch.opt | 2 +- + 9 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 411ad5696..ce70b8b9c 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -29,7 +29,7 @@ STR_CPU_LA464 la464 + STR_CPU_LA664 la664 + + # Base architecture +-STR_ISA_BASE_LA64V100 la64 ++STR_ISA_BASE_LA64 la64 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a866dab84..851d8d1f3 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -33,7 +33,7 @@ Name(isa_base) Type(int) + Basic ISAs of LoongArch: + + EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) ++Enum(isa_base) String(@@STR_ISA_BASE_LA64@@) Value(ISA_BASE_LA64) + + ;; ISA extensions / adjustments + Enum +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 7e0625835..551d4f72c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -133,7 +133,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + switch (cpucfg_cache1 & 0x3) + { + case 0x02: +- tmp = ISA_BASE_LA64V100; ++ tmp = ISA_BASE_LA64; + break; + + default: +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 843be78e4..533dd0af2 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -48,16 +48,16 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa = + array_arch<loongarch_isa> () + .set (CPU_LOONGARCH64, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64)) + .set (CPU_LA464, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ 
(ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA +@@ -153,7 +153,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () + + array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings = + array<const char *, N_ISA_BASE_TYPES> () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); ++ .set (ISA_BASE_LA64, STR_ISA_BASE_LA64); + + array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings = + array<const char *, N_ISA_EXT_TYPES> () +@@ -189,15 +189,15 @@ array<array<loongarch_isa, N_ABI_EXT_TYPES>, N_ABI_BASE_TYPES> + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64))) + .set (ABI_BASE_LP64F, + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU32))) + .set (ABI_BASE_LP64S, + array<loongarch_isa, N_ABI_EXT_TYPES> () + .set (ABI_EXT_BASE, +- loongarch_isa ().base_ (ISA_BASE_LA64V100))); ++ loongarch_isa ().base_ (ISA_BASE_LA64))); +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 9e5eee0e2..a133ea265 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,9 +55,9 @@ along with GCC; see the file COPYING3. If not see + + /* enum isa_base */ + +-/* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-#define N_ISA_BASE_TYPES 1 ++/* LoongArch64 */ ++#define ISA_BASE_LA64 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 935d09f45..cf4c7bc93 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -567,17 +567,17 @@ isa_default_abi (const struct loongarch_isa *isa) + switch (isa->fpu) + { + case ISA_EXT_FPU64: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64D; + break; + + case ISA_EXT_FPU32: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64F; + break; + + case ISA_EXT_NONE: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64S; + break; + +@@ -596,8 +596,8 @@ isa_base_compat_p (const struct loongarch_isa *set1, + { + switch (set2->base) + { +- case ISA_BASE_LA64V100: +- return (set1->base >= ISA_BASE_LA64V100); ++ case ISA_BASE_LA64: ++ return (set1->base >= ISA_BASE_LA64); + + default: + gcc_unreachable (); +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 204338553..463812136 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -79,7 +79,7 @@ struct loongarch_flags { + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 
a8821acb0..2251df38b 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA464 "la464" + #define STR_CPU_LA664 "la664" + +-#define STR_ISA_BASE_LA64V100 "la64" ++#define STR_ISA_BASE_LA64 "la64" + + #define OPTSTR_ISA_EXT_FPU "fpu" + #define STR_NONE "none" +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index b5a46df4e..df7314973 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -41,7 +41,7 @@ Name(isa_base) Type(int) + Basic ISAs of LoongArch: + + EnumValue +-Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) ++Enum(isa_base) String(la64) Value(ISA_BASE_LA64) + + ;; ISA extensions / adjustments + Enum +-- +2.43.0 +
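A note on the mechanics: the rename is purely textual, but it reflects a real design change. A minimal sketch of the resulting model (hypothetical field and mask names, not the actual GCC structures) shows why feature tests no longer need a base-ISA version number:

/* The base ISA is a single identity; features live in a separate mask.  */
enum { ISA_BASE_LA64 = 0 };
#define MASK_ISA_DIV32 (1u << 0)  /* example ISA-evolution bit */

struct isa_config
{
  int base;            /* ISA_BASE_*         */
  unsigned evolution;  /* MASK_ISA_* bitmask */
};

static int
has_div32 (const struct isa_config *isa)
{
  /* Consult the evolution mask, never a version number.  */
  return (isa->evolution & MASK_ISA_DIV32) != 0;
}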
View file
_service:tar_scm:0104-aarch64-Rename-locals-offset-to-bytes-above-locals.patch
Deleted
@@ -1,91 +0,0 @@ -From 0a0a824808d1dec51004fb5805c1a0ae2a35433f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:51 +0100 -Subject: [PATCH] aarch64: Rename locals_offset to bytes_above_locals -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -locals_offset was described as: - - /* Offset from the base of the frame (incomming SP) to the - top of the locals area. This value is always a multiple of - STACK_BOUNDARY. */ - -This is implicitly an “upside down” view of the frame: the incoming -SP is at offset 0, and anything N bytes below the incoming SP is at -offset N (rather than -N). - -However, reg_offset instead uses a “right way up” view; that is, -it views offsets in address terms. Something above X is at a -positive offset from X and something below X is at a negative -offset from X. - -Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, -target-independent code views offsets in address terms too: -locals are allocated at negative offsets to virtual_stack_vars. - -It seems confusing to have *_offset fields of the same structure -using different polarities like this. This patch tries to avoid -that by renaming locals_offset to bytes_above_locals. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... - (aarch64_frame::bytes_above_locals): ...this. - * config/aarch64/aarch64.cc (aarch64_layout_frame) - (aarch64_initial_elimination_offset): Update accordingly. ---- - gcc/config/aarch64/aarch64.cc | 6 +++--- - gcc/config/aarch64/aarch64.h | 6 +++--- - 2 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index d71a042d6112..d4ec352ba98a 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8337,7 +8337,7 @@ aarch64_layout_frame (void) - STACK_BOUNDARY / BITS_PER_UNIT)); - frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; - -- frame.locals_offset = frame.saved_varargs_size; -+ frame.bytes_above_locals = frame.saved_varargs_size; - - frame.initial_adjust = 0; - frame.final_adjust = 0; -@@ -12578,13 +12578,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) - return frame.hard_fp_offset; - - if (from == FRAME_POINTER_REGNUM) -- return frame.hard_fp_offset - frame.locals_offset; -+ return frame.hard_fp_offset - frame.bytes_above_locals; - } - - if (to == STACK_POINTER_REGNUM) - { - if (from == FRAME_POINTER_REGNUM) -- return frame.frame_size - frame.locals_offset; -+ return frame.frame_size - frame.bytes_above_locals; - } - - return frame.frame_size; -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 94fca4b94716..bf46e6124aa9 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -885,10 +885,10 @@ struct GTY (()) aarch64_frame - always a multiple of STACK_BOUNDARY. */ - poly_int64 bytes_below_hard_fp; - -- /* Offset from the base of the frame (incomming SP) to the -- top of the locals area. This value is always a multiple of -+ /* The number of bytes between the top of the locals area and the top -+ of the frame (the incomming SP). This value is always a multiple of - STACK_BOUNDARY. */ -- poly_int64 locals_offset; -+ poly_int64 bytes_above_locals; - - /* Offset from the base of the frame (incomming SP) to the - hard_frame_pointer. This value is always a multiple of --- -2.43.5 -
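The polarity point this (now dropped) patch makes is easy to miss, so a toy example with made-up numbers: with the incoming SP at 0x1000 and the locals area topping out at 0xfe0, the old locals_offset stored 0x20 (a size, "N bytes below the incoming SP"), while address-terms code such as reg_offset would describe the same location as -0x20 relative to the incoming SP.

/* Illustration only; not GCC code.  */
long incoming_sp = 0x1000;
long locals_top = 0xfe0;
long bytes_above_locals = incoming_sp - locals_top;  /* 0x20: a size          */
long address_offset = locals_top - incoming_sp;      /* -0x20: address terms  */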
View file
_service:tar_scm:0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch
Added
@@ -0,0 +1,528 @@ +From ba32885874fc6caa90f6ae5e264bc3d51f64a26e Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 1 Jun 2022 16:46:36 +0100 +Subject: [PATCH 006/157] [Backport][SME] AArch64: Cleanup option processing + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ae54c1b09963779c5c3914782324ff48af32e2f1 + +Further cleanup option processing. Remove the duplication of global +variables for CPU and tune settings so that CPU option processing is +simplified even further. Move global variables that need save and +restore due to target option processing into aarch64.opt. This removes +the need for explicit saving/restoring and unnecessary reparsing of +options. + +gcc/ + * config/aarch64/aarch64.opt (explicit_tune_core): Rename to + selected_tune. + (explicit_arch): Rename to selected_arch. + (x_aarch64_override_tune_string): Remove. + (aarch64_ra_sign_key): Add as TargetVariable so it gets saved/restored. + (aarch64_override_tune_string): Add Save so it gets saved/restored. + * config/aarch64/aarch64.h (aarch64_architecture_version): Remove. + * config/aarch64/aarch64.cc (aarch64_architecture_version): Remove. + (processor): Remove architecture_version field. + (selected_arch): Remove global. + (selected_cpu): Remove global. + (selected_tune): Remove global. + (aarch64_ra_sign_key): Move global to aarch64.opt so it is saved. + (aarch64_override_options_internal): Use aarch64_get_tune_cpu. + (aarch64_override_options): Further simplify code to only set + selected_arch and selected_tune globals. + (aarch64_option_save): Remove now that target options are saved. + (aarch64_option_restore): Remove redundant target option restores. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Use + AARCH64_ISA_V9. + * config/aarch64/aarch64-opts.h (aarch64_key_type): Add, moved from... + * config/aarch64/aarch64-protos.h (aarch64_key_type): Remove. + (aarch64_ra_sign_key): Remove. +--- + gcc/config/aarch64/aarch64-c.cc | 2 +- + gcc/config/aarch64/aarch64-opts.h | 6 + + gcc/config/aarch64/aarch64-protos.h | 8 -- + gcc/config/aarch64/aarch64.cc | 183 ++++++++++------------------ + gcc/config/aarch64/aarch64.h | 3 - + gcc/config/aarch64/aarch64.opt | 12 +- + 6 files changed, 76 insertions(+), 138 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index a4c407724..90d45e45d 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + +- builtin_define_with_int_value ("__ARM_ARCH", aarch64_architecture_version); ++ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", + flag_short_enums ? 1 : 4); +diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h +index 93572fe83..421648a15 100644 +--- a/gcc/config/aarch64/aarch64-opts.h ++++ b/gcc/config/aarch64/aarch64-opts.h +@@ -98,4 +98,10 @@ enum stack_protector_guard { + SSP_GLOBAL /* global canary */ + }; + ++/* The key type that -msign-return-address should use. 
*/ ++enum aarch64_key_type { ++ AARCH64_KEY_A, ++ AARCH64_KEY_B ++}; ++ + #endif +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 475d174dd..e60ce3c36 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -672,14 +672,6 @@ enum simd_immediate_check { + AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC + }; + +-/* The key type that -msign-return-address should use. */ +-enum aarch64_key_type { +- AARCH64_KEY_A, +- AARCH64_KEY_B +-}; +- +-extern enum aarch64_key_type aarch64_ra_sign_key; +- + extern struct tune_params aarch64_tune_params; + + /* The available SVE predicate patterns, known in the ACLE as "svpattern". */ +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index ba888beb0..254ecfaa2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -306,9 +306,6 @@ static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); + static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); + +-/* Major revision number of the ARM Architecture implemented by the target. */ +-unsigned aarch64_architecture_version; +- + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; + +@@ -2931,7 +2928,6 @@ struct processor + enum aarch64_processor ident; + enum aarch64_processor sched_core; + enum aarch64_arch arch; +- unsigned architecture_version; + const uint64_t flags; + const struct tune_params *const tune; + }; +@@ -2940,9 +2936,9 @@ struct processor + static const struct processor all_architectures[] = + { + #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ +- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL}, ++ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL}, + #include "aarch64-arches.def" + {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} + }; + + /* Processor cores implementing AArch64. */ +@@ -2950,23 +2946,13 @@ static const struct processor all_cores[] = + { + #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ +- all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \ + FLAGS, &COSTS##_tunings}, + #include "aarch64-cores.def" +- {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8, ++ {"generic", generic, cortexa53, AARCH64_ARCH_8A, + AARCH64_FL_FOR_ARCH8, &generic_tunings}, +- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} ++ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + +- +-/* Target specification. These are populated by the -march, -mtune, -mcpu +- handling code or by target attributes. */ +-static const struct processor *selected_arch; +-static const struct processor *selected_cpu; +-static const struct processor *selected_tune; +- +-enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; +- + /* The current tuning set. */ + struct tune_params aarch64_tune_params = generic_tunings; + +@@ -10633,8 +10619,8 @@ aarch64_case_values_threshold (void) + /* Use the specified limit for the number of cases before using jump + tables at higher optimization levels. 
*/ + if (optimize > 2 +- && selected_cpu->tune->max_case_values != 0) +- return selected_cpu->tune->max_case_values; ++ && aarch64_tune_params.max_case_values != 0) ++ return aarch64_tune_params.max_case_values; + else + return optimize_size ? 8 : 11; + } +@@ -17769,6 +17755,26 @@ initialize_aarch64_tls_size (struct gcc_options *opts) + return; + } + ++/* Return the CPU corresponding to the enum CPU. */ ++ ++static const struct processor * ++aarch64_get_tune_cpu (enum aarch64_processor cpu) ++{ ++ gcc_assert (cpu != aarch64_none); ++ ++ return &all_cores[cpu]; ++} ++ ++/* Return the architecture corresponding to the enum ARCH. */ ++ ++static const struct processor * ++aarch64_get_arch (enum aarch64_arch arch) ++{ ++ gcc_assert (arch != aarch64_no_arch); ++ ++ return &all_architectures[arch]; ++} ++ + /* Parse STRING looking for options in the format: + string :: option:string + option :: name=substring +@@ -17879,18 +17885,18 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts) + void + aarch64_override_options_internal (struct gcc_options *opts) + { +- aarch64_tune_flags = selected_tune->flags; +- aarch64_tune = selected_tune->sched_core; ++ const struct processor *tune = aarch64_get_tune_cpu (opts->x_selected_tune); ++ aarch64_tune_flags = tune->flags; ++ aarch64_tune = tune->sched_core; + /* Make a copy of the tuning parameters attached to the core, which + we may later overwrite. */ +- aarch64_tune_params = *(selected_tune->tune); +- aarch64_architecture_version = selected_arch->architecture_version; +- if (selected_tune->tune == &generic_tunings) ++ aarch64_tune_params = *(tune->tune); ++ if (tune->tune == &generic_tunings) + aarch64_adjust_generic_arch_tuning (aarch64_tune_params); + + if (opts->x_aarch64_override_tune_string) + aarch64_parse_override_string (opts->x_aarch64_override_tune_string, +- &aarch64_tune_params); ++ &aarch64_tune_params); + + /* This target defaults to strict volatile bitfields. */ + if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) +@@ -18051,13 +18057,6 @@ aarch64_override_options_internal (struct gcc_options *opts) + && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) + opts->x_flag_prefetch_loop_arrays = 1; + +- if (opts->x_aarch64_arch_string == NULL) +- opts->x_aarch64_arch_string = selected_arch->name; +- if (opts->x_aarch64_cpu_string == NULL) +- opts->x_aarch64_cpu_string = selected_cpu->name; +- if (opts->x_aarch64_tune_string == NULL) +- opts->x_aarch64_tune_string = selected_tune->name; +- + aarch64_override_options_after_change_1 (opts); + } + +@@ -18409,26 +18408,6 @@ aarch64_validate_mtune (const char *str, const struct processor **res) + return false; + } + +-/* Return the CPU corresponding to the enum CPU. */ +- +-static const struct processor * +-aarch64_get_tune_cpu (enum aarch64_processor cpu) +-{ +- gcc_assert (cpu != aarch64_none); +- +- return &all_cores[cpu]; +-} +- +-/* Return the architecture corresponding to the enum ARCH. */ +- +-static const struct processor * +-aarch64_get_arch (enum aarch64_arch arch) +-{ +- gcc_assert (arch != aarch64_no_arch); +- +- return &all_architectures[arch]; +-} +- + /* Return the VG value associated with -msve-vector-bits= value VALUE. 
*/ + + static poly_uint16 +@@ -18464,9 +18443,9 @@ aarch64_override_options (void) + uint64_t arch_isa = 0; + aarch64_isa_flags = 0; + +- selected_cpu = NULL; +- selected_arch = NULL; +- selected_tune = NULL; ++ const struct processor *cpu = NULL; ++ const struct processor *arch = NULL; ++ const struct processor *tune = NULL; + + if (aarch64_harden_sls_string) + aarch64_validate_sls_mitigation (aarch64_harden_sls_string); +@@ -18478,56 +18457,52 @@ aarch64_override_options (void) + If either of -march or -mtune is given, they override their + respective component of -mcpu. */ + if (aarch64_cpu_string) +- aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa); ++ aarch64_validate_mcpu (aarch64_cpu_string, &cpu, &cpu_isa); + + if (aarch64_arch_string) +- aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa); ++ aarch64_validate_march (aarch64_arch_string, &arch, &arch_isa); + + if (aarch64_tune_string) +- aarch64_validate_mtune (aarch64_tune_string, &selected_tune); ++ aarch64_validate_mtune (aarch64_tune_string, &tune); + + #ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; + #endif + +- if (selected_cpu && selected_arch) ++ if (cpu && arch) + { + /* If both -mcpu and -march are specified, warn if they are not + architecturally compatible and prefer the -march ISA flags. */ +- if (selected_arch->arch != selected_cpu->arch) ++ if (arch->arch != cpu->arch) + { + warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch", + aarch64_cpu_string, + aarch64_arch_string); + } + ++ selected_arch = arch->arch; + aarch64_isa_flags = arch_isa; + } +- else if (selected_cpu) ++ else if (cpu) + { +- selected_arch = &all_architectures[selected_cpu->arch]; ++ selected_arch = cpu->arch; + aarch64_isa_flags = cpu_isa; + } +- else if (selected_arch) ++ else if (arch) + { +- selected_cpu = &all_cores[selected_arch->ident]; ++ cpu = &all_cores[arch->ident]; ++ selected_arch = arch->arch; + aarch64_isa_flags = arch_isa; + } + else + { + /* No -mcpu or -march specified, so use the default CPU. */ +- selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; +- selected_arch = &all_architectures[selected_cpu->arch]; +- aarch64_isa_flags = selected_cpu->flags; ++ cpu = &all_cores[TARGET_CPU_DEFAULT]; ++ selected_arch = cpu->arch; ++ aarch64_isa_flags = cpu->flags; + } + +- explicit_arch = selected_arch->arch; +- if (!selected_tune) +- selected_tune = selected_cpu; +- explicit_tune_core = selected_tune->ident; +- +- gcc_assert (explicit_tune_core != aarch64_none); +- gcc_assert (explicit_arch != aarch64_no_arch); ++ selected_tune = tune ? tune->ident : cpu->ident; + + if (aarch64_enable_bti == 2) + { +@@ -18646,38 +18621,14 @@ initialize_aarch64_code_model (struct gcc_options *opts) + } + } + +-/* Implement TARGET_OPTION_SAVE. */ +- +-static void +-aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts, +- struct gcc_options */* opts_set */) +-{ +- ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; +- ptr->x_aarch64_branch_protection_string +- = opts->x_aarch64_branch_protection_string; +-} +- + /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions + using the information saved in PTR. 
*/ + + static void + aarch64_option_restore (struct gcc_options *opts, +- struct gcc_options */* opts_set */, +- struct cl_target_option *ptr) ++ struct gcc_options * /* opts_set */, ++ struct cl_target_option * /* ptr */) + { +- opts->x_explicit_arch = ptr->x_explicit_arch; +- selected_arch = aarch64_get_arch (ptr->x_explicit_arch); +- opts->x_explicit_tune_core = ptr->x_explicit_tune_core; +- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); +- opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; +- opts->x_aarch64_branch_protection_string +- = ptr->x_aarch64_branch_protection_string; +- if (opts->x_aarch64_branch_protection_string) +- { +- aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string, +- NULL); +- } +- + aarch64_override_options_internal (opts); + } + +@@ -18687,11 +18638,11 @@ static void + aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) + { + const struct processor *cpu +- = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); +- uint64_t isa_flags = ptr->x_aarch64_isa_flags; +- const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch); ++ = aarch64_get_tune_cpu (ptr->x_selected_tune); ++ const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch); + std::string extension +- = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags); ++ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags, ++ arch->flags); + + fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name); + fprintf (file, "%*sselected arch = %s%s\n", indent, "", +@@ -18804,8 +18755,7 @@ aarch64_handle_attr_arch (const char *str) + if (parse_res == AARCH64_PARSE_OK) + { + gcc_assert (tmp_arch); +- selected_arch = tmp_arch; +- explicit_arch = selected_arch->arch; ++ selected_arch = tmp_arch->arch; + return true; + } + +@@ -18843,11 +18793,8 @@ aarch64_handle_attr_cpu (const char *str) + if (parse_res == AARCH64_PARSE_OK) + { + gcc_assert (tmp_cpu); +- selected_tune = tmp_cpu; +- explicit_tune_core = selected_tune->ident; +- +- selected_arch = &all_architectures[tmp_cpu->arch]; +- explicit_arch = selected_arch->arch; ++ selected_tune = tmp_cpu->ident; ++ selected_arch = tmp_cpu->arch; + return true; + } + +@@ -18915,8 +18862,7 @@ aarch64_handle_attr_tune (const char *str) + if (parse_res == AARCH64_PARSE_OK) + { + gcc_assert (tmp_tune); +- selected_tune = tmp_tune; +- explicit_tune_core = selected_tune->ident; ++ selected_tune = tmp_tune->ident; + return true; + } + +@@ -22821,7 +22767,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, + gcc_assert (targ_options); + + const struct processor *this_arch +- = aarch64_get_arch (targ_options->x_explicit_arch); ++ = aarch64_get_arch (targ_options->x_selected_arch); + + uint64_t isa_flags = targ_options->x_aarch64_isa_flags; + std::string extension +@@ -22840,7 +22786,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, + useful to readers of the generated asm. Do it only when it changes + from function to function and verbose assembly is requested. 
*/ + const struct processor *this_tune +- = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core); ++ = aarch64_get_tune_cpu (targ_options->x_selected_tune); + + if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name) + { +@@ -22952,7 +22898,7 @@ aarch64_start_file (void) + = TREE_TARGET_OPTION (target_option_default_node); + + const struct processor *default_arch +- = aarch64_get_arch (default_options->x_explicit_arch); ++ = aarch64_get_arch (default_options->x_selected_arch); + uint64_t default_isa_flags = default_options->x_aarch64_isa_flags; + std::string extension + = aarch64_get_extension_string_for_isa_flags (default_isa_flags, +@@ -27950,9 +27896,6 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_OFFLOAD_OPTIONS + #define TARGET_OFFLOAD_OPTIONS aarch64_offload_options + +-#undef TARGET_OPTION_SAVE +-#define TARGET_OPTION_SAVE aarch64_option_save +- + #undef TARGET_OPTION_RESTORE + #define TARGET_OPTION_RESTORE aarch64_option_restore + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 14e2af054..7d73689e4 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -144,9 +144,6 @@ + + #define PCC_BITFIELD_TYPE_MATTERS 1 + +-/* Major revision number of the ARM Architecture implemented by the target. */ +-extern unsigned aarch64_architecture_version; +- + /* Instruction tuning/selection flags. */ + + /* Bit values used to identify processor capabilities. */ +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 101664c7c..836a3c784 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -22,13 +22,10 @@ HeaderInclude + config/aarch64/aarch64-opts.h + + TargetVariable +-enum aarch64_processor explicit_tune_core = aarch64_none ++enum aarch64_processor selected_tune = aarch64_none + + TargetVariable +-enum aarch64_arch explicit_arch = aarch64_no_arch +- +-TargetSave +-const char *x_aarch64_override_tune_string ++enum aarch64_arch selected_arch = aarch64_no_arch + + TargetVariable + uint64_t aarch64_isa_flags = 0 +@@ -36,6 +33,9 @@ uint64_t aarch64_isa_flags = 0 + TargetVariable + unsigned aarch64_enable_bti = 2 + ++TargetVariable ++enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A ++ + ; The TLS dialect names to use with -mtls-dialect. + + Enum +@@ -139,7 +139,7 @@ Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI + Generate code that conforms to the specified ABI. + + moverride= +-Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) ++Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) Save + -moverride=<string> Power users only! Override CPU optimization parameters. + + Enum +-- +2.33.0 +
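The mechanism doing the heavy lifting above is the .opt framework: any field declared as a TargetVariable (or marked Save) is emitted into the generated cl_target_option structure, so the middle end snapshots and restores it as a plain struct copy around target attributes and pragmas. A rough sketch of the effect, with hypothetical types rather than the generated ones:

/* What TargetVariable buys: state lives in a struct, not in globals.  */
struct cl_target_option_sketch
{
  int x_selected_tune;              /* was the global explicit_tune_core */
  int x_selected_arch;              /* was the global explicit_arch      */
  unsigned long long x_aarch64_isa_flags;
};

static void
save_restore_sketch (struct cl_target_option_sketch *saved,
                     struct cl_target_option_sketch *current)
{
  *saved = *current;   /* TARGET_OPTION_SAVE becomes a struct copy...       */
  *current = *saved;   /* ...and restore likewise, with no per-field hooks. */
}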
View file
_service:tar_scm:0105-LoongArch-Use-enums-for-constants.patch
Added
@@ -0,0 +1,181 @@ +From 907b35525c8abcdfe22152ebce6640dbe3905cce Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:09 +0800 +Subject: [PATCH 105/188] LoongArch: Use enums for constants + +Target features constants from loongarch-def.h are currently defined as macros. +Switch to enums for better look in the debugger. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Define constants with + enums instead of Macros. +--- + gcc/config/loongarch/loongarch-def.h | 115 ++++++++++++++++----------- + 1 file changed, 67 insertions(+), 48 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index a133ea265..28da3ae5f 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -23,12 +23,10 @@ along with GCC; see the file COPYING3. If not see + - ISA extensions (isa_ext), + - base ABI types (abi_base), + - ABI extension types (abi_ext). +- +- - code models (cmodel) +- - other command-line switches (switch) ++ - code models (cmodel) + + These values are primarily used for implementing option handling +- logic in "loongarch.opt", "loongarch-driver.c" and "loongarch-opt.c". ++ logic in "loongarch.opt", "loongarch-driver.cc" and "loongarch-opt.cc". + + As for the result of this option handling process, the following + scheme is adopted to represent the final configuration: +@@ -53,30 +51,40 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-def-array.h" + #include "loongarch-tune.h" + +-/* enum isa_base */ + +-/* LoongArch64 */ +-#define ISA_BASE_LA64 0 +-#define N_ISA_BASE_TYPES 1 ++/* ISA base */ ++enum { ++ ISA_BASE_LA64 = 0, /* LoongArch64 */ ++ N_ISA_BASE_TYPES = 1 ++}; ++ + extern loongarch_def_array<const char *, N_ISA_BASE_TYPES> + loongarch_isa_base_strings; + +-/* enum isa_ext_* */ +-#define ISA_EXT_NONE 0 +-#define ISA_EXT_FPU32 1 +-#define ISA_EXT_FPU64 2 +-#define N_ISA_EXT_FPU_TYPES 3 +-#define ISA_EXT_SIMD_LSX 3 +-#define ISA_EXT_SIMD_LASX 4 +-#define N_ISA_EXT_TYPES 5 ++ ++/* ISA extensions */ ++enum { ++ ISA_EXT_NONE = 0, ++ ISA_EXT_FPU32 = 1, ++ ISA_EXT_FPU64 = 2, ++ N_ISA_EXT_FPU_TYPES = 3, ++ ISA_EXT_SIMD_LSX = 3, ++ ISA_EXT_SIMD_LASX = 4, ++ N_ISA_EXT_TYPES = 5 ++}; ++ + extern loongarch_def_array<const char *, N_ISA_EXT_TYPES> + loongarch_isa_ext_strings; + +-/* enum abi_base */ +-#define ABI_BASE_LP64D 0 +-#define ABI_BASE_LP64F 1 +-#define ABI_BASE_LP64S 2 +-#define N_ABI_BASE_TYPES 3 ++ ++/* Base ABI */ ++enum { ++ ABI_BASE_LP64D = 0, ++ ABI_BASE_LP64F = 1, ++ ABI_BASE_LP64S = 2, ++ N_ABI_BASE_TYPES = 3 ++}; ++ + extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + loongarch_abi_base_strings; + +@@ -90,28 +98,38 @@ extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + (abi_base == ABI_BASE_LP64S) + + +-/* enum abi_ext */ +-#define ABI_EXT_BASE 0 +-#define N_ABI_EXT_TYPES 1 ++/* ABI Extension */ ++enum { ++ ABI_EXT_BASE = 0, ++ N_ABI_EXT_TYPES = 1 ++}; ++ + extern loongarch_def_array<const char *, N_ABI_EXT_TYPES> + loongarch_abi_ext_strings; + +-/* enum cmodel */ +-#define CMODEL_NORMAL 0 +-#define CMODEL_TINY 1 +-#define CMODEL_TINY_STATIC 2 +-#define CMODEL_MEDIUM 3 +-#define CMODEL_LARGE 4 +-#define CMODEL_EXTREME 5 +-#define N_CMODEL_TYPES 6 ++ ++/* Code Model */ ++enum { ++ CMODEL_NORMAL = 0, ++ CMODEL_TINY = 1, ++ CMODEL_TINY_STATIC = 2, ++ CMODEL_MEDIUM = 3, ++ CMODEL_LARGE = 4, ++ CMODEL_EXTREME = 5, ++ N_CMODEL_TYPES = 6 ++}; ++ + extern loongarch_def_array<const char *, N_CMODEL_TYPES> + 
loongarch_cmodel_strings; + +-/* enum explicit_relocs */ +-#define EXPLICIT_RELOCS_AUTO 0 +-#define EXPLICIT_RELOCS_NONE 1 +-#define EXPLICIT_RELOCS_ALWAYS 2 +-#define N_EXPLICIT_RELOCS_TYPES 3 ++ ++/* Explicit Reloc Type */ ++enum { ++ EXPLICIT_RELOCS_AUTO = 0, ++ EXPLICIT_RELOCS_NONE = 1, ++ EXPLICIT_RELOCS_ALWAYS = 2, ++ N_EXPLICIT_RELOCS_TYPES = 3 ++}; + + /* The common default value for variables whose assignments + are triggered by command-line options. */ +@@ -159,17 +177,18 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + +-/* CPU properties. */ +-/* index */ +-#define CPU_NATIVE 0 +-#define CPU_ABI_DEFAULT 1 +-#define CPU_LOONGARCH64 2 +-#define CPU_LA464 3 +-#define CPU_LA664 4 +-#define N_ARCH_TYPES 5 +-#define N_TUNE_TYPES 5 +- +-/* parallel tables. */ ++/* CPU model */ ++enum { ++ CPU_NATIVE = 0, ++ CPU_ABI_DEFAULT = 1, ++ CPU_LOONGARCH64 = 2, ++ CPU_LA464 = 3, ++ CPU_LA664 = 4, ++ N_ARCH_TYPES = 5, ++ N_TUNE_TYPES = 5 ++}; ++ ++/* CPU model properties */ + extern loongarch_def_array<const char *, N_ARCH_TYPES> + loongarch_cpu_strings; + extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES> +-- +2.43.0 +
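The "better look in the debugger" rationale in two lines: a macro constant survives into the debug info only as a bare number, while an enumerator keeps its symbolic name. An illustration (not GCC code):

#define CMODEL_NORMAL_MACRO 0                    /* gdb prints: 0             */
enum cmodel_sketch { CMODEL_NORMAL, CMODEL_TINY, CMODEL_MEDIUM };
enum cmodel_sketch cm = CMODEL_MEDIUM;           /* gdb prints: CMODEL_MEDIUM */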
View file
_service:tar_scm:0105-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch
Deleted
@@ -1,148 +0,0 @@ -From 3fbf0789202b30a67b12e1fb785c7130f098d665 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:52 +0100 -Subject: [PATCH] aarch64: Rename hard_fp_offset to bytes_above_hard_fp -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -Similarly to the previous locals_offset patch, hard_fp_offset -was described as: - - /* Offset from the base of the frame (incomming SP) to the - hard_frame_pointer. This value is always a multiple of - STACK_BOUNDARY. */ - poly_int64 hard_fp_offset; - -which again took an “upside-down” view: higher offsets meant lower -addresses. This patch renames the field to bytes_above_hard_fp instead. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename - to... - (aarch64_frame::bytes_above_hard_fp): ...this. - * config/aarch64/aarch64.cc (aarch64_layout_frame) - (aarch64_expand_prologue): Update accordingly. - (aarch64_initial_elimination_offset): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- - gcc/config/aarch64/aarch64.h | 6 +++--- - 2 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index d4ec352ba98a..3c4052740e7a 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8329,7 +8329,7 @@ aarch64_layout_frame (void) - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.hard_fp_offset -+ frame.bytes_above_hard_fp - = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; - - /* Both these values are already aligned. */ -@@ -8378,13 +8378,13 @@ aarch64_layout_frame (void) - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; -+ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; - if (known_eq (frame.saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; - else if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset - && known_eq (frame.bytes_above_hard_fp, const_size)) - { - /* Simple, small frame with no data below the saved registers. - -@@ -8401,8 +8401,8 @@ - case that it hardly seems worth the effort though. 
*/ - && (!saves_below_hard_fp_p || const_below_saved_regs == 0) - && !(cfun->calls_alloca -- && frame.hard_fp_offset.is_constant (&const_fp_offset) -- && const_fp_offset < max_push_offset)) -+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ && const_above_fp < max_push_offset)) - { - /* Frame with small area below the saved registers: - -@@ -8420,12 +8420,12 @@ aarch64_layout_frame (void) - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = (frame.hard_fp_offset -+ frame.initial_adjust = (frame.bytes_above_hard_fp - + frame.below_hard_fp_saved_regs_size); - frame.final_adjust = frame.bytes_below_saved_regs; - } -- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) -- && const_fp_offset < max_push_offset) -+ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ && const_above_fp < max_push_offset) - { - /* Frame with large area below the saved registers, or with SVE saves, - but with a small area above: -@@ -8435,7 +8435,7 @@ aarch64_layout_frame (void) - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.callee_adjust = const_fp_offset; -+ frame.callee_adjust = const_above_fp; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } -@@ -8450,7 +8450,7 @@ aarch64_layout_frame (void) - sub sp, sp, below_hard_fp_saved_regs_size - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = frame.hard_fp_offset; -+ frame.initial_adjust = frame.bytes_above_hard_fp; - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } -@@ -9754,7 +9754,7 @@ aarch64_expand_prologue (void) - { - /* The offset of the frame chain record (if any) from the current SP. */ - poly_int64 chain_offset = (initial_adjust + callee_adjust -- - frame.hard_fp_offset); -+ - frame.bytes_above_hard_fp); - gcc_assert (known_ge (chain_offset, 0)); - - if (callee_adjust == 0) -@@ -12575,10 +12575,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) - if (to == HARD_FRAME_POINTER_REGNUM) - { - if (from == ARG_POINTER_REGNUM) -- return frame.hard_fp_offset; -+ return frame.bytes_above_hard_fp; - - if (from == FRAME_POINTER_REGNUM) -- return frame.hard_fp_offset - frame.bytes_above_locals; -+ return frame.bytes_above_hard_fp - frame.bytes_above_locals; - } - - if (to == STACK_POINTER_REGNUM) -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index bf46e6124aa9..dd1f403f9393 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -890,10 +890,10 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - poly_int64 bytes_above_locals; - -- /* Offset from the base of the frame (incomming SP) to the -- hard_frame_pointer. This value is always a multiple of -+ /* The number of bytes between the hard_frame_pointer and the top of -+ the frame (the incomming SP). This value is always a multiple of - STACK_BOUNDARY. */ -- poly_int64 hard_fp_offset; -+ poly_int64 bytes_above_hard_fp; - - /* The size of the frame. This value is the offset from base of the - frame (incomming SP) to the stack_pointer. This value is always --- -2.43.5 -
View file
_service:tar_scm:0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
Added
@@ -0,0 +1,108 @@ +From 0bfb7b0b745d0a9af13772ad48ccc102e557f95a Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Mon, 26 Sep 2022 10:10:25 +0100 +Subject: [PATCH 007/157] [Backport][SME] aarch64: Add -march support for + Armv9.1-A, Armv9.2-A, Armv9.3-A + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c33e12fa479c01848f4a288883bf1ef848c94ca3 + +This is a straightforward patch that allows targeting the architecture revisions mentioned in the subject +through -march. These are already supported in binutils. + +Bootstrapped and tested on aarch64-none-linux-gnu. + +gcc/ChangeLog: + + * config/aarch64/aarch64-arches.def (armv9.1-a): Define. + (armv9.2-a): Likewise. + (armv9.3-a): Likewise. + * config/aarch64/aarch64.h (AARCH64_FL_V9_1): Likewise. + (AARCH64_FL_V9_2): Likewise. + (AARCH64_FL_V9_3): Likewise. + (AARCH64_FL_FOR_ARCH9_1): Likewise. + (AARCH64_FL_FOR_ARCH9_2): Likewise. + (AARCH64_FL_FOR_ARCH9_3): Likewise. + (AARCH64_ISA_V9_1): Likewise. + (AARCH64_ISA_V9_2): Likewise. + (AARCH64_ISA_V9_3): Likewise. + * doc/invoke.texi (AArch64 Options): Document armv9.1-a, armv9.2-a, + armv9.3-a values to -march. +--- + gcc/config/aarch64/aarch64-arches.def | 3 +++ + gcc/config/aarch64/aarch64.h | 18 ++++++++++++++++++ + gcc/doc/invoke.texi | 3 +++ + 3 files changed, 24 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 3c2b16588..6150448dc 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -41,5 +41,8 @@ AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8 + AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) + AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) + AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) ++AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) ++AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) ++AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 7d73689e4..42aae37ef 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -239,6 +239,15 @@ + /* Armv8.8-a architecture extensions. */ + #define AARCH64_FL_V8_8 (1ULL << 45) + ++/* Armv9.1-A. */ ++#define AARCH64_FL_V9_1 (1ULL << 46) ++ ++/* Armv9.2-A. */ ++#define AARCH64_FL_V9_2 (1ULL << 47) ++ ++/* Armv9.3-A. */ ++#define AARCH64_FL_V9_3 (1ULL << 48) ++ + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) + +@@ -274,6 +283,12 @@ + #define AARCH64_FL_FOR_ARCH9 \ + (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ + | AARCH64_FL_F16) ++#define AARCH64_FL_FOR_ARCH9_1 \ ++ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) ++#define AARCH64_FL_FOR_ARCH9_2 \ ++ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) ++#define AARCH64_FL_FOR_ARCH9_3 \ ++ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) + + /* Macros to test ISA flags. 
*/ + +@@ -314,6 +329,9 @@ + #define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) + #define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) ++#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) ++#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) ++#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 17d9e4126..53709b246 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19176,6 +19176,9 @@ and the features that they enable by default: + @item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64} + @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops} + @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2} ++@item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm} ++@item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64} ++@item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops} + @item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r} + @end multitable + +-- +2.33.0 +
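Once these entries exist, -march=armv9.1-a and friends are accepted on the command line, and (per the option-processing cleanup above) __ARM_ARCH reports 9 for any v9-class selection. A hedged sketch of consuming that from user code:

/* Built with, e.g.: gcc -march=armv9.2-a -c demo.c  (illustrative)  */
#if defined (__ARM_ARCH) && __ARM_ARCH >= 9
/* v9-class path: SVE2 is part of the baseline these -march values enable.  */
#else
/* v8 fallback path.  */
#endif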
View file
_service:tar_scm:0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch
Added
@@ -0,0 +1,124 @@ +From dc572aebb3a2c9062014ec50764bbc702dbb8a20 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 8 Jan 2024 09:14:10 +0800 +Subject: [PATCH 106/188] LoongArch: Simplify -mexplicit-reloc definitions + +Since we do not need printing or manual parsing of this option, +(whether in the driver or for target attributes to be supported later) +it can be handled in the .opt file framework. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Remove explicit-reloc + argument string definitions. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/genopts/loongarch.opt.in: Mark -mno-explicit-relocs + as aliases to -mexplicit-relocs={always,none} + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.cc: Same. +--- + gcc/config/loongarch/genopts/loongarch-strings | 6 ------ + gcc/config/loongarch/genopts/loongarch.opt.in | 8 ++++---- + gcc/config/loongarch/loongarch-str.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 12 ------------ + gcc/config/loongarch/loongarch.opt | 2 +- + 5 files changed, 5 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index ce70b8b9c..99fd4e7cd 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -64,9 +64,3 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme +- +-# -mexplicit-relocs +-OPTSTR_EXPLICIT_RELOCS explicit-relocs +-STR_EXPLICIT_RELOCS_AUTO auto +-STR_EXPLICIT_RELOCS_NONE none +-STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 851d8d1f3..f2055b55e 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -181,20 +181,20 @@ Name(explicit_relocs) Type(int) + The code model option names for -mexplicit-relocs: + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS) + + mexplicit-relocs= + Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 2251df38b..cacae38c0 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -63,11 +63,6 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" +-#define STR_EXPLICIT_RELOCS_AUTO "auto" +-#define STR_EXPLICIT_RELOCS_NONE "none" +-#define STR_EXPLICIT_RELOCS_ALWAYS "always" +- + #define OPTSTR_FRECIPE "frecipe" + #define OPTSTR_DIV32 "div32" + #define OPTSTR_LAM_BH "lam-bh" +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b0bb67d60..8cd703caa 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7518,18 +7518,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (la_opt_explicit_relocs != M_OPT_UNSET +- && la_opt_explicit_relocs_backward != M_OPT_UNSET) +- error ("do not use %qs (with %qs) and %qs (without %qs) together", +- "-mexplicit-relocs=", "=", +- la_opt_explicit_relocs_backward ? "-mexplicit-relocs" +- : "-mno-explicit-relocs", "="); +- +- if (la_opt_explicit_relocs_backward != M_OPT_UNSET) +- la_opt_explicit_relocs = (la_opt_explicit_relocs_backward +- ? EXPLICIT_RELOCS_ALWAYS +- : EXPLICIT_RELOCS_NONE); +- + if (la_opt_explicit_relocs == M_OPT_UNSET) + la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS + ? (loongarch_mrelax +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index df7314973..d6e337ac2 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -202,7 +202,7 @@ Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) I + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-- +2.43.0 +
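The simplification rests on the .opt Alias() directive: the generic option machinery rewrites the bare flag into the = form before any backend code runs, which is why the hand-written disambiguation error above can be deleted. A rough C model of the mapping (the real work happens in GCC's generated option tables):

/* Effect of: Target Alias(mexplicit-relocs=, always, none)  */
enum { EXPLICIT_RELOCS_AUTO, EXPLICIT_RELOCS_NONE, EXPLICIT_RELOCS_ALWAYS };

static int
canonicalize_mexplicit_relocs (int negated)
{
  /* -mexplicit-relocs    -> -mexplicit-relocs=always
     -mno-explicit-relocs -> -mexplicit-relocs=none  */
  return negated ? EXPLICIT_RELOCS_NONE : EXPLICIT_RELOCS_ALWAYS;
}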
View file
_service:tar_scm:0106-aarch64-Tweak-frame-size-comment.patch
Deleted
@@ -1,35 +0,0 @@ -From aac8b31379ac3bbd14fc6427dce23f56e54e8485 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:52 +0100 -Subject: [PATCH] aarch64: Tweak frame_size comment -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -This patch fixes another case in which a value was described with -an “upside-down” view. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. ---- - gcc/config/aarch64/aarch64.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index dd1f403f9393..700524ae22bf 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -895,8 +895,8 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - poly_int64 bytes_above_hard_fp; - -- /* The size of the frame. This value is the offset from base of the -- frame (incomming SP) to the stack_pointer. This value is always -+ /* The size of the frame, i.e. the number of bytes between the bottom -+ of the outgoing arguments and the incoming SP. This value is always - a multiple of STACK_BOUNDARY. */ - poly_int64 frame_size; - --- -2.43.5 -
View file
_service:tar_scm:0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
Added
@@ -0,0 +1,112 @@ +From b36c8c41cab42d3df45197bb287f06381d660001 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Mon, 19 Feb 2024 19:27:29 +0800 +Subject: [PATCH 008/157] [Backport][SME] Revert "aarch64: Define + __ARM_FEATURE_RCPC" + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=40a727379f3e8e6a83aea4e94c38dfa5dd8ef33d + +Revert this commit to solve conflicts with later patches, +and will apply it later. +--- + gcc/config/aarch64/aarch64-c.cc | 1 - + gcc/config/aarch64/aarch64-cores.def | 10 +++++----- + gcc/config/aarch64/aarch64.h | 4 +--- + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 ------------------- + 4 files changed, 6 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 90d45e45d..3d2fb5ec2 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -202,7 +202,6 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_LS64, + "__ARM_FEATURE_LS64", pfile); +- aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..842d64932 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -134,17 +134,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_ + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 42aae37ef..7c090c8f2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -262,8 +262,7 @@ + #define AARCH64_FL_FOR_ARCH8_2 \ + (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) + #define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH \ +- | AARCH64_FL_RCPC) ++ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_ARCH8_4 \ + (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) +@@ -314,7 +313,6 @@ + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) +-#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index 307fa3d67..bfb044f5d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -248,26 +248,6 @@ + #error "__ARM_FEATURE_CRC32 is not defined but should be!" + #endif + +-#pragma GCC target ("arch=armv8.2-a") +-#ifdef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is defined but should not be!" 
+-#endif +- +-#pragma GCC target ("arch=armv8.2-a+rcpc") +-#ifndef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is not defined but should be!" +-#endif +- +-#pragma GCC target ("+norcpc") +-#ifdef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is defined but should not be!" +-#endif +- +-#pragma GCC target ("arch=armv8.3-a") +-#ifndef __ARM_FEATURE_RCPC +-#error "__ARM_FEATURE_RCPC is not defined but should be!" +-#endif +- + int + foo (int a) + { +-- +2.33.0 +
View file
_service:tar_scm:0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch
Added
@@ -0,0 +1,35 @@ +From f90e31b6dc8c99f6670dee9a120c5dd9fa9a18d9 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 10 Jan 2024 15:25:21 +0800 +Subject: [PATCH 107/188] LoongArch: testsuite: Add loongarch support to + slp-21.c. + +The function of this test is to check that the compiler supports vectorization +using SLP and vec_{load/store/*}_lanes. However, vec_{load/store/*}_lanes are +not supported on LoongArch, such as the corresponding "st4/ld4" directives on +aarch64. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/slp-21.c: Add loongarch. +--- + gcc/testsuite/gcc.dg/vect/slp-21.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c +index 4b83adb98..3b7e92fe8 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-21.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-21.c +@@ -210,7 +210,7 @@ int main (void) + + Not all vect_perm targets support that, and it's a bit too specific to have + its own effective-target selector, so we just test targets directly. */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* } } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ + +-- +2.43.0 +
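For context, the grouped-access shape slp-21.c exercises looks roughly like the loop below (simplified, not the testcase verbatim). Targets with vec_store_lanes support (aarch64's st4, for instance) vectorize some of the test's groups through store-lanes instead of SLP and so match "vectorizing stmts using SLP" only twice; LoongArch, like powerpc64 and s390, has no st4/ld4 equivalent, so all four groups go through SLP.

/* Sketch of a 4-way interleaved store group.  */
void
f (short *out, short a, short b)
{
  for (int i = 0; i < 64; i++)
    {
      out[i * 4 + 0] = a;
      out[i * 4 + 1] = b;
      out[i * 4 + 2] = a;
      out[i * 4 + 3] = b;
    }
}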
View file
_service:tar_scm:0107-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch
Deleted
@@ -1,195 +0,0 @@ -From 8d5506a8aeb8dd7e8b209a3663b07688478f76b9 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:53 +0100 -Subject: PATCH aarch64: Measure reg_offset from the bottom of the frame - -reg_offset was measured from the bottom of the saved register area. -This made perfect sense with the original layout, since the bottom -of the saved register area was also the hard frame pointer address. -It became slightly less obvious with SVE, since we save SVE -registers below the hard frame pointer, but it still made sense. - -However, if we want to allow different frame layouts, it's more -convenient and obvious to measure reg_offset from the bottom of -the frame. After previous patches, it's also a slight simplification -in its own right. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame): Add comment above - reg_offset. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets - from the bottom of the frame, rather than the bottom of the saved - register area. Measure reg_offset from the bottom of the frame - rather than the bottom of the saved register area. - (aarch64_save_callee_saves): Update accordingly. - (aarch64_restore_callee_saves): Likewise. - (aarch64_get_separate_components): Likewise. - (aarch64_process_components): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- - gcc/config/aarch64/aarch64.h | 3 ++ - 2 files changed, 27 insertions(+), 29 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 3c4052740e7a..97dd077844b4 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8139,7 +8139,6 @@ aarch64_needs_frame_chain (void) - static void - aarch64_layout_frame (void) - { -- poly_int64 offset = 0; - int regno, last_fp_reg = INVALID_REGNUM; - machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); -@@ -8217,7 +8216,9 @@ aarch64_layout_frame (void) - gcc_assert (crtl->is_leaf - || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); - -- frame.bytes_below_saved_regs = crtl->outgoing_args_size; -+ poly_int64 offset = crtl->outgoing_args_size; -+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ frame.bytes_below_saved_regs = offset; - - /* Now assign stack slots for the registers. 
Start with the predicate - registers, since predicate LDR and STR have a relatively small -@@ -8229,7 +8230,8 @@ aarch64_layout_frame (void) - offset += BYTES_PER_SVE_PRED; - } - -- if (maybe_ne (offset, 0)) -+ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; -+ if (maybe_ne (saved_prs_size, 0)) - { - /* If we have any vector registers to save above the predicate registers, - the offset of the vector register save slots need to be a multiple -@@ -8247,10 +8249,10 @@ aarch64_layout_frame (void) - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - else - { -- if (known_le (offset, vector_save_size)) -- offset = vector_save_size; -- else if (known_le (offset, vector_save_size * 2)) -- offset = vector_save_size * 2; -+ if (known_le (saved_prs_size, vector_save_size)) -+ offset = frame.bytes_below_saved_regs + vector_save_size; -+ else if (known_le (saved_prs_size, vector_save_size * 2)) -+ offset = frame.bytes_below_saved_regs + vector_save_size * 2; - else - gcc_unreachable (); - } -@@ -8267,9 +8269,10 @@ aarch64_layout_frame (void) - - /* OFFSET is now the offset of the hard frame pointer from the bottom - of the callee save area. */ -- bool saves_below_hard_fp_p = maybe_ne (offset, 0); -- frame.below_hard_fp_saved_regs_size = offset; -- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; -+ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -+ bool saves_below_hard_fp_p -+ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ frame.bytes_below_hard_fp = offset; - if (frame.emit_frame_chain) - { - /* FP and LR are placed in the linkage record. */ -@@ -8320,9 +8323,10 @@ aarch64_layout_frame (void) - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.saved_regs_size = offset; -+ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; - -- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; -+ poly_int64 varargs_and_saved_regs_size -+ = frame.saved_regs_size + frame.saved_varargs_size; - - poly_int64 saved_regs_and_above - = aligned_upper_bound (varargs_and_saved_regs_size -@@ -8790,9 +8794,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = (frame.reg_offsetregno -- + frame.bytes_below_saved_regs -- - bytes_below_sp); -+ offset = frame.reg_offsetregno - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -8899,9 +8901,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, - - machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); -- offset = (frame.reg_offsetregno -- + frame.bytes_below_saved_regs -- - bytes_below_sp); -+ offset = frame.reg_offsetregno - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -9040,14 +9040,12 @@ aarch64_get_separate_components (void) - it as a stack probe for -fstack-clash-protection. */ - if (flag_stack_clash_protection - && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) -- && known_eq (offset, 0)) -+ && known_eq (offset, frame.bytes_below_saved_regs)) - continue; - - /* Get the offset relative to the register we'll use. 
*/ - if (frame_pointer_needed) -- offset -= frame.below_hard_fp_saved_regs_size; -- else -- offset += frame.bytes_below_saved_regs; -+ offset -= frame.bytes_below_hard_fp; - - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -@@ -9194,9 +9192,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) - rtx reg = gen_rtx_REG (mode, regno); - poly_int64 offset = frame.reg_offset[regno]; - if (frame_pointer_needed) -- offset -= frame.below_hard_fp_saved_regs_size; -- else -- offset += frame.bytes_below_saved_regs; -+ offset -= frame.bytes_below_hard_fp; - - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); -@@ -9248,9 +9244,7 @@ - /* REGNO2 can be saved/restored in a pair with REGNO. */ - rtx reg2 = gen_rtx_REG (mode, regno2); - if (frame_pointer_needed) -- offset2 -= frame.below_hard_fp_saved_regs_size; -- else -- offset2 += frame.bytes_below_saved_regs; -+ offset2 -= frame.bytes_below_hard_fp; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9366,7 +9360,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (final_adjustment_p - && known_eq (frame.below_hard_fp_saved_regs_size, 0)) - { -- poly_int64 lr_offset = frame.reg_offset[LR_REGNUM]; -+ poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM] -+ - frame.bytes_below_saved_regs); - if (known_ge (lr_offset, 0)) - min_probe_threshold -= lr_offset.to_constant (); - else -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 700524ae22bf..b61358370732 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -860,6 +860,9 @@ extern enum aarch64_processor aarch64_tune; - #ifdef HAVE_POLY_INT_H - struct GTY (()) aarch64_frame - { -+ /* The offset from the bottom of the static frame (the bottom of the -+ outgoing arguments) of each register save slot, or -2 if no save is -+ needed. */ - poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; - - /* The number of extra stack bytes taken up by register varargs. --- -2.43.5 -
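The convention change in the patch above is plain offset arithmetic. A minimal standalone sketch, not GCC code: the variable names mirror the aarch64_frame fields and the byte values are hypothetical.

#include <stdio.h>

int main (void)
{
  long bytes_below_saved_regs = 32;  /* outgoing arguments, for example */
  long bytes_below_sp = 48;          /* SP position during the save, for example */
  long old_reg_offset = 16;          /* old convention: from bottom of saved-reg area */

  /* Old convention: bytes_below_saved_regs must be added back when
     forming an SP-relative offset.  */
  long sp_offset_old = old_reg_offset + bytes_below_saved_regs - bytes_below_sp;

  /* New convention: reg_offset already includes bytes_below_saved_regs,
     so the SP-relative offset is a single subtraction.  */
  long new_reg_offset = old_reg_offset + bytes_below_saved_regs;
  long sp_offset_new = new_reg_offset - bytes_below_sp;

  printf ("%ld == %ld\n", sp_offset_old, sp_offset_new);  /* identical results */
  return 0;
}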
View file
_service:tar_scm:0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch
Added
@@ -0,0 +1,39 @@ +From 34374de5edde59f27a1b3b443e8a163fc5b528d7 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Tue, 20 Feb 2024 10:13:06 +0800 +Subject: [PATCH 009/157] [Backport][SME] Revert "Ampere-1 and Ampere-1A core + definition in aarch64-cores.def" + +Revert it to solve conflicts with later patches, and will apply it +later. It's introduced by commit 3668a59ae22a and e9f0d974600e. +--- + gcc/config/aarch64/aarch64-cores.def | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 842d64932..0402bfb74 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -69,8 +69,7 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH + AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3, ampere1, 0xC0, 0xac3, -1) +-AARCH64_CORE("ampere1a", ampere1a, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3 | AARCH64_FL_MEMTAG, ampere1a, 0xC0, 0xac4, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ + AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) +@@ -164,8 +163,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG +- | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) + + AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) + +-- +2.33.0 +
View file
_service:tar_scm:0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch
Added
@@ -0,0 +1,228 @@ +From 9b19eb071fe3826aa61567b927fc95a37f6560f7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 8 Dec 2023 10:16:48 +0800 +Subject: [PATCH 108/188] LoongArch: Optimized some of the symbolic expansion + instructions generated during bitwise operations. + +There are two mode iterators defined in the loongarch.md: + (define_mode_iterator GPR [SI (DI "TARGET_64BIT")]) + and + (define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) +Replace the mode in the bit arithmetic from GPR to X. + +Since the bitwise operation instruction does not distinguish between 64-bit, +32-bit, etc., it is necessary to perform symbolic expansion if the bitwise +operation is less than 64 bits. +The original definition would have generated a lot of redundant symbolic +extension instructions. This problem is optimized with reference to the +implementation of RISCV. + +Add this patch spec2017 500.perlbench performance improvement by 1.8% + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (one_cmpl<mode>2): Replace GPR with X. + (*nor<mode>3): Likewise. + (nor<mode>3): Likewise. + (*negsi2_extended): New template. + (*<optab>si3_internal): Likewise. + (*one_cmplsi2_internal): Likewise. + (*norsi3_internal): Likewise. + (*<optab>nsi_internal): Likewise. + (bytepick_w_<bytepick_imm>_extend): Modify this template according to the + modified bit operation to make the optimization work. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-bitwise.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../loongarch/sign-extend-bitwise.c | 21 +++++ + 2 files changed, 90 insertions(+), 24 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23653a2b0..6ebf33cbe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -736,7 +736,7 @@ + + (define_insn "sub<mode>3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (minus:GPR (match_operand:GPR 1 "register_operand" "rJ") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] + "" + "sub.<d>\t%0,%z1,%2" +@@ -1412,13 +1412,13 @@ + [(set_attr "alu_type" "sub") + (set_attr "mode" "<MODE>")]) + +-(define_insn "one_cmpl<mode>2" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (not:GPR (match_operand:GPR 1 "register_operand" "r")))] +- "" +- "nor\t%0,%.,%1" +- [(set_attr "alu_type" "not") +- (set_attr "mode" "<MODE>")]) ++(define_insn "*negsi2_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "sub.w\t%0,%.,%1" ++ [(set_attr "alu_type" "sub") ++ (set_attr "mode" "SI")]) + + (define_insn "neg<mode>2" + [(set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1438,14 +1438,39 @@ + ;; + + (define_insn "<optab><mode>3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "%r,r") +- (match_operand:GPR 2 "uns_arith_operand" "r,K")))] ++ [(set (match_operand:X 0 "register_operand" "=r,r") ++ (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r") ++ (match_operand:X 2 "uns_arith_operand" "r,K")))] + "" + "<insn>%i2\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "<MODE>")]) + ++(define_insn "*<optab>si3_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (any_bitwise:SI
(match_operand:SI 1 "register_operand" "%r,r") ++ (match_operand:SI 2 "uns_arith_operand" " r,K")))] ++ "TARGET_64BIT" ++ "<insn>%i2\t%0,%1,%2" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "one_cmpl<mode>2" ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (not:X (match_operand:X 1 "register_operand" "r")))] ++ "" ++ "nor\t%0,%.,%1" ++ [(set_attr "alu_type" "not") ++ (set_attr "mode" "<MODE>")]) ++ ++(define_insn "*one_cmplsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (not:SI (match_operand:SI 1 "register_operand" " r")))] ++ "TARGET_64BIT" ++ "nor\t%0,%.,%1" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) ++ + (define_insn "and<mode>3_extended" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") +@@ -1561,25 +1586,43 @@ + [(set_attr "type" "logical") + (set_attr "mode" "HI")]) + +-(define_insn "*nor<mode>3" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r")) +- (not:GPR (match_operand:GPR 2 "register_operand" "r"))))] ++(define_insn "nor<mode>3" ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (and:X (not:X (match_operand:X 1 "register_operand" "%r")) ++ (not:X (match_operand:X 2 "register_operand" "r"))))] + "" + "nor\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "<MODE>")]) + ++(define_insn "*norsi3_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "%r")) ++ (not:SI (match_operand:SI 2 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "nor\t%0,%1,%2" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) ++ + (define_insn "<optab>n<mode>" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (neg_bitwise:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r")))] ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (neg_bitwise:X ++ (not:X (match_operand:X 1 "register_operand" "r")) ++ (match_operand:X 2 "register_operand" "r")))] + "" + "<insn>n\t%0,%2,%1" + [(set_attr "type" "logical") + (set_attr "mode" "<MODE>")]) + ++(define_insn "*<optab>nsi_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (neg_bitwise:SI ++ (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "TARGET_64BIT" ++ "<insn>n\t%0,%2,%1" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) +  + ;; + ;; .................... +@@ -3167,7 +3210,6 @@ + (label_ref (match_operand 1)) + (pc)))]) + +- +  + ;; + ;; ....................
+@@ -3967,10 +4009,13 @@ + (define_insn "bytepick_w_<bytepick_imm>_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int <bytepick_w_lshiftrt_amount>)) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int <bytepick_w_ashift_amount>)))))] ++ (subreg:SI ++ (ior:DI (subreg:DI (lshiftrt ++ (match_operand:SI 1 "register_operand" "r") ++ (const_int <bytepick_w_lshiftrt_amount>)) 0) ++ (subreg:DI (ashift ++ (match_operand:SI 2 "register_operand" "r") ++ (const_int <bytepick_w_ashift_amount>)) 0)) 0)))] + "TARGET_64BIT" + "bytepick.w\t%0,%1,%2,<bytepick_imm>" + [(set_attr "mode" "SI")]) +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c +new file mode 100644 +index 000000000..5753ef69d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler-not "slli.w\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,0" } } */ ++ ++struct pmop ++{ ++ unsigned int op_pmflags; ++ unsigned int op_pmpermflags; ++}; ++unsigned int PL_hints; ++ ++struct pmop *pmop; ++void ++Perl_newPMOP (int type, int flags) ++{ ++ if (PL_hints & 0x00100000) ++ pmop->op_pmpermflags |= 0x0001; ++ if (PL_hints & 0x00000004) ++ pmop->op_pmpermflags |= 0x0800; ++ pmop->op_pmflags = pmop->op_pmpermflags; ++} +-- +2.43.0 +
View file
_service:tar_scm:0108-aarch64-Simplify-top-of-frame-allocation.patch
Deleted
@@ -1,55 +0,0 @@ -From b47766614df3b9df878262efb2ad73aaac108363 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:53 +0100 -Subject: [PATCH] aarch64: Simplify top of frame allocation - -After previous patches, it no longer really makes sense to allocate -the top of the frame in terms of varargs_and_saved_regs_size and -saved_regs_and_above. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify - the allocation of the top of the frame. ---- - gcc/config/aarch64/aarch64.cc | 23 ++++++++--------------- - 1 file changed, 8 insertions(+), 15 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 97dd077844b4..81935852d5b2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8325,23 +8325,16 @@ aarch64_layout_frame (void) - - frame.saved_regs_size = offset - frame.bytes_below_saved_regs; - -- poly_int64 varargs_and_saved_regs_size -- = frame.saved_regs_size + frame.saved_varargs_size; -- -- poly_int64 saved_regs_and_above -- = aligned_upper_bound (varargs_and_saved_regs_size -- + get_frame_size (), -- STACK_BOUNDARY / BITS_PER_UNIT); -- -- frame.bytes_above_hard_fp -- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ auto top_of_locals = offset; - -- /* Both these values are already aligned. */ -- gcc_assert (multiple_p (frame.bytes_below_saved_regs, -- STACK_BOUNDARY / BITS_PER_UNIT)); -- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; -+ offset += frame.saved_varargs_size; -+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ frame.frame_size = offset; - -- frame.bytes_above_locals = frame.saved_varargs_size; -+ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; -+ frame.bytes_above_locals = frame.frame_size - top_of_locals; - - frame.initial_adjust = 0; - frame.final_adjust = 0; --- -2.43.5 -
View file
_service:tar_scm:0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
Added
@@ -0,0 +1,157 @@ +From 244780570ebc85c44806559ba165d4a70a2333d1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:50 +0100 +Subject: [PATCH 010/157] [Backport][SME] aarch64: Rename AARCH64_ISA + architecture-level macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a4788ac3bae1467b0379852d5a6690a8496d0c9 + +All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R +are for the A profile: they cause __ARM_ARCH_PROFILE to be set to +'A' and they are associated with architecture names like armv8.4-a. + +It's convenient for later patches if we make this explicit +by adding an "A" to the name. Also, rather than add an underscore +(as for V8_R) it's more convenient to add the profile directly +to the number, like we already do in the ARCH_IDENT field of the +aarch64-arches.def entries. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3) + (AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6) + (AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2) + (AARCH64_ISA_V9_3): Add "A" to the end of the name. + (AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R. + (TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update + accordingly. + * common/config/aarch64/aarch64-common.cc + (aarch64_get_extension_string_for_isa_flags): Likewise. + * config/aarch64/aarch64-c.cc + (aarch64_define_unconditional_macros): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 2 +- + gcc/config/aarch64/aarch64-c.cc | 4 +-- + gcc/config/aarch64/aarch64.h | 28 ++++++++++----------- + 3 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index 85ce8133b..3dc020f0c 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -506,7 +506,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, + + Note that assemblers with Armv8-R AArch64 support should not have this + issue, so we don't need this fix when targeting Armv8-R. */ +- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R) ++ if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R) + isa_flag_bits |= AARCH64_ISA_CRC; + + /* Pass Two: +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 3d2fb5ec2..18c9b975b 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + builtin_define ("__ARM_ARCH_8A"); + + builtin_define_with_int_value ("__ARM_ARCH_PROFILE", +- AARCH64_ISA_V8_R ? 'R' : 'A'); ++ AARCH64_ISA_V8R ? 'R' : 'A'); + builtin_define ("__ARM_FEATURE_CLZ"); + builtin_define ("__ARM_FEATURE_IDIV"); + builtin_define ("__ARM_FEATURE_UNALIGNED"); +@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + +- builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); ++ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", + flag_short_enums ?
1 : 4); +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 7c090c8f2..356a263b2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -297,7 +297,7 @@ + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) +-#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) ++#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) + #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +@@ -305,31 +305,31 @@ + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) +-#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) ++#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) + #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) +-#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4) ++#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +-#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) + #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +-#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) + #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) + #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) + #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) + #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) + #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) +-#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) ++#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) +-#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) +-#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) +-#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) +-#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) ++#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9) ++#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1) ++#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2) ++#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +@@ -383,16 +383,16 @@ + #define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) + + /* ARMv8.3-A features. 
*/ +-#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) ++#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + + /* Javascript conversion instruction from Armv8.3-a. */ +-#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) ++#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3A) + + /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ + #define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) + + /* Floating-point rounding instructions from Armv8.5-a. */ +-#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) ++#define TARGET_FRINT (AARCH64_ISA_V8_5A && TARGET_FLOAT) + + /* TME instructions are enabled. */ + #define TARGET_TME (AARCH64_ISA_TME) +@@ -401,7 +401,7 @@ + #define TARGET_RNG (AARCH64_ISA_RNG) + + /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */ +-#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG) ++#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG) + + /* I8MM instructions are enabled through +i8mm. */ + #define TARGET_I8MM (AARCH64_ISA_I8MM) +-- +2.33.0 +
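The renamed AARCH64_ISA_V9A/AARCH64_ISA_V8R tests feed the ACLE feature-test macros that aarch64-c.cc defines above. For reference, a small consumer-side sketch using the standard ACLE macro names (__ARM_ARCH, __ARM_ARCH_PROFILE); this is illustrative only and not part of the patch.

/* Illustrative only: consuming the macros set up in aarch64-c.cc.  */
#include <stdio.h>

int main (void)
{
#ifdef __aarch64__
  /* 9 when compiling for an Armv9-A target (AARCH64_ISA_V9A), else 8.  */
  printf ("__ARM_ARCH = %d\n", __ARM_ARCH);
#ifdef __ARM_ARCH_PROFILE
  /* 'R' for Armv8-R (AARCH64_ISA_V8R), otherwise 'A'.  */
  printf ("profile = %c\n", (char) __ARM_ARCH_PROFILE);
#endif
#else
  puts ("not an AArch64 target");
#endif
  return 0;
}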
View file
_service:tar_scm:0109-LoongArch-Implement-option-save-restore.patch
Added
@@ -0,0 +1,467 @@ +From 146c85fa8b32d88acacf8645096d004e0c6f2f9c Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Thu, 11 Jan 2024 09:07:10 +0800 +Subject: [PATCH 109/188] LoongArch: Implement option save/restore + +LTO option streaming and target attributes both require per-function +target configuration, which is achieved via option save/restore. + +We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target +context in addition to other automatically maintained option states +(via the "Save" option property in the .opt files). + +Tested on loongarch64-linux-gnu without regression. + + PR target/113233 + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark options with + the "Save" property. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-opts.cc: Refresh -mcmodel= state + according to la_target. + * config/loongarch/loongarch.cc: Implement TARGET_OPTION_{SAVE, + RESTORE} for the la_target structure; Rename option conditions + to have the same "la_" prefix. + * config/loongarch/loongarch.h: Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 38 ++++----- + gcc/config/loongarch/loongarch-opts.cc | 7 ++ + gcc/config/loongarch/loongarch.cc | 80 +++++++++++++++---- + gcc/config/loongarch/loongarch.h | 2 +- + gcc/config/loongarch/loongarch.opt | 38 ++++----- + 5 files changed, 111 insertions(+), 54 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f2055b55e..4d6b1902d 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -50,7 +50,7 @@ EnumValue + Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64) + + m@@OPTSTR_ISA_EXT_FPU@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_FPU@@=FPU Generate code for the given FPU. + + m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@ +@@ -82,7 +82,7 @@ EnumValue + Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX) + + m@@OPTSTR_ISA_EXT_SIMD@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension. + + m@@STR_ISA_EXT_LSX@@ +@@ -114,11 +114,11 @@ EnumValue + Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) + + m@@OPTSTR_ARCH@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. + + m@@OPTSTR_TUNE@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. + + +@@ -149,31 +149,31 @@ Variable + int la_opt_abi_ext = M_OPT_UNSET + + mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions.
+ + mcheck-zero-division +-Target Mask(CHECK_ZERO_DIV) ++Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. + + mcond-move-int +-Target Var(TARGET_COND_MOVE_INT) Init(1) ++Target Mask(COND_MOVE_INT) Save + Conditional moves for integral are enabled. + + mcond-move-float +-Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Mask(COND_MOVE_FLOAT) Save + Conditional moves for float are enabled. + + mmemcpy +-Target Mask(MEMCPY) ++Target Mask(MEMCPY) Save + Prevent optimizing block moves, which is also the default behavior of -Os. + + mstrict-align +-Target Var(TARGET_STRICT_ALIGN) Init(0) ++Target Mask(STRICT_ALIGN) Save + Do not generate unaligned memory accesses. + + mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + Enum +@@ -198,11 +198,11 @@ Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-Target RejectNegative Var(loongarch_recip) ++Target RejectNegative Var(la_recip) Save + Generate approximate reciprocal divide and square root for better throughput. + + mrecip= +-Target RejectNegative Joined Var(loongarch_recip_name) ++Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + + ; The code model option names for -mcmodel. +@@ -229,29 +229,29 @@ EnumValue + Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) Save + Specify the code model. + + mdirect-extern-access +-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) ++Target Mask(DIRECT_EXTERN_ACCESS) Save + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++Target Mask(LINKER_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + + mpass-mrelax-to-as +-Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + + -param=loongarch-vect-unroll-limit= +-Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may + unroll a loop. The default value is 6. + + -param=loongarch-vect-issue-info= +-Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Target Undocumented Joined UInteger Var(la_vect_issue_info) Init(4) IntegerRange(1, 64) Param + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. 
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index cf4c7bc93..a2b069d83 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -785,8 +785,15 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + opts->x_la_opt_cpu_arch = target->cpu_arch; + opts->x_la_opt_cpu_tune = target->cpu_tune; + ++ /* status of -mcmodel */ ++ opts->x_la_opt_cmodel = target->cmodel; ++ + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; ++ ++ /* status of -msimd */ + opts->x_la_opt_simd = target->isa.simd; ++ ++ /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8cd703caa..533bae5b2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4079,10 +4079,10 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + + /* Use this simple hardware resource model that how many non vld/vst + vector instructions can be issued per cycle. */ +- unsigned int issue_info = loongarch_vect_issue_info; ++ unsigned int issue_info = la_vect_issue_info; + unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1; + unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst); +- uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf); ++ uf = MIN ((unsigned int) la_vect_unroll_limit, uf); + + return 1 << ceil_log2 (uf); + } +@@ -5540,7 +5540,7 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align) + return false; + + HOST_WIDE_INT length = INTVAL (r_length); +- if (length > loongarch_max_inline_memcpy_size) ++ if (length > la_max_inline_memcpy_size) + return false; + + HOST_WIDE_INT align = INTVAL (r_align); +@@ -7518,13 +7518,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (la_opt_explicit_relocs == M_OPT_UNSET) +- la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +- ? (loongarch_mrelax +- ? EXPLICIT_RELOCS_AUTO +- : EXPLICIT_RELOCS_ALWAYS) +- : EXPLICIT_RELOCS_NONE); +- + if (TARGET_ABI_LP64) + flag_pcc_struct_return = 0; + +@@ -7536,8 +7529,8 @@ loongarch_option_override_internal (struct gcc_options *opts, + + /* If the user hasn't specified a branch cost, use the processor's + default. */ +- if (loongarch_branch_cost == 0) +- loongarch_branch_cost = loongarch_cost->branch_cost; ++ if (la_branch_cost == 0) ++ la_branch_cost = loongarch_cost->branch_cost; + + /* Enable sw prefetching at -O3 and higher. 
*/ + if (opts->x_flag_prefetch_loop_arrays < 0 +@@ -7624,9 +7617,9 @@ loongarch_option_override_internal (struct gcc_options *opts, + { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, + }; + +- if (loongarch_recip_name) ++ if (la_recip_name) + { +- char *p = ASTRDUP (loongarch_recip_name); ++ char *p = ASTRDUP (la_recip_name); + char *q; + unsigned int mask, i; + bool invert; +@@ -7667,10 +7660,38 @@ loongarch_option_override_internal (struct gcc_options *opts, + recip_mask |= mask; + } + } +- if (loongarch_recip) ++ if (la_recip) + recip_mask |= RECIP_MASK_ALL; + if (!ISA_HAS_FRECIPE) + recip_mask = RECIP_MASK_NONE; ++ ++#define INIT_TARGET_FLAG(NAME, INIT) \ ++ { \ ++ if (!(target_flags_explicit & MASK_##NAME)) \ ++ { \ ++ if (INIT) \ ++ target_flags |= MASK_##NAME; \ ++ else \ ++ target_flags &= ~MASK_##NAME; \ ++ } \ ++ } ++ ++ /* Enable conditional moves for int and float by default. */ ++ INIT_TARGET_FLAG (COND_MOVE_INT, 1) ++ INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1) ++ ++ /* Set mrelax default. */ ++ INIT_TARGET_FLAG (LINKER_RELAXATION, ++ HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++ ++#undef INIT_TARGET_FLAG ++ ++ if (la_opt_explicit_relocs == M_OPT_UNSET) ++ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (TARGET_LINKER_RELAXATION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); + } + + +@@ -7682,6 +7703,31 @@ loongarch_option_override (void) + loongarch_option_override_internal (&global_options, &global_options_set); + } + ++/* Implement TARGET_OPTION_SAVE. */ ++static void ++loongarch_option_save (struct cl_target_option *, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ loongarch_update_gcc_opt_status (&la_target, opts, opts_set); ++} ++ ++/* Implement TARGET_OPTION_RESTORE. */ ++static void ++loongarch_option_restore (struct gcc_options *, ++ struct gcc_options *, ++ struct cl_target_option *ptr) ++{ ++ la_target.cpu_arch = ptr->x_la_opt_cpu_arch; ++ la_target.cpu_tune = ptr->x_la_opt_cpu_tune; ++ ++ la_target.isa.fpu = ptr->x_la_opt_fpu; ++ la_target.isa.simd = ptr->x_la_opt_simd; ++ la_target.isa.evolution = ptr->x_la_isa_evolution; ++ ++ la_target.cmodel = ptr->x_la_opt_cmodel; ++} ++ + /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ + + static void +@@ -10880,6 +10926,10 @@ loongarch_asm_code_end (void) + + #undef TARGET_OPTION_OVERRIDE + #define TARGET_OPTION_OVERRIDE loongarch_option_override ++#undef TARGET_OPTION_SAVE ++#define TARGET_OPTION_SAVE loongarch_option_save ++#undef TARGET_OPTION_RESTORE ++#define TARGET_OPTION_RESTORE loongarch_option_restore + + #undef TARGET_LEGITIMIZE_ADDRESS + #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index fbc0f53e4..f54b078b1 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -868,7 +868,7 @@ typedef struct { + /* A C expression for the cost of a branch instruction. A value of + 1 is the default; other values are interpreted relative to that. */ + +-#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost ++#define BRANCH_COST(speed_p, predictable_p) la_branch_cost + + /* Return the asm template for a conditional branch instruction. 
+ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index d6e337ac2..75d230067 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -58,7 +58,7 @@ EnumValue + Enum(isa_ext_fpu) String(64) Value(ISA_EXT_FPU64) + + mfpu= +-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) Save + -mfpu=FPU Generate code for the given FPU. + + mfpu=0 +@@ -90,7 +90,7 @@ EnumValue + Enum(isa_ext_simd) String(lasx) Value(ISA_EXT_SIMD_LASX) + + msimd= +-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) Save + -msimd=SIMD Generate code for the given SIMD extension. + + mlsx +@@ -122,11 +122,11 @@ EnumValue + Enum(cpu_type) String(la664) Value(CPU_LA664) + + march= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -march=PROCESSOR Generate code for the given PROCESSOR ISA. + + mtune= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -mtune=PROCESSOR Generate optimized code for PROCESSOR. + + +@@ -157,31 +157,31 @@ Variable + int la_opt_abi_ext = M_OPT_UNSET + + mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. + + mcheck-zero-division +-Target Mask(CHECK_ZERO_DIV) ++Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. + + mcond-move-int +-Target Var(TARGET_COND_MOVE_INT) Init(1) ++Target Mask(COND_MOVE_INT) Save + Conditional moves for integral are enabled. + + mcond-move-float +-Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Mask(COND_MOVE_FLOAT) Save + Conditional moves for float are enabled. + + mmemcpy +-Target Mask(MEMCPY) ++Target Mask(MEMCPY) Save + Prevent optimizing block moves, which is also the default behavior of -Os. + + mstrict-align +-Target Var(TARGET_STRICT_ALIGN) Init(0) ++Target Mask(STRICT_ALIGN) Save + Do not generate unaligned memory accesses. + + mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + Enum +@@ -206,11 +206,11 @@ Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-Target RejectNegative Var(loongarch_recip) ++Target RejectNegative Var(la_recip) Save + Generate approximate reciprocal divide and square root for better throughput. + + mrecip= +-Target RejectNegative Joined Var(loongarch_recip_name) ++Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + + ; The code model option names for -mcmodel. 
+@@ -237,29 +237,29 @@ EnumValue + Enum(cmodel) String(extreme) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) Save + Specify the code model. + + mdirect-extern-access +-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) ++Target Mask(DIRECT_EXTERN_ACCESS) Save + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++Target Mask(LINKER_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + + mpass-mrelax-to-as +-Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + + -param=loongarch-vect-unroll-limit= +-Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may + unroll a loop. The default value is 6. + + -param=loongarch-vect-issue-info= +-Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Target Undocumented Joined UInteger Var(la_vect_issue_info) Init(4) IntegerRange(1, 64) Param + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. +-- +2.43.0 +
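The TARGET_OPTION_SAVE/TARGET_OPTION_RESTORE hooks implemented in the patch above are what let per-function target overrides switch la_target and switch it back. A hedged sketch of the user-visible side follows; the exact attribute string accepted on LoongArch is an assumption here, not something this patch defines.

/* Hypothetical usage: a per-function target override, which relies on the
   TARGET_OPTION_{SAVE,RESTORE} hooks above.  The "arch=la464" string is
   illustrative and unverified for this port.  */
__attribute__ ((target ("arch=la464")))
int add_override (int a, int b)
{
  return a + b;  /* compiled under the saved/restored la_target context */
}

int add_plain (int a, int b)
{
  return a + b;  /* compiled under the command-line target context */
}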
View file
_service:tar_scm:0109-aarch64-Minor-initial-adjustment-tweak.patch
Deleted
@@ -1,38 +0,0 @@ -From 08f71b4bb28fb74d20e8d2927a557e8119ce9f4d Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:54 +0100 -Subject: [PATCH] aarch64: Minor initial adjustment tweak - -This patch just changes a calculation of initial_adjust -to one that makes it slightly more obvious that the total -adjustment is frame.frame_size. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak - calculation of initial_adjust for frames in which all saves - are SVE saves. ---- - gcc/config/aarch64/aarch64.cc | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 81935852d5b2..4d9fcf3d1623 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8414,11 +8414,10 @@ aarch64_layout_frame (void) - { - /* Frame in which all saves are SVE saves: - -- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size -+ sub sp, sp, frame_size - bytes_below_saved_regs - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ -- frame.initial_adjust = (frame.bytes_above_hard_fp -- + frame.below_hard_fp_saved_regs_size); -+ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; - frame.final_adjust = frame.bytes_below_saved_regs; - } - else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) --- -2.43.5 -
View file
_service:tar_scm:0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
Added
@@ -0,0 +1,220 @@ +From e1b067871c4c39565bf6059b4924a810923c6eeb Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:51 +0100 +Subject: [PATCH 011/157] [Backport][SME] aarch64: Rename AARCH64_FL + architecture-level macros + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=78aaafc3d4dc0ef997b4747349d3836ca2f7e301 + +Following on from the previous AARCH64_ISA patch, this one adds the +profile name directly to the end of architecture-level AARCH64_FL_* +macros. + +gcc/ + * config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2) + (AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6) + (AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1) + (AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name. + (AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R. + (AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly. + (AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise. + (AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise. + (AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise. + (AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise. + (AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise. + (AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A) + (AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise. + (AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise. + (AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise. +--- + gcc/config/aarch64/aarch64.h | 72 ++++++++++++++++++------------------ + 1 file changed, 36 insertions(+), 36 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 356a263b2..5a91dfdd2 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -154,22 +154,22 @@ + /* ARMv8.1-A architecture extensions. */ + #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ + #define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ +-#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ ++#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */ + /* Armv8-R. */ +-#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */ ++#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */ + /* ARMv8.2-A architecture extensions. */ +-#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ ++#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */ + #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ + #define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */ + /* ARMv8.3-A architecture extensions. */ +-#define AARCH64_FL_V8_3 (1 << 11) /* Has ARMv8.3-A features. */ ++#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */ + #define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */ + #define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */ + /* New flags to split crypto into aes and sha2. */ + #define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */ + #define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */ + /* ARMv8.4-A architecture extensions. */ +-#define AARCH64_FL_V8_4 (1 << 16) /* Has ARMv8.4-A features. */ ++#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */ + #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ + #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ + #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions.
*/ +@@ -179,7 +179,7 @@ + #define AARCH64_FL_PROFILE (1 << 21) + + /* ARMv8.5-A architecture extensions. */ +-#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */ ++#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */ + #define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ + #define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging + Extensions. */ +@@ -204,7 +204,7 @@ + #define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ + + /* Armv8.6-A architecture extensions. */ +-#define AARCH64_FL_V8_6 (1ULL << 34) ++#define AARCH64_FL_V8_6A (1ULL << 34) + + /* 8-bit Integer Matrix Multiply (I8MM) extensions. */ + #define AARCH64_FL_I8MM (1ULL << 35) +@@ -225,28 +225,28 @@ + #define AARCH64_FL_PAUTH (1ULL << 40) + + /* Armv9.0-A. */ +-#define AARCH64_FL_V9 (1ULL << 41) /* Armv9.0-A Architecture. */ ++#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */ + + /* 64-byte atomic load/store extensions. */ + #define AARCH64_FL_LS64 (1ULL << 42) + + /* Armv8.7-a architecture extensions. */ +-#define AARCH64_FL_V8_7 (1ULL << 43) ++#define AARCH64_FL_V8_7A (1ULL << 43) + + /* Hardware memory operation instructions. */ + #define AARCH64_FL_MOPS (1ULL << 44) + + /* Armv8.8-a architecture extensions. */ +-#define AARCH64_FL_V8_8 (1ULL << 45) ++#define AARCH64_FL_V8_8A (1ULL << 45) + + /* Armv9.1-A. */ +-#define AARCH64_FL_V9_1 (1ULL << 46) ++#define AARCH64_FL_V9_1A (1ULL << 46) + + /* Armv9.2-A. */ +-#define AARCH64_FL_V9_2 (1ULL << 47) ++#define AARCH64_FL_V9_2A (1ULL << 47) + + /* Armv9.3-A. */ +-#define AARCH64_FL_V9_3 (1ULL << 48) ++#define AARCH64_FL_V9_3A (1ULL << 48) + + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) +@@ -258,36 +258,36 @@ + #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) + #define AARCH64_FL_FOR_ARCH8_1 \ + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ +- | AARCH64_FL_RDMA | AARCH64_FL_V8_1) ++ | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) + #define AARCH64_FL_FOR_ARCH8_2 \ +- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) ++ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A) + #define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) ++ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_ARCH8_4 \ +- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ ++ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) + #define AARCH64_FL_FOR_ARCH8_5 \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ ++ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) + #define AARCH64_FL_FOR_ARCH8_6 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ + | AARCH64_FL_I8MM | AARCH64_FL_BF16) + #define AARCH64_FL_FOR_ARCH8_7 \ +- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7 | AARCH64_FL_LS64) ++ (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64) + #define AARCH64_FL_FOR_ARCH8_8 \ +- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8 | AARCH64_FL_MOPS) ++ (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) + + #define AARCH64_FL_FOR_ARCH8_R \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R) ++ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R) + #define AARCH64_FL_FOR_ARCH9 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | 
AARCH64_FL_V9A \ + | AARCH64_FL_F16) + #define AARCH64_FL_FOR_ARCH9_1 \ +- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) ++ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A) + #define AARCH64_FL_FOR_ARCH9_2 \ +- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) ++ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A) + #define AARCH64_FL_FOR_ARCH9_3 \ +- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) ++ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A) + + /* Macros to test ISA flags. */ + +@@ -297,7 +297,7 @@ + #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) + #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) +-#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) ++#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2A) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) + #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) +@@ -305,31 +305,31 @@ + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) +-#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) ++#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) + #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) +-#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) ++#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) +-#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) + #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) +-#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6A) + #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) + #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) + #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) + #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) + #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) +-#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R) ++#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R) + #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) +-#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9) +-#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1) +-#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2) +-#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3) ++#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A) ++#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1A) ++#define AARCH64_ISA_V9_2A (aarch64_isa_flags & 
AARCH64_FL_V9_2A) ++#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3A) + #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + +-- +2.33.0 +
View file
_service:tar_scm:0110-LoongArch-Redundant-sign-extension-elimination-optim.patch
Added
@@ -0,0 +1,234 @@ +From 54786cec1f52854a70369a3060ed22b1e070f000 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Thu, 11 Jan 2024 19:36:19 +0800 +Subject: [PATCH 110/188] LoongArch: Redundant sign extension elimination + optimization. + +We found that the current combine optimization pass in gcc cannot handle +the following redundant sign extension situations: + +(insn 77 76 78 5 (set (reg:SI 143) + (plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0) + (const_int 1 [0x1]))) {addsi3} + (expr_list:REG_DEAD (reg/v:DI 104 [ len ]) + (nil))) +(insn 78 77 82 5 (set (reg/v:DI 104 [ len ]) + (sign_extend:DI (reg:SI 143))) {extendsidi2} + (nil)) + +Because reg:SI 143 is not died or set in insn 78, no replacement merge will +be performed for the insn sequence. We adjusted the add template to eliminate +redundant sign extensions during the expand pass. +Adjusted based on upstream comments: +https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (add<mode>3): Removed. + (*addsi3): New. + (addsi3): Ditto. + (adddi3): Ditto. + (*addsi3_extended): Removed. + (addsi3_extended): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend.c: Moved to... + * gcc.target/loongarch/sign-extend-1.c: ...here. + * gcc.target/loongarch/sign-extend-2.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../{sign-extend.c => sign-extend-1.c} | 0 + .../gcc.target/loongarch/sign-extend-2.c | 59 ++++++++++++ + 3 files changed, 128 insertions(+), 24 deletions(-) + rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6ebf33cbe..4c7e28ace 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -657,42 +657,87 @@ + [(set_attr "type" "fadd") + (set_attr "mode" "<UNITMODE>")]) + +-(define_insn_and_split "add<mode>3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") +- (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r") +- (match_operand:GPR 2 "plus_<mode>_operand" +- "r,I,La,Lb,Lc,Ld,Le")))] ++(define_insn_and_split "*addsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" ++ "r,I,La,Lb,Le")))] + "" + "@ +- add.<d>\t%0,%1,%2 +- addi.<d>\t%0,%1,%2 ++ add.w\t%0,%1,%2 ++ addi.w\t%0,%1,%2 + # + * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \ + return \"addu16i.d\t%0,%1,%2\"; ++ #" ++ "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ ++ && !ADDU16I_OPERAND (INTVAL (operands[2]))" ++ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] ++ { ++ loongarch_split_plus_constant (&operands[2], SImode); ++ } ++ [(set_attr "alu_type" "add") ++ (set_attr "mode" "SI") ++ (set_attr "insn_count" "1,1,2,1,2")]) ++ ++(define_expand "addsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" "r,I,La,Le,Lb")))] ++ "TARGET_64BIT" ++{ ++ if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) ++ && ADDU16I_OPERAND (INTVAL (operands[2]))) ++ { ++ rtx t1 = gen_reg_rtx (DImode); ++ rtx t2 = gen_reg_rtx (DImode); ++ rtx t3 = gen_reg_rtx (DImode); ++ emit_insn (gen_extend_insn (t1, operands[1], DImode,
SImode, 0)); ++ t2 = operands[2]; ++ emit_insn (gen_adddi3 (t3, t1, t2)); ++ t3 = gen_lowpart (SImode, t3); ++ emit_move_insn (operands[0], t3); ++ DONE; ++ } ++ else ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_addsi3_extended (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "adddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r") ++ (match_operand:DI 2 "plus_di_operand" ++ "r,I,La,Lb,Lc,Ld")))] ++ "TARGET_64BIT" ++ "@ ++ add.d\t%0,%1,%2 ++ addi.d\t%0,%1,%2 + # ++ * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \ ++ return \"addu16i.d\t%0,%1,%2\"; + # + #" +- "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ ++ "&& CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ + && !ADDU16I_OPERAND (INTVAL (operands[2]))" +- [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) +- (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))] ++ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] + { +- loongarch_split_plus_constant (&operands[2], <MODE>mode); ++ loongarch_split_plus_constant (&operands[2], DImode); + } + [(set_attr "alu_type" "add") +- (set_attr "mode" "<MODE>") +- (set_attr "insn_count" "1,1,2,1,2,2,2") +- (set (attr "enabled") +- (cond +- [(match_test "<MODE>mode != DImode && which_alternative == 4") +- (const_string "no") +- (match_test "<MODE>mode != DImode && which_alternative == 5") +- (const_string "no") +- (match_test "<MODE>mode != SImode && which_alternative == 6") +- (const_string "no")] +- (const_string "yes")))]) +- +-(define_insn_and_split "*addsi3_extended" ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "1,1,2,1,2,2")]) ++ ++(define_insn_and_split "addsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r") +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +similarity index 100% +rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c +rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +new file mode 100644 +index 000000000..a45dde4f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -0,0 +1,59 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,0" 1 } } */ ++ ++#include <stdint.h> ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++void ++bt_skip_func (const uint32_t len_limit, const uint32_t pos, ++ const uint8_t *const cur, uint32_t cur_match, ++ uint32_t *const son, const uint32_t cyclic_pos, ++ const uint32_t cyclic_size) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (1) ++ { ++ const uint32_t delta = pos - cur_match; ++ uint32_t *pair ++ = son ++ + ((cyclic_pos - delta + (delta > cyclic_pos ?
cyclic_size : 0)) ++ << 1); ++ const uint8_t *pb = cur - delta; ++ uint32_t len = my_min (len0, len1); ++ ++ if (pblen == curlen) ++ { ++ while (++len != len_limit) ++ if (pblen != curlen) ++ break; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair0; ++ *ptr0 = pair1; ++ return; ++ } ++ } ++ ++ if (pblen < curlen) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} +-- +2.43.0 +
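The motivating pattern is easy to reproduce outside the lzma-derived test above. A minimal sketch (hypothetical code, not shipped with the patch; the function and variable names are invented): on an LP64 target, add.w already leaves a correctly sign-extended 64-bit value in its destination register, so widening the 32-bit sum should not need a separate slli.w.

/* Hypothetical illustration only; compile with -mabi=lp64d -O2 on loongarch64.  */
#include <stdint.h>

int64_t
widen_after_add (int32_t len, const int32_t *p)
{
  int32_t i = len + 1;  /* 32-bit add; add.w sign-extends the result.  */
  return p[i];          /* i is widened to 64 bits for the address
			   arithmetic; after the patch this needs no
			   explicit extension instruction.  */
}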
View file
_service:tar_scm:0110-aarch64-Tweak-stack-clash-boundary-condition.patch
Deleted
@@ -1,125 +0,0 @@
-From f22315d5c19e8310e4dc880fd509678fd291fca8 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:54 +0100
-Subject: [PATCH] aarch64: Tweak stack clash boundary condition
-
-The AArch64 ABI says that, when stack clash protection is used,
-there can be a maximum of 1KiB of unprobed space at sp on entry
-to a function.  Therefore, we need to probe when allocating
->= guard_size - 1KiB of data (>= rather than >).  This is what
-GCC does.
-
-If an allocation is exactly guard_size bytes, it is enough to allocate
-those bytes and probe once at offset 1024.  It isn't possible to use a
-single probe at any other offset: higher would complicate later code,
-by leaving more unprobed space than usual, while lower would risk
-leaving an entire page unprobed.  For simplicity, the code probes all
-allocations at offset 1024.
-
-Some register saves also act as probes.  If we need to allocate
-more space below the last such register save probe, we need to
-probe the allocation if it is > 1KiB.  Again, this allocation is
-then sometimes (but not always) probed at offset 1024.  This sort of
-allocation is currently only used for outgoing arguments, which are
-rarely this big.
-
-However, the code also probed if this final outgoing-arguments
-allocation was == 1KiB, rather than just > 1KiB.  This isn't
-necessary, since the register save then probes at offset 1024
-as required.  Continuing to probe allocations of exactly 1KiB
-would complicate later patches.
-
-gcc/
-	* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
-	Don't probe final allocations that are exactly 1KiB in size (after
-	unprobed space above the final allocation has been deducted).
-
-gcc/testsuite/
-	* gcc.target/aarch64/stack-check-prologue-17.c: New test.
----
- gcc/config/aarch64/aarch64.cc                 |  4 +-
- .../aarch64/stack-check-prologue-17.c         | 55 +++++++++++++++++++
- 2 files changed, 58 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 4d9fcf3d1623..34c1d8614cd9 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9333,9 +9333,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
-   HOST_WIDE_INT guard_size
-     = 1 << param_stack_clash_protection_guard_size;
-   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
-+  HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
-+  gcc_assert (multiple_p (poly_size, byte_sp_alignment));
-   HOST_WIDE_INT min_probe_threshold
-     = (final_adjustment_p
--       ? guard_used_by_caller
-+       ? guard_used_by_caller + byte_sp_alignment
-       : guard_size - guard_used_by_caller);
-   /* When doing the final adjustment for the outgoing arguments, take into
-      account any unprobed space there is above the current SP.  There are
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-new file mode 100644
-index 000000000000..0d8a25d73a24
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-@@ -0,0 +1,55 @@
-+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void f(int, ...);
-+void g();
-+
-+/*
-+** test1:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #1024
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+*/
-+int test1(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+    }
-+  g();
-+  return 1;
-+}
-+
-+/*
-+** test2:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #1040
-+**	str	xzr, \[sp\]
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+*/
-+int test2(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x);
-+    }
-+  g();
-+  return 1;
-+}
---
-2.43.5
-
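The thresholds this (since deleted) tweak set up can be sanity-checked with plain arithmetic. The sketch below is illustrative only, using the 4KiB guard size from the test options and mirroring the variable names in aarch64_allocate_and_probe_stack_space:

/* Illustrative arithmetic only; not GCC source.  */
#include <stdio.h>

int main (void)
{
  long guard_size = 1L << 12;        /* --param stack-clash-protection-guard-size=12 */
  long guard_used_by_caller = 1024;  /* STACK_CLASH_CALLER_GUARD on AArch64 */
  long byte_sp_alignment = 16;       /* STACK_BOUNDARY / BITS_PER_UNIT */

  /* Initial allocations must be probed once they reach guard_size - 1KiB.  */
  printf ("initial probe threshold: %ld\n", guard_size - guard_used_by_caller);

  /* With the patch, a final (outgoing-argument) allocation of exactly 1KiB
     is no longer probed; only allocations strictly above 1KiB are, hence
     the extra byte_sp_alignment in the threshold.  */
  printf ("final probe threshold:   %ld\n", guard_used_by_caller + byte_sp_alignment);
  return 0;
}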
View file
_service:tar_scm:0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
Added
@@ -0,0 +1,398 @@
+From 7da27deb7413d7d1fd2c543617640e2de5b10db0 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:51 +0100
+Subject: [PATCH 012/157] [Backport][SME] aarch64: Rename AARCH64_FL_FOR_ARCH
+ macros
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f833d1900176509e16b6f5563cfe58508fef5d2
+
+This patch renames AARCH64_FL_FOR_ARCH* macros to follow the
+same V<number><profile> names that we (now) use elsewhere.
+
+The names are only temporary -- a later patch will move the
+information to the .def file instead.  However, it helps with
+the sequencing to do this first.
+
+gcc/
+	* config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to...
+	(AARCH64_FL_FOR_V8A): ...this.
+	(AARCH64_FL_FOR_ARCH8_1): Rename to...
+	(AARCH64_FL_FOR_V8_1A): ...this.
+	(AARCH64_FL_FOR_ARCH8_2): Rename to...
+	(AARCH64_FL_FOR_V8_2A): ...this.
+	(AARCH64_FL_FOR_ARCH8_3): Rename to...
+	(AARCH64_FL_FOR_V8_3A): ...this.
+	(AARCH64_FL_FOR_ARCH8_4): Rename to...
+	(AARCH64_FL_FOR_V8_4A): ...this.
+	(AARCH64_FL_FOR_ARCH8_5): Rename to...
+	(AARCH64_FL_FOR_V8_5A): ...this.
+	(AARCH64_FL_FOR_ARCH8_6): Rename to...
+	(AARCH64_FL_FOR_V8_6A): ...this.
+	(AARCH64_FL_FOR_ARCH8_7): Rename to...
+	(AARCH64_FL_FOR_V8_7A): ...this.
+	(AARCH64_FL_FOR_ARCH8_8): Rename to...
+	(AARCH64_FL_FOR_V8_8A): ...this.
+	(AARCH64_FL_FOR_ARCH8_R): Rename to...
+	(AARCH64_FL_FOR_V8R): ...this.
+	(AARCH64_FL_FOR_ARCH9): Rename to...
+	(AARCH64_FL_FOR_V9A): ...this.
+	(AARCH64_FL_FOR_ARCH9_1): Rename to...
+	(AARCH64_FL_FOR_V9_1A): ...this.
+	(AARCH64_FL_FOR_ARCH9_2): Rename to...
+	(AARCH64_FL_FOR_V9_2A): ...this.
+	(AARCH64_FL_FOR_ARCH9_3): Rename to...
+	(AARCH64_FL_FOR_V9_3A): ...this.
+	* common/config/aarch64/aarch64-common.cc (all_cores): Update
+	accordingly.
+	* config/aarch64/aarch64-arches.def: Likewise.
+	* config/aarch64/aarch64-cores.def: Likewise.
+	* config/aarch64/aarch64.cc (all_cores): Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc |   2 +-
+ gcc/config/aarch64/aarch64-arches.def       |  28 ++---
+ gcc/config/aarch64/aarch64-cores.def        | 130 ++++++++++----------
+ gcc/config/aarch64/aarch64.cc               |   2 +-
+ gcc/config/aarch64/aarch64.h                |  56 ++++-----
+ 5 files changed, 109 insertions(+), 109 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 3dc020f0c..0461201a5 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] =
+ #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
+   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
+ #include "config/aarch64/aarch64-cores.def"
+-  {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
++  {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
+   {"", aarch64_no_arch, 0}
+ };
+ 
+diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
+index 6150448dc..c6bf7d82c 100644
+--- a/gcc/config/aarch64/aarch64-arches.def
++++ b/gcc/config/aarch64/aarch64-arches.def
+@@ -30,19 +30,19 @@
+    Due to the assumptions about the positions of these fields in config.gcc,
+    the NAME should be kept as the first argument and FLAGS as the last.
*/ + +-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8) +-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1) +-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) +-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) +-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) +-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) +-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) +-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8_7) +-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) +-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) +-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) +-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) +-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) +-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) ++AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) ++AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) ++AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) ++AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) ++AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) ++AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) ++AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) ++AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) ++AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) ++AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) ++AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) ++AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) ++AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) ++AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 0402bfb74..c4038c641 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, 
cortexa73, 0x41, 0xd4a, -1) ++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) ++AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) ++AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) ++AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) ++AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) + + /* Cavium ('C') cores. 
*/ +-AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) ++AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) ++AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) + /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */ +-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) ++AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) ++AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) ++AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) + + /* Fujitsu ('F') cores. */ +-AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) ++AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) + + /* HiSilicon ('H') cores. */ +-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) + + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) +-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) +-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) +-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) ++AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) ++AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) ++AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) ++AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) + + /* ARM DynamIQ big.LITTLE configurations. 
*/ + +-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) +-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) ++AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) ++AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) + + /* Armv8-R Architecture Processors. */ +-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cortexa53, 0x41, 0xd15, -1) ++AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) + + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) + +-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) ++AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) + +-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) ++AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + +-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) + +-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | 
AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + + #undef AARCH64_CORE +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 254ecfaa2..3714c1047 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -2949,7 +2949,7 @@ static const struct processor all_cores = + FLAGS, &COSTS##_tunings}, + #include "aarch64-cores.def" + {"generic", generic, cortexa53, AARCH64_ARCH_8A, +- AARCH64_FL_FOR_ARCH8, &generic_tunings}, ++ AARCH64_FL_FOR_V8A, &generic_tunings}, + {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 5a91dfdd2..918a14193 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -255,39 +255,39 @@ + #define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD) + + /* Architecture flags that effect instruction selection. */ +-#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) +-#define AARCH64_FL_FOR_ARCH8_1 \ +- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ ++#define AARCH64_FL_FOR_V8A (AARCH64_FL_FPSIMD) ++#define AARCH64_FL_FOR_V8_1A \ ++ (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \ + | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) +-#define AARCH64_FL_FOR_ARCH8_2 \ +- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A) +-#define AARCH64_FL_FOR_ARCH8_3 \ +- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) +-#define AARCH64_FL_FOR_ARCH8_4 \ +- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ ++#define AARCH64_FL_FOR_V8_2A \ ++ (AARCH64_FL_FOR_V8_1A | AARCH64_FL_V8_2A) ++#define AARCH64_FL_FOR_V8_3A \ ++ (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) ++#define AARCH64_FL_FOR_V8_4A \ ++ (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) +-#define AARCH64_FL_FOR_ARCH8_5 \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \ ++#define AARCH64_FL_FOR_V8_5A \ ++ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) +-#define AARCH64_FL_FOR_ARCH8_6 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ ++#define AARCH64_FL_FOR_V8_6A \ ++ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ + | AARCH64_FL_I8MM | AARCH64_FL_BF16) +-#define AARCH64_FL_FOR_ARCH8_7 \ +- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64) +-#define AARCH64_FL_FOR_ARCH8_8 \ +- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) +- +-#define AARCH64_FL_FOR_ARCH8_R \ +- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R) +-#define AARCH64_FL_FOR_ARCH9 \ +- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ ++#define AARCH64_FL_FOR_V8_7A \ ++ (AARCH64_FL_FOR_V8_6A | AARCH64_FL_V8_7A | AARCH64_FL_LS64) ++#define AARCH64_FL_FOR_V8_8A \ ++ (AARCH64_FL_FOR_V8_7A | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) ++ ++#define AARCH64_FL_FOR_V8R \ ++ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8R) ++#define AARCH64_FL_FOR_V9A \ ++ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ + | AARCH64_FL_F16) +-#define AARCH64_FL_FOR_ARCH9_1 \ +- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A) +-#define AARCH64_FL_FOR_ARCH9_2 \ +- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A) +-#define AARCH64_FL_FOR_ARCH9_3 \ +- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A) ++#define AARCH64_FL_FOR_V9_1A \ ++ 
(AARCH64_FL_FOR_V9A | AARCH64_FL_FOR_V8_6A | AARCH64_FL_V9_1A) ++#define AARCH64_FL_FOR_V9_2A \ ++ (AARCH64_FL_FOR_V9_1A | AARCH64_FL_FOR_V8_7A | AARCH64_FL_V9_2A) ++#define AARCH64_FL_FOR_V9_3A \ ++ (AARCH64_FL_FOR_V9_2A | AARCH64_FL_FOR_V8_8A | AARCH64_FL_V9_3A) + + /* Macros to test ISA flags. */ + +-- +2.33.0 +
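The remark about valid C identifiers refers to the token-pasting consumers of these .def files, one of which (the all_cores table in aarch64-common.cc) appears in the hunk above. Below is a simplified, self-contained sketch of that pattern; the struct, field list, and enum stand-in are abbreviations for illustration, not the real interface:

#include <stdio.h>

enum aarch64_arch { AARCH64_ARCH_V8A };  /* stand-in for the generated enum */

struct core_entry { const char *name; enum aarch64_arch arch; unsigned long flags; };

/* The ## paste only produces a usable name if ARCH_IDENT is a valid
   identifier fragment, which is why the field is being regularized.  */
#define AARCH64_CORE(NAME, IDENT, ARCH_IDENT, FLAGS) \
  { NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS },

static const struct core_entry cores[] = {
  AARCH64_CORE ("cortex-a53", cortexa53, V8A, 0)  /* expands via the paste */
};
#undef AARCH64_CORE

int main (void) { printf ("%s\n", cores[0].name); return 0; }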
View file
_service:tar_scm:0111-LoongArch-Redundant-sign-extension-elimination-optim.patch
Added
@@ -0,0 +1,56 @@
+From 7bb1a356ca9eefab48d64bd3deeaac081c1ae7ea Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Thu, 11 Jan 2024 19:36:33 +0800
+Subject: [PATCH 111/188] LoongArch: Redundant sign extension elimination
+ optimization 2.
+
+Eliminate the redundant sign extension that exists after the conditional
+move when the target register is SImode.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
+	Adjust.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/sign-extend-2.c: Adjust.
+---
+ gcc/config/loongarch/loongarch.cc                  | 6 ++++++
+ gcc/testsuite/gcc.target/loongarch/sign-extend-2.c | 5 +++--
+ 2 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 533bae5b2..13481130b 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5367,6 +5367,12 @@ loongarch_expand_conditional_move (rtx *operands)
+       rtx temp3 = gen_reg_rtx (mode);
+       emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+       temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
++      /* Nonzero in a subreg if it was made when accessing an object that
++	 was promoted to a wider mode in accord with the PROMOTED_MODE
++	 machine description macro.  */
++      SUBREG_PROMOTED_VAR_P (temp3) = 1;
++      /* Sets promoted mode for SUBREG_PROMOTED_VAR_P.  */
++      SUBREG_PROMOTED_SET (temp3, SRP_SIGNED);
+       loongarch_emit_move (operands[0], temp3);
+     }
+   else
+diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
+index a45dde4f7..e57a2727d 100644
+--- a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
+@@ -1,6 +1,7 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O2" } */
+-/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,0" 1 } } */
++/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand" } */
++/* { dg-final { scan-rtl-dump "subreg/s" "expand" } } */
++/* { dg-final { scan-assembler-not "slli.w\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,0" } } */
+ 
+ #include <stdint.h>
+ #define my_min(x, y) ((x) < (y) ? (x) : (y))
+--
+2.43.0
+
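For reference, a minimal source-level shape that reaches loongarch_expand_conditional_move with an SImode destination; this is a hypothetical example with invented names, not a file from the patch:

/* Hypothetical illustration; with -mabi=lp64d -O2 the select expands to a
   masked conditional move, and the later widening needs no extra slli.w.  */
#include <stdint.h>

int64_t
cmov_then_widen (int32_t a, int32_t b, int32_t c)
{
  int32_t t = c ? a : b;  /* SImode conditional move */
  return t;               /* sign extension now known to be redundant */
}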
View file
_service:tar_scm:0111-aarch64-Put-LR-save-probe-in-first-16-bytes.patch
Deleted
@@ -1,406 +0,0 @@
-From 15e18831bf98fd25af098b970ebf0c9a6200a34b Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:55 +0100
-Subject: [PATCH] aarch64: Put LR save probe in first 16 bytes
-
--fstack-clash-protection uses the save of LR as a probe for the next
-allocation.  The next allocation could be:
-
-* another part of the static frame, e.g. when allocating SVE save slots
-  or outgoing arguments
-
-* an alloca in the same function
-
-* an allocation made by a callee function
-
-However, when -fomit-frame-pointer is used, the LR save slot is placed
-above the other GPR save slots.  It could therefore be up to 80 bytes
-above the base of the GPR save area (which is also the hard fp address).
-
-aarch64_allocate_and_probe_stack_space took this into account when
-deciding how much subsequent space could be allocated without needing
-a probe.  However, it interacted badly with:
-
-      /* If doing a small final adjustment, we always probe at offset 0.
-	 This is done to avoid issues when LR is not at position 0 or when
-	 the final adjustment is smaller than the probing offset.  */
-      else if (final_adjustment_p && rounded_size == 0)
-	residual_probe_offset = 0;
-
-which forces any allocation that is smaller than the guard page size
-to be probed at offset 0 rather than the usual offset 1024.  It was
-therefore possible to construct cases in which we had:
-
-* a probe using LR at SP + 80 bytes (or some other value >= 16)
-* an allocation of the guard page size - 16 bytes
-* a probe at SP + 0
-
-which allocates guard page size + 64 consecutive unprobed bytes.
-
-This patch requires the LR probe to be in the first 16 bytes of the
-save area when stack clash protection is active.  Doing it
-unconditionally would cause code-quality regressions.
-
-Putting LR before other registers prevents push/pop allocation
-when shadow call stacks are enabled, since LR is restored
-separately from the other callee-saved registers.
-
-The new comment doesn't say that the probe register is required
-to be LR, since a later patch removes that restriction.
-
-gcc/
-	* config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that
-	the LR save slot is in the first 16 bytes of the register save area.
-	Only form STP/LDP push/pop candidates if both registers are valid.
-	(aarch64_allocate_and_probe_stack_space): Remove workaround for
-	when LR was not in the first 16 bytes.
-
-gcc/testsuite/
-	* gcc.target/aarch64/stack-check-prologue-18.c: New test.
-	* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
-	* gcc.target/aarch64/stack-check-prologue-20.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc                 |  72 ++++++------
- .../aarch64/stack-check-prologue-18.c         | 100 ++++++++++++++++++
- .../aarch64/stack-check-prologue-19.c         | 100 ++++++++++++++++++
- .../aarch64/stack-check-prologue-20.c         |   3 +
- 4 files changed, 233 insertions(+), 42 deletions(-)
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
- create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 34c1d8614cd9..16433fb70f4f 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8273,26 +8273,34 @@ aarch64_layout_frame (void)
-   bool saves_below_hard_fp_p
-     = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
-   frame.bytes_below_hard_fp = offset;
-+
-+  auto allocate_gpr_slot = [&](unsigned int regno)
-+    {
-+      frame.reg_offset[regno] = offset;
-+      if (frame.wb_push_candidate1 == INVALID_REGNUM)
-+	frame.wb_push_candidate1 = regno;
-+      else if (frame.wb_push_candidate2 == INVALID_REGNUM)
-+	frame.wb_push_candidate2 = regno;
-+      offset += UNITS_PER_WORD;
-+    };
-+
-   if (frame.emit_frame_chain)
-     {
-       /* FP and LR are placed in the linkage record.  */
--      frame.reg_offset[R29_REGNUM] = offset;
--      frame.wb_push_candidate1 = R29_REGNUM;
--      frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
--      frame.wb_push_candidate2 = R30_REGNUM;
--      offset += 2 * UNITS_PER_WORD;
-+      allocate_gpr_slot (R29_REGNUM);
-+      allocate_gpr_slot (R30_REGNUM);
-     }
-+  else if (flag_stack_clash_protection
-+	   && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
-+    /* Put the LR save slot first, since it makes a good choice of probe
-+       for stack clash purposes.  The idea is that the link register usually
-+       has to be saved before a call anyway, and so we lose little by
-+       stopping it from being individually shrink-wrapped.  */
-+    allocate_gpr_slot (R30_REGNUM);
- 
-   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
-     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
--      {
--	frame.reg_offset[regno] = offset;
--	if (frame.wb_push_candidate1 == INVALID_REGNUM)
--	  frame.wb_push_candidate1 = regno;
--	else if (frame.wb_push_candidate2 == INVALID_REGNUM)
--	  frame.wb_push_candidate2 = regno;
--	offset += UNITS_PER_WORD;
--      }
-+      allocate_gpr_slot (regno);
- 
-   poly_int64 max_int_offset = offset;
-   offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-@@ -8370,10 +8378,13 @@ aarch64_layout_frame (void)
-      max_push_offset to 0, because no registers are popped at this time,
-      so callee_adjust cannot be adjusted.  */
-   HOST_WIDE_INT max_push_offset = 0;
--  if (frame.wb_pop_candidate2 != INVALID_REGNUM)
--    max_push_offset = 512;
--  else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
--    max_push_offset = 256;
-+  if (frame.wb_pop_candidate1 != INVALID_REGNUM)
-+    {
-+      if (frame.wb_pop_candidate2 != INVALID_REGNUM)
-+	max_push_offset = 512;
-+      else
-+	max_push_offset = 256;
-+    }
- 
-   HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
-   HOST_WIDE_INT const_saved_regs_size;
-@@ -9339,29 +9350,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
-     = (final_adjustment_p
-	? guard_used_by_caller + byte_sp_alignment
-	: guard_size - guard_used_by_caller);
--  /* When doing the final adjustment for the outgoing arguments, take into
--     account any unprobed space there is above the current SP.  There are
--     two cases:
--
--     - When saving SVE registers below the hard frame pointer, we force
--       the lowest save to take place in the prologue before doing the final
--       adjustment (i.e. we don't allow the save to be shrink-wrapped).
--       This acts as a probe at SP, so there is no unprobed space.
--
--     - When there are no SVE register saves, we use the store of the link
--       register as a probe.  We can't assume that LR was saved at position 0
--       though, so treat any space below it as unprobed.  */
--  if (final_adjustment_p
--      && known_eq (frame.below_hard_fp_saved_regs_size, 0))
--    {
--      poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
--			      - frame.bytes_below_saved_regs);
--      if (known_ge (lr_offset, 0))
--	min_probe_threshold -= lr_offset.to_constant ();
--      else
--	gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
--    }
--
-   poly_int64 frame_size = frame.frame_size;
- 
-   /* We should always have a positive probe threshold.  */
-@@ -9541,8 +9529,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
-   if (final_adjustment_p && rounded_size != 0)
-     min_probe_threshold = 0;
-   /* If doing a small final adjustment, we always probe at offset 0.
--     This is done to avoid issues when LR is not at position 0 or when
--     the final adjustment is smaller than the probing offset.  */
-+     This is done to avoid issues when the final adjustment is smaller
-+     than the probing offset.  */
-   else if (final_adjustment_p && rounded_size == 0)
-     residual_probe_offset = 0;
- 
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-new file mode 100644
-index 000000000000..82447d20fff5
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-@@ -0,0 +1,100 @@
-+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void f(int, ...);
-+void g();
-+
-+/*
-+** test1:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #4064
-+**	str	xzr, \[sp\]
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+**	str	x26, \[sp, #?4128\]
-+**	...
-+*/
-+int test1(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      asm volatile ("" :::
-+		    "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+    }
-+  g();
-+  return 1;
-+}
-+
-+/*
-+** test2:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #1040
-+**	str	xzr, \[sp\]
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+*/
-+int test2(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      asm volatile ("" :::
-+		    "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x);
-+    }
-+  g();
-+  return 1;
-+}
-+
-+/*
-+** test3:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #1024
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+*/
-+int test3(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      asm volatile ("" :::
-+		    "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+    }
-+  g();
-+  return 1;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-new file mode 100644
-index 000000000000..73ac3e4e4eb0
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-@@ -0,0 +1,100 @@
-+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
-+/* { dg-final { check-function-bodies "**" "" } } */
-+
-+void f(int, ...);
-+void g();
-+
-+/*
-+** test1:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #4064
-+**	str	xzr, \[sp\]
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+**	str	x26, \[sp, #?4128\]
-+**	...
-+*/
-+int test1(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      asm volatile ("" :::
-+		    "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+    }
-+  g();
-+  return 1;
-+}
-+
-+/*
-+** test2:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #1040
-+**	str	xzr, \[sp\]
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+*/
-+int test2(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      asm volatile ("" :::
-+		    "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x);
-+    }
-+  g();
-+  return 1;
-+}
-+
-+/*
-+** test3:
-+**	...
-+**	str	x30, \[sp\]
-+**	sub	sp, sp, #1024
-+**	cbnz	w0, .*
-+**	bl	g
-+**	...
-+*/
-+int test3(int z) {
-+  __uint128_t x = 0;
-+  int y[0x400];
-+  if (z)
-+    {
-+      asm volatile ("" :::
-+		    "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
-+      f(0, 0, 0, 0, 0, 0, 0, &y,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
-+	x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-+    }
-+  g();
-+  return 1;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
-new file mode 100644
-index 000000000000..690aae8dfd5b
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
-@@ -0,0 +1,3 @@
-+/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
-+
-+#include "stack-check-prologue-19.c"
---
-2.43.5
-
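The hazard described in the commit message reduces to simple arithmetic. The sketch below is an illustration only, using the 4KiB guard size from the test options; it is not GCC source:

/* Reproduces the "guard page size + 64 consecutive unprobed bytes" figure
   from the commit message above.  */
#include <stdio.h>

int main (void)
{
  long guard = 4096;       /* guard page size */
  long lr_probe = 80;      /* LR save (the probe) up to 80 bytes above the area base */
  long alloc = guard - 16; /* an allocation just under the guard size */

  /* Consecutive unprobed bytes between the LR probe and the probe at the
     new SP + 0: 80 + 4080 = 4160 = guard + 64, exceeding the guard page.  */
  printf ("unprobed span: %ld bytes\n", lr_probe + alloc);
  return 0;
}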
View file
_service:tar_scm:0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
Added
@@ -0,0 +1,315 @@
+From ed8ce0b31f2b608f0360af1ffd5375ea7809aba7 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:52 +0100
+Subject: [PATCH 013/157] [Backport][SME] aarch64: Add "V" to
+ aarch64-arches.def names
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=00c22ba69d8e738a4789b30165ff9c925c508fc1
+
+This patch completes the renaming of architecture-level related
+things by adding "V" to the name of the architecture in
+aarch64-arches.def.  Since the "V" is predictable, we can easily
+drop it when we don't need it (as when matching /proc/cpuinfo).
+
+Having a valid C identifier is necessary for later patches.
+
+gcc/
+	* config/aarch64/aarch64-arches.def: Add a leading "V" to the
+	ARCH_IDENT fields.
+	* config/aarch64/aarch64-cores.def: Update accordingly.
+	* common/config/aarch64/aarch64-common.cc (all_cores): Likewise.
+	* config/aarch64/aarch64.cc (all_cores): Likewise.
+	* config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the
+	leading "V".
+---
+ gcc/common/config/aarch64/aarch64-common.cc |   2 +-
+ gcc/config/aarch64/aarch64-arches.def       |  28 ++---
+ gcc/config/aarch64/aarch64-cores.def        | 130 ++++++++++----------
+ gcc/config/aarch64/aarch64.cc               |   2 +-
+ gcc/config/aarch64/driver-aarch64.cc        |   3 +-
+ 5 files changed, 83 insertions(+), 82 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 0461201a5..6ca89d31f 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] =
+ #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
+   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
+ #include "config/aarch64/aarch64-cores.def"
+-  {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
++  {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
+   {"", aarch64_no_arch, 0}
+ };
+ 
+diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
+index c6bf7d82c..e42202822 100644
+--- a/gcc/config/aarch64/aarch64-arches.def
++++ b/gcc/config/aarch64/aarch64-arches.def
+@@ -30,19 +30,19 @@
+    Due to the assumptions about the positions of these fields in config.gcc,
+    the NAME should be kept as the first argument and FLAGS as the last.
*/ + +-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) +-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) +-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) +-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) +-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) +-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) +-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) +-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) +-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) +-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) +-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) +-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) +-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) +-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) ++AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) ++AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) ++AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) ++AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) ++AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) ++AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) ++AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) ++AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) ++AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) ++AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) ++AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) ++AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) ++AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index c4038c641..f4c2f4ea4 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) 
++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) ++AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) ++AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) ++AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) ++AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) + + /* Cavium ('C') cores. 
*/ +-AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) ++AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) ++AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) + /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */ +-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) ++AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) ++AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) ++AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) + + /* Fujitsu ('F') cores. */ +-AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) ++AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) + + /* HiSilicon ('H') cores. */ +-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) + + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) +-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) +-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) +-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) ++AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) ++AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) ++AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) ++AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) + + /* ARM DynamIQ big.LITTLE configurations. 
*/ + +-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) +-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) ++AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) ++AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) + + /* Armv8-R Architecture Processors. */ +-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) ++AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) + + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) + +-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) ++AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) + +-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) ++AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + +-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) + +-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | 
AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+
+ #undef AARCH64_CORE
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 3714c1047..22b51e12f 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -2948,7 +2948,7 @@ static const struct processor all_cores =
+ {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
+ FLAGS, &COSTS##_tunings},
+ #include "aarch64-cores.def"
+- {"generic", generic, cortexa53, AARCH64_ARCH_8A,
++ {"generic", generic, cortexa53, AARCH64_ARCH_V8A,
+ AARCH64_FL_FOR_V8A, &generic_tunings},
+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
+ };
+diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
+index d714a8bda..644780ef2 100644
+--- a/gcc/config/aarch64/driver-aarch64.cc
++++ b/gcc/config/aarch64/driver-aarch64.cc
+@@ -78,8 +78,9 @@ struct aarch64_arch_driver_info
+ const uint64_t flags;
+ };
+
++/* Skip the leading "V" in the architecture name. */
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
+- { #ARCH_IDENT, NAME, FLAGS },
++ { #ARCH_IDENT + 1, NAME, FLAGS },
+
+ static struct aarch64_arch_driver_info aarch64_arches[] =
+ {
+--
+2.33.0
+
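The driver-aarch64.cc change above works because "#ARCH_IDENT" stringizes the identifier and the "+ 1" steps past its first character, so a table entry such as V8A is stored as the string "8A", which is what /proc/cpuinfo matching wants. A minimal standalone C sketch of the same trick (the macro name here is illustrative, not GCC's):

#include <stdio.h>

/* Stringize an architecture identifier and skip its leading "V",
   mirroring the "#ARCH_IDENT + 1" pattern in driver-aarch64.cc.  */
#define ARCH_NAME_WITHOUT_V(IDENT) (#IDENT + 1)

int main (void)
{
  printf ("%s\n", ARCH_NAME_WITHOUT_V (V8A));   /* prints "8A" */
  printf ("%s\n", ARCH_NAME_WITHOUT_V (V9_2A)); /* prints "9_2A" */
  return 0;
}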
_service:tar_scm:0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch
Added
@@ -0,0 +1,64 @@ +From 191675bdfd4cef0fbcf642f53da82a49bd23a3bf Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 12 Jan 2024 17:06:30 +0800
+Subject: [PATCH 112/188] LoongArch: Assign the '/u' attribute to the mem to
+ which the global offset table belongs.
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.cc (loongarch_split_symbol):
+ Assign the '/u' attribute to the mem.
+
+gcc/testsuite/ChangeLog:
+
+ * g++.target/loongarch/got-load.C: New test.
+---
+ gcc/config/loongarch/loongarch.cc | 5 +++++
+ gcc/testsuite/g++.target/loongarch/got-load.C | 19 +++++++++++++++++++
+ 2 files changed, 24 insertions(+)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/got-load.C
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 13481130b..7da00c132 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -3198,6 +3198,11 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
+ rtx mem = gen_rtx_MEM (Pmode, low);
+ *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+ UNSPEC_LOAD_FROM_GOT);
++
++ /* Nonzero in a mem, if the memory is statically allocated and
++ read-only. A common example of the latter is a shared library's
++ global offset table. */
++ MEM_READONLY_P (mem) = 1;
+ }
+
+ break;
+diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C
+new file mode 100644
+index 000000000..20924c739
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/got-load.C
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */
++/* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */
++
++#include <bits/stdc++.h>
++
++using namespace std;
++
++int lr[100005][2];
++
++void
++test(void)
++{
++ int n;
++
++ cin >> n;
++ for (int i = 0; i < n; ++i)
++ cin >> lr[i][0] >> lr[i][1];
++}
+--
+2.43.0
+
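For context, the '/u' flag set by MEM_READONLY_P tells the RTL optimizers that the memory behind the GOT load never changes after dynamic relocation, so repeated loads of a GOT entry may be hoisted or merged. A rough source-level analogy in standalone C (illustrative only, not GCC internals):

#include <stdio.h>

/* Analogy: TABLE is immutable, so the compiler may treat reads of it
   as loop-invariant -- the same licence MEM_READONLY_P grants at the
   RTL level for GOT entries, which never change after relocation.  */
static const int table[2] = { 40, 2 };

static int sum (int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += table[i & 1];  /* table[] can never be written to here */
  return s;
}

int main (void)
{
  printf ("%d\n", sum (2)); /* prints 42 */
  return 0;
}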
_service:tar_scm:0112-aarch64-Simplify-probe-of-final-frame-allocation.patch
Deleted
@@ -1,123 +0,0 @@ -From c4f0e121faa36342f1d21919e54a05ad841c4f86 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:55 +0100
-Subject: [PATCH] aarch64: Simplify probe of final frame allocation
-
-Previous patches ensured that the final frame allocation only needs
-a probe when the size is strictly greater than 1KiB. It's therefore
-safe to use the normal 1024 probe offset in all cases.
-
-The main motivation for doing this is to simplify the code and
-remove the number of special cases.
-
-gcc/
- * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
- Always probe the residual allocation at offset 1024, asserting
- that that is in range.
-
-gcc/testsuite/
- * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
- to be at offset 1024 rather than offset 0.
- * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
- * gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
----
- gcc/config/aarch64/aarch64.cc | 12 ++++--------
- .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
- .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++--
- .../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++--
- 4 files changed, 9 insertions(+), 13 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 16433fb70f4f..8abf3d7a1e2b 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -9523,16 +9523,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
- are still safe. */
- if (residual)
- {
-- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
-+ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
-+
- /* If we're doing final adjustments, and we've done any full page
- allocations then any residual needs to be probed. */
- if (final_adjustment_p && rounded_size != 0)
- min_probe_threshold = 0;
-- /* If doing a small final adjustment, we always probe at offset 0.
-- This is done to avoid issues when the final adjustment is smaller
-- than the probing offset. */
-- else if (final_adjustment_p && rounded_size == 0)
-- residual_probe_offset = 0;
-
- aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
- if (residual >= min_probe_threshold)
-@@ -9543,8 +9539,8 @@
- HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
- "\n", residual);
-
-- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-- residual_probe_offset));
-+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-+ guard_used_by_caller));
- emit_insn (gen_blockage ());
- }
- }
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-index 0d8a25d73a24..f0ec1389771d 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
-@@ -33,7 +33,7 @@ int test1(int z) {
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #1040
---** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-index 82447d20fff5..6383bec5ebcd 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
-@@ -9,7 +9,7 @@ void g();
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #4064
---** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-@@ -50,7 +50,7 @@ int test1(int z) {
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #1040
---** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-index 73ac3e4e4eb0..562039b5e9b8 100644
---- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
-@@ -9,7 +9,7 @@ void g();
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #4064
---** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
-@@ -50,7 +50,7 @@ int test1(int z) {
- ** ...
- ** str x30, \[sp\]
- ** sub sp, sp, #1040
---** str xzr, \[sp\]
-+** str xzr, \[sp, #?1024\]
- ** cbnz w0, .*
- ** bl g
- ** ...
---
-2.43.5
-
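The deleted special case is safe to drop because of simple arithmetic: a residual allocation that still needs a probe is strictly larger than 1 KiB, so a probe at offset 1024 always lands inside the region just allocated. A standalone sketch of that invariant, with the caller guard and alignment values assumed for illustration:

#include <assert.h>
#include <stdio.h>

/* Assumed values for illustration: callers may touch up to 1 KiB below
   sp, and the stack pointer is 16-byte aligned.  */
#define GUARD_USED_BY_CALLER 1024
#define BYTE_SP_ALIGNMENT    16

/* Any residual allocation that needs a probe is > 1 KiB, so a probe at
   offset 1024 always falls inside the newly allocated region.  */
static void probe_residual (long residual)
{
  assert (GUARD_USED_BY_CALLER + BYTE_SP_ALIGNMENT <= residual);
  printf ("sub sp, sp, #%ld ; str xzr, [sp, #%d]\n",
          residual, GUARD_USED_BY_CALLER);
}

int main (void)
{
  probe_residual (1040); /* the stack-check-prologue-17.c case */
  probe_residual (4064); /* the stack-check-prologue-18.c case */
  return 0;
}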
_service:tar_scm:0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
Added
@@ -0,0 +1,55 @@ +From aac2b2d4191d08a107c3ff8d98602355988a5558 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:52 +0100
+Subject: [PATCH 014/157] [Backport][SME] aarch64: Small config.gcc cleanups
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0af214b447529453b356e8e480d7d35b3e642f0e
+
+The aarch64-option-extensions.def parsing in config.gcc had
+some code left over from when it tried to parse the whole
+macro definition. Also, config.gcc now only looks at the
+first fields of the aarch64-arches.def entries.
+
+gcc/
+ * config.gcc: Remove dead aarch64-option-extensions.def code.
+ * config/aarch64/aarch64-arches.def: Update comment.
+---
+ gcc/config.gcc | 8 --------
+ gcc/config/aarch64/aarch64-arches.def | 2 +-
+ 2 files changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 3be450471..da66603cd 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -4210,14 +4210,6 @@ case "${target}" in
+ options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \
+ ${srcdir}/config/aarch64/aarch64-option-extensions.def`"
+
+- # Match one element inside AARCH64_OPT_EXTENSION, we
+- # consume anything that's not a ,.
+- elem="[ ]*\([^,]\+\)[ ]*"
+-
+- # Repeat the pattern for the number of entries in the
+- # AARCH64_OPT_EXTENSION, currently 6 times.
+- sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem"
+-
+ while [ x"$ext_val" != x ]
+ do
+ ext_val=`echo $ext_val | sed -e 's/\+//'`
+diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
+index e42202822..ece96e22a 100644
+--- a/gcc/config/aarch64/aarch64-arches.def
++++ b/gcc/config/aarch64/aarch64-arches.def
+@@ -28,7 +28,7 @@
+ ARCH_REV is an integer specifying the architecture major revision.
+ FLAGS are the flags implied by the architecture.
+ Due to the assumptions about the positions of these fields in config.gcc,
+- the NAME should be kept as the first argument and FLAGS as the last. */
++ NAME should be kept as the first argument. */
+
+ AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A)
+ AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A)
+--
+2.33.0
+
_service:tar_scm:0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch
Added
@@ -0,0 +1,51 @@ +From 1576f83f8cae0ead9de533566ec5f21e7a01f842 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Sat, 13 Jan 2024 15:28:34 +0800
+Subject: [PATCH 113/188] LoongArch: testsuite: Fix fail in gen-vect-{2,25}.c
+ file.
+
+1. Added dg-do compile on LoongArch.
+ When binutils does not support vector instruction sets, an error occurs
+because the assembler does not recognize vector instructions.
+
+2. Added the "-mlsx" option for vectorization on LoongArch.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.dg/tree-ssa/gen-vect-2.c: Added detection of compilation
+ behavior and the "-mlsx" option on LoongArch.
+ * gcc.dg/tree-ssa/gen-vect-25.c: Ditto.
+---
+ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c | 2 ++
+ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
+index 42171a2fb..395d6f7ee 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
+@@ -1,6 +1,8 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */
+ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */
+
+ #include <stdlib.h>
+
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
+index 60ec27054..cea7f246a 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
+@@ -1,6 +1,8 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */
+
+ #include <stdlib.h>
+
+--
+2.43.0
+
_service:tar_scm:0113-aarch64-Explicitly-record-probe-registers-in-frame-info.patch
Deleted
@@ -1,277 +0,0 @@ -From 6f0ab0a9f46a17b68349ff6035aa776bf65f0575 Mon Sep 17 00:00:00 2001
-From: Richard Sandiford <richard.sandiford@arm.com>
-Date: Tue, 12 Sep 2023 16:08:56 +0100
-Subject: [PATCH] aarch64: Explicitly record probe registers in frame info
-
-The stack frame is currently divided into three areas:
-
-A: the area above the hard frame pointer
-B: the SVE saves below the hard frame pointer
-C: the outgoing arguments
-
-If the stack frame is allocated in one chunk, the allocation needs a
-probe if the frame size is >= guard_size - 1KiB. In addition, if the
-function is not a leaf function, it must probe an address no more than
-1KiB above the outgoing SP. We ensured the second condition by
-
-(1) using single-chunk allocations for non-leaf functions only if
- the link register save slot is within 512 bytes of the bottom
- of the frame; and
-
-(2) using the link register save as a probe (meaning, for instance,
- that it can't be individually shrink wrapped)
-
-If instead the stack is allocated in multiple chunks, then:
-
-* an allocation involving only the outgoing arguments (C above) requires
- a probe if the allocation size is > 1KiB
-
-* any other allocation requires a probe if the allocation size
- is >= guard_size - 1KiB
-
-* second and subsequent allocations require the previous allocation
- to probe at the bottom of the allocated area, regardless of the size
- of that previous allocation
-
-The final point means that, unlike for single allocations,
-it can be necessary to have both a non-SVE register probe and
-an SVE register probe. For example:
-
-* allocate A, probe using a non-SVE register save
-* allocate B, probe using an SVE register save
-* allocate C
-
-The non-SVE register used in this case was again the link register.
-It was previously used even if the link register save slot was some
-bytes above the bottom of the non-SVE register saves, but an earlier
-patch avoided that by putting the link register save slot first.
-
-As a belt-and-braces fix, this patch explicitly records which
-probe registers we're using and allows the non-SVE probe to be
-whichever register comes first (as for SVE).
-
-The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c.
-
-gcc/
- * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe)
- (aarch64_frame::hard_fp_save_and_probe): New fields.
- * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them.
- Rather than asserting that a leaf function saves LR, instead assert
- that a leaf function saves something.
- (aarch64_get_separate_components): Prevent the chosen probe
- registers from being individually shrink-wrapped.
- (aarch64_allocate_and_probe_stack_space): Remove workaround for
- probe registers that aren't at the bottom of the previous allocation.
-
-gcc/testsuite/
- * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes.
----
- gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++----
- gcc/config/aarch64/aarch64.h | 8 +++
- .../aarch64/sve/pcs/stack_clash_3.c | 6 +-
- 3 files changed, 64 insertions(+), 18 deletions(-)
-
-diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
-index 8abf3d7a1e2b..a8d907df8843 100644
---- a/gcc/config/aarch64/aarch64.cc
-+++ b/gcc/config/aarch64/aarch64.cc
-@@ -8210,15 +8210,11 @@ aarch64_layout_frame (void)
- && !crtl->abi->clobbers_full_reg_p (regno))
- frame.reg_offset[regno] = SLOT_REQUIRED;
-
-- /* With stack-clash, LR must be saved in non-leaf functions. The saving of
-- LR counts as an implicit probe which allows us to maintain the invariant
-- described in the comment at expand_prologue. */
-- gcc_assert (crtl->is_leaf
-- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
-
- poly_int64 offset = crtl->outgoing_args_size;
- gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
- frame.bytes_below_saved_regs = offset;
-+ frame.sve_save_and_probe = INVALID_REGNUM;
-
- /* Now assign stack slots for the registers. Start with the predicate
- registers, since predicate LDR and STR have a relatively small
-@@ -8226,6 +8222,8 @@
- for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
-+ if (frame.sve_save_and_probe == INVALID_REGNUM)
-+ frame.sve_save_and_probe = regno;
- frame.reg_offset[regno] = offset;
- offset += BYTES_PER_SVE_PRED;
- }
-@@ -8263,6 +8261,8 @@
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
-+ if (frame.sve_save_and_probe == INVALID_REGNUM)
-+ frame.sve_save_and_probe = regno;
- frame.reg_offset[regno] = offset;
- offset += vector_save_size;
- }
-@@ -8272,10 +8272,18 @@
- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
- bool saves_below_hard_fp_p
- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
-+ gcc_assert (!saves_below_hard_fp_p
-+ || (frame.sve_save_and_probe != INVALID_REGNUM
-+ && known_eq (frame.reg_offset[frame.sve_save_and_probe],
-+ frame.bytes_below_saved_regs)));
-+
- frame.bytes_below_hard_fp = offset;
-+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
-
- auto allocate_gpr_slot = [&](unsigned int regno)
- {
-+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
-+ frame.hard_fp_save_and_probe = regno;
- frame.reg_offset[regno] = offset;
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
- frame.wb_push_candidate1 = regno;
-@@ -8309,6 +8317,8 @@
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
- if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
- {
-+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
-+ frame.hard_fp_save_and_probe = regno;
- /* If there is an alignment gap between integer and fp callee-saves,
- allocate the last fp register to it if possible. */
- if (regno == last_fp_reg
-@@ -8332,6 +8342,17 @@
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-
- frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
-+ gcc_assert (known_eq (frame.saved_regs_size,
-+ frame.below_hard_fp_saved_regs_size)
-+ || (frame.hard_fp_save_and_probe != INVALID_REGNUM
-+ && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
-+ frame.bytes_below_hard_fp)));
-+
-+ /* With stack-clash, a register must be saved in non-leaf functions.
-+ The saving of the bottommost register counts as an implicit probe,
-+ which allows us to maintain the invariant described in the comment
-+ at expand_prologue. */
-+ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
-
- offset += get_frame_size ();
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-@@ -8462,6 +8483,25 @@
- frame.final_adjust = frame.bytes_below_saved_regs;
- }
-
-+ /* The frame is allocated in pieces, with each non-final piece
-+ including a register save at offset 0 that acts as a probe for
-+ the following piece. In addition, the save of the bottommost register
-+ acts as a probe for callees and allocas. Roll back any probes that
-+ aren't needed.
-+
-+ A probe isn't needed if it is associated with the final allocation
-+ (including callees and allocas) that happens before the epilogue is
-+ executed. */
-+ if (crtl->is_leaf
-+ && !cfun->calls_alloca
-+ && known_eq (frame.final_adjust, 0))
-+ {
-+ if (maybe_ne (frame.sve_callee_adjust, 0))
-+ frame.sve_save_and_probe = INVALID_REGNUM;
-+ else
-+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
-+ }
-+
- /* Make sure the individual adjustments add up to the full frame size. */
- gcc_assert (known_eq (frame.initial_adjust
- + frame.callee_adjust
-@@ -9039,13 +9079,6 @@ aarch64_get_separate_components (void)
-
- poly_int64 offset = frame.reg_offset[regno];
-
-- /* If the register is saved in the first SVE save slot, we use
-- it as a stack probe for -fstack-clash-protection. */
-- if (flag_stack_clash_protection
-- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
-- && known_eq (offset, frame.bytes_below_saved_regs))
-- continue;
--
- /* Get the offset relative to the register we'll use. */
- if (frame_pointer_needed)
- offset -= frame.bytes_below_hard_fp;
-@@ -9080,6 +9113,13 @@
-
- bitmap_clear_bit (components, LR_REGNUM);
- bitmap_clear_bit (components, SP_REGNUM);
-+ if (flag_stack_clash_protection)
-+ {
-+ if (frame.sve_save_and_probe != INVALID_REGNUM)
-+ bitmap_clear_bit (components, frame.sve_save_and_probe);
-+ if (frame.hard_fp_save_and_probe != INVALID_REGNUM)
-+ bitmap_clear_bit (components, frame.hard_fp_save_and_probe);
-+ }
-
- return components;
- }
-@@ -9616,8 +9656,8 @@ aarch64_epilogue_uses (int regno)
- When probing is needed, we emit a probe at the start of the prologue
- and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
-
-- We have to track how much space has been allocated and the only stores
-- to the stack we track as implicit probes are the FP/LR stores.
-+ We can also use register saves as probes. These are stored in
-+ sve_save_and_probe and hard_fp_save_and_probe.
-
- For outgoing arguments we probe if the size is larger than 1KB, such that
- the ABI specified buffer is maintained for the next callee.
-diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
-index b61358370732..46d4693e2064 100644
---- a/gcc/config/aarch64/aarch64.h
-+++ b/gcc/config/aarch64/aarch64.h
-@@ -957,6 +957,14 @@ struct GTY (()) aarch64_frame
- This is the register they should use. */
- unsigned spare_pred_reg;
-
-+ /* An SVE register that is saved below the hard frame pointer and that acts
-+ as a probe for later allocations, or INVALID_REGNUM if none. */
-+ unsigned sve_save_and_probe;
-+
-+ /* A register that is saved at the hard frame pointer and that acts
-+ as a probe for later allocations, or INVALID_REGNUM if none. */
-+ unsigned hard_fp_save_and_probe;
-+
- bool laid_out;
-
- /* True if shadow call stack should be enabled for the current function. */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
-index 3e01ec36c3a4..3530a0d504ba 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
-@@ -11,11 +11,10 @@
- ** mov x11, sp
- ** ...
- ** sub sp, sp, x13
---** str p4, \[sp\]
- ** cbz w0, [^\n]*
-+** str p4, \[sp\]
- ** ...
- ** ptrue p0\.b, all
---** ldr p4, \[sp\]
- ** addvl sp, sp, #1
- ** ldr x24, \[sp\], 32
- ** ret
-@@ -39,13 +38,12 @@ test_1 (int n)
- ** mov x11, sp
- ** ...
- ** sub sp, sp, x13
---** str p4, \[sp\]
- ** cbz w0, [^\n]*
-+** str p4, \[sp\]
- ** str p5, \[sp, #1, mul vl\]
- ** str p6, \[sp, #2, mul vl\]
- ** ...
- ** ptrue p0\.b, all
---** ldr p4, \[sp\]
- ** addvl sp, sp, #1
- ** ldr x24, \[sp\], 32
- ** ret
---
-2.43.5
-
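The probe thresholds described in the commit message above can be checked numerically. A standalone sketch, assuming the 64 KiB stack-clash guard size that AArch64 defaults to:

#include <stdbool.h>
#include <stdio.h>

/* Assumed 64 KiB guard, the AArch64 default for
   -fstack-clash-protection; 1 KiB of it is reserved for callees.  */
#define GUARD_SIZE (64 * 1024)

/* Area C alone (outgoing arguments): probe only above 1 KiB.  */
static bool outgoing_args_need_probe (long size)
{
  return size > 1024;
}

/* Any other allocation: probe once the remaining guard could be
   stepped over.  */
static bool allocation_needs_probe (long size)
{
  return size >= GUARD_SIZE - 1024;
}

int main (void)
{
  printf ("%d\n", outgoing_args_need_probe (1024));    /* 0 */
  printf ("%d\n", outgoing_args_need_probe (2048));    /* 1 */
  printf ("%d\n", allocation_needs_probe (63 * 1024)); /* 1 */
  printf ("%d\n", allocation_needs_probe (32 * 1024)); /* 0 */
  return 0;
}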
_service:tar_scm:0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
Added
@@ -0,0 +1,273 @@ +From f6f28c50045f672a35f5b7344b556fc45dc0b3a1 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:53 +0100
+Subject: [PATCH 015/157] [Backport][SME] aarch64: Avoid redundancy in
+ aarch64-cores.def
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=198bb6ed327c74eb2b0450bf978e4e6a64a6406c
+
+The flags fields of the aarch64-cores.def always start with
+AARCH64_FL_FOR_<ARCH>. After previous changes, <ARCH> is always
+identical to the previous field, so we can drop the explicit
+AARCH64_FL_FOR_<ARCH> and derive it programmatically.
+
+This isn't a big saving in itself, but it helps with later patches.
+
+gcc/
+ * config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_<ARCH>
+ from the flags field.
+ * common/config/aarch64/aarch64-common.cc (all_cores): Add it
+ here instead.
+ * config/aarch64/aarch64.cc (all_cores): Likewise.
+ * config/aarch64/driver-aarch64.cc (all_cores): Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 2 +-
+ gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
+ gcc/config/aarch64/aarch64.cc | 2 +-
+ gcc/config/aarch64/driver-aarch64.cc | 2 +-
+ 4 files changed, 68 insertions(+), 68 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 6ca89d31f..a965ac660 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -251,7 +251,7 @@ struct arch_to_arch_name
+ static const struct processor_name_to_arch all_cores =
+ {
+ #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
+- {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
++ {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS},
+ #include "config/aarch64/aarch64-cores.def"
+ {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
+ {"", aarch64_no_arch, 0}
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index f4c2f4ea4..008b0b8c1 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -46,132 +46,132 @@
+ /* ARMv8-A Architecture Processors. */
+
+ /* ARM ('A') cores.
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. 
*/ +-AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ +-AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. 
*/ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. */ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) ++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, 
AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) ++AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) ++AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) ++AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) ++AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) ++AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) ++AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) + /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. 
*/ +-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) ++AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) ++AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) ++AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) + + /* Fujitsu ('F') cores. */ +-AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) ++AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) + + /* HiSilicon ('H') cores. */ +-AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) + + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) +-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) +-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) +-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) ++AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) ++AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) ++AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) ++AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) + + /* ARM DynamIQ big.LITTLE configurations. 
*/ + +-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) +-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) ++AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) ++AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) + + /* Armv8-R Architecture Processors. */ +-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) ++AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, 0, cortexa53, 0x41, 0xd15, -1) + + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) + +-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) ++AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) + +-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) ++AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + +-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) + +-AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + + #undef AARCH64_CORE +diff --git a/gcc/config/aarch64/aarch64.cc 
b/gcc/config/aarch64/aarch64.cc +index 22b51e12f..f975aad07 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -2946,7 +2946,7 @@ static const struct processor all_cores = + { + #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ + {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ +- FLAGS, &COSTS##_tunings}, ++ AARCH64_FL_FOR_##ARCH | FLAGS, &COSTS##_tunings}, + #include "aarch64-cores.def" + {"generic", generic, cortexa53, AARCH64_ARCH_V8A, + AARCH64_FL_FOR_V8A, &generic_tunings}, +diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc +index 644780ef2..97690de62 100644 +--- a/gcc/config/aarch64/driver-aarch64.cc ++++ b/gcc/config/aarch64/driver-aarch64.cc +@@ -62,7 +62,7 @@ struct aarch64_core_data + #define DEFAULT_ARCH "8A" + + #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ +- { CORE_NAME, #ARCH, IMP, PART, VARIANT, FLAGS }, ++ { CORE_NAME, #ARCH, IMP, PART, VARIANT, AARCH64_FL_FOR_##ARCH | FLAGS }, + + static struct aarch64_core_data aarch64_cpu_data[] = + { +-- +2.33.0 +
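The refactor above is easier to see in miniature. The sketch below is illustrative only: the flag values, core names and helper names are invented for the example, not taken from GCC. It shows the same X-macro pattern: the per-core table entries carry only core-specific extras, and each expansion site ORs in the architecture baseline itself, so the baseline no longer has to be repeated in every .def entry.

/* x_macro_cores.c - minimal, self-contained illustration (made-up values). */
#include <stdio.h>

#define FL_FOR_V8A 0x01u  /* invented baseline mask for "V8A" */
#define FL_CRC     0x02u
#define FL_CRYPTO  0x04u

/* Analogue of aarch64-cores.def: per-core extras only, no baseline. */
#define CORES_DEF \
  CORE ("plain-core",  V8A, 0) \
  CORE ("crypto-core", V8A, FL_CRC | FL_CRYPTO)

struct core { const char *name; unsigned flags; };

/* Analogue of the expansion site in aarch64.cc: the baseline is folded
   in here, exactly once, via token pasting on the ARCH argument. */
#define CORE(NAME, ARCH, FLAGS) { NAME, FL_FOR_##ARCH | (FLAGS) },
static const struct core all_cores[] = { CORES_DEF };
#undef CORE

int
main (void)
{
  for (size_t i = 0; i < sizeof all_cores / sizeof all_cores[0]; i++)
    printf ("%s: %#x\n", all_cores[i].name, all_cores[i].flags);
  return 0;
}

The design payoff is the same as in the patch: adding a new expansion site (as driver-aarch64.cc is here) cannot forget the baseline, because the .def file never contained it.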
View file
_service:tar_scm:0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch
Added
@@ -0,0 +1,43 @@ +From 167a3f34b308d3d56e816559701c3fb1c4f88c7b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 03:30:35 +0000 +Subject: [PATCH 114/188] LoongArch: Remove constraint z from movsi_internal + +We don't allow SImode in FCC, so constraint z is never really used +here. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (movsi_internal): Remove + constraint z. +--- + gcc/config/loongarch/loongarch.md | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4c7e28ace..23d8dc126 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2197,8 +2197,8 @@ + }) + + (define_insn_and_split "*movsi_internal" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z") +- (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))] ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m") ++ (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f"))] + "(register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode))" + { return loongarch_output_move (operands[0], operands[1]); } +@@ -2211,7 +2211,7 @@ + DONE; + } + " +- [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "SI")]) + + ;; 16-bit Integer moves +-- +2.43.0 +
View file
_service:tar_scm:0114-aarch64-Remove-below-hard-fp-saved-regs-size.patch
Deleted
@@ -1,157 +0,0 @@ -From 8254e1b9cd500e0c278465a3657543477e9d1250 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:56 +0100 -Subject: [PATCH] aarch64: Remove below_hard_fp_saved_regs_size - -After previous patches, it's no longer necessary to store -saved_regs_size and below_hard_fp_saved_regs_size in the frame info. -All measurements instead use the top or bottom of the frame as -reference points. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size) - (aarch64_frame::below_hard_fp_saved_regs_size): Delete. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly. ---- - gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++------------------- - gcc/config/aarch64/aarch64.h | 7 ------ - 2 files changed, 21 insertions(+), 31 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index a8d907df8843..ac3d3b336a37 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8269,9 +8269,8 @@ aarch64_layout_frame (void) - - /* OFFSET is now the offset of the hard frame pointer from the bottom - of the callee save area. */ -- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -- bool saves_below_hard_fp_p -- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); -+ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; -+ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0); - gcc_assert (!saves_below_hard_fp_p - || (frame.sve_save_and_probe != INVALID_REGNUM - && known_eq (frame.reg_offset[frame.sve_save_and_probe], -@@ -8341,9 +8340,8 @@ aarch64_layout_frame (void) - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- frame.saved_regs_size = offset - frame.bytes_below_saved_regs; -- gcc_assert (known_eq (frame.saved_regs_size, -- frame.below_hard_fp_saved_regs_size) -+ auto saved_regs_size = offset - frame.bytes_below_saved_regs; -+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) - || (frame.hard_fp_save_and_probe != INVALID_REGNUM - && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe], - frame.bytes_below_hard_fp))); -@@ -8352,7 +8350,7 @@ aarch64_layout_frame (void) - The saving of the bottommost register counts as an implicit probe, - which allows us to maintain the invariant described in the comment - at expand_prologue. 
*/ -- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); -+ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); - - offset += get_frame_size (); - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8409,7 +8407,7 @@ aarch64_layout_frame (void) - - HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; - HOST_WIDE_INT const_saved_regs_size; -- if (known_eq (frame.saved_regs_size, 0)) -+ if (known_eq (saved_regs_size, 0)) - frame.initial_adjust = frame.frame_size; - else if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset -@@ -8422,7 +8420,7 @@ aarch64_layout_frame (void) - frame.callee_adjust = const_size; - } - else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) -- && frame.saved_regs_size.is_constant (&const_saved_regs_size) -+ && saved_regs_size.is_constant (&const_saved_regs_size) - && const_below_saved_regs + const_saved_regs_size < 512 - /* We could handle this case even with data below the saved - registers, provided that that data left us with valid offsets -@@ -8441,8 +8439,7 @@ aarch64_layout_frame (void) - frame.initial_adjust = frame.frame_size; - } - else if (saves_below_hard_fp_p -- && known_eq (frame.saved_regs_size, -- frame.below_hard_fp_saved_regs_size)) -+ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size)) - { - /* Frame in which all saves are SVE saves: - -@@ -8464,7 +8461,7 @@ aarch64_layout_frame (void) - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ - frame.callee_adjust = const_above_fp; -- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } - else -@@ -8479,7 +8476,7 @@ aarch64_layout_frame (void) - save SVE registers relative to SP - sub sp, sp, bytes_below_saved_regs */ - frame.initial_adjust = frame.bytes_above_hard_fp; -- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; - frame.final_adjust = frame.bytes_below_saved_regs; - } - -@@ -9621,17 +9618,17 @@ aarch64_epilogue_uses (int regno) - | local variables | <-- frame_pointer_rtx - | | - +-------------------------------+ -- | padding | \ -- +-------------------------------+ | -- | callee-saved registers | | frame.saved_regs_size -- +-------------------------------+ | -- | LR' | | -- +-------------------------------+ | -- | FP' | | -- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) -- | SVE vector registers | | \ -- +-------------------------------+ | | below_hard_fp_saved_regs_size -- | SVE predicate registers | / / -+ | padding | -+ +-------------------------------+ -+ | callee-saved registers | -+ +-------------------------------+ -+ | LR' | -+ +-------------------------------+ -+ | FP' | -+ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned) -+ | SVE vector registers | -+ +-------------------------------+ -+ | SVE predicate registers | - +-------------------------------+ - | dynamic allocation | - +-------------------------------+ -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 46d4693e2064..01f7751bc783 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -871,18 +871,11 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - HOST_WIDE_INT saved_varargs_size; - -- /* The size of the callee-save registers with a slot in REG_OFFSET. 
*/ -- poly_int64 saved_regs_size; -- - /* The number of bytes between the bottom of the static frame (the bottom - of the outgoing arguments) and the bottom of the register save area. - This value is always a multiple of STACK_BOUNDARY. */ - poly_int64 bytes_below_saved_regs; - -- /* The size of the callee-save registers with a slot in REG_OFFSET that -- are saved below the hard frame pointer. */ -- poly_int64 below_hard_fp_saved_regs_size; -- - /* The number of bytes between the bottom of the static frame (the bottom - of the outgoing arguments) and the hard frame pointer. This value is - always a multiple of STACK_BOUNDARY. */ --- -2.43.5 -
View file
_service:tar_scm:0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch
Added
@@ -0,0 +1,83 @@ +From f6137d5be2761caea75dcc1c98d941ceec161456 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:53 +0100 +Subject: [PATCH 016/157] [Backport][SME] aarch64: Remove AARCH64_FL_RCPC8_4 + [PR107025] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f244d848cffeda68f0eb4c5bb9c7e629bf2e957 + +AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated +entry in aarch64-option-extensions.def. This means that, although +it is internally separated from AARCH64_FL_V8_4A, there is no +mechanism for turning it on and off individually, independently +of armv8.4-a. + +The only place that the flag was used independently was in the +entry for thunderx3t110, which enabled it alongside V8_3A. +As noted in PR107025, this means that any use of the extension +will fail to assemble. + +In the PR trail, Andrew suggested removing the core entry. +That might be best long-term, but since the barrier for removing +command-line options without a deprecation period is very high, +this patch instead just drops the flag from the core entry. +We'll still produce correct code. + +gcc/ + PR target/107025 + * config/aarch64/aarch64.h (AARCH64_FL_RCPC8_4): Delete. + (AARCH64_FL_FOR_V8_4A): Update accordingly. + (AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly. + * config/aarch64/aarch64-cores.def (thunderx3t110): Remove + AARCH64_FL_RCPC8_4. +--- + gcc/config/aarch64/aarch64-cores.def | 2 +- + gcc/config/aarch64/aarch64.h | 5 ++--- + 2 files changed, 3 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 008b0b8c1..cf500d0a9 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 918a14193..f4e0cd148 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -173,7 +173,6 @@ + #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ + #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ + #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ +-#define AARCH64_FL_RCPC8_4 (1 << 20) /* Has ARMv8.4-a RCPC extensions. */ + + /* Statistical Profiling extensions. 
*/ + #define AARCH64_FL_PROFILE (1 << 21) +@@ -265,7 +264,7 @@ + (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) + #define AARCH64_FL_FOR_V8_4A \ + (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ +- | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) ++ | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM) + #define AARCH64_FL_FOR_V8_5A \ + (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) +@@ -313,7 +312,7 @@ + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) +-#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) ++#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) + #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) +-- +2.33.0 +
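To make the mechanics concrete, here is a hedged sketch (bit positions invented, not GCC's real values) of what the change amounts to: the dedicated RCPC8_4 bit disappears, and the ISA query keys off the v8.4-A architecture bit instead, which is what the flag was effectively tied to anyway.

#include <stdbool.h>

#define FL_V8_4A (1u << 7)   /* invented bit position for the arch level */

/* Before the patch there was a separate FL_RCPC8_4 bit that no
   +extension string could toggle; after it, the query is simply: */
static bool
isa_rcpc8_4 (unsigned isa_flags)
{
  return (isa_flags & FL_V8_4A) != 0;
}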
View file
_service:tar_scm:0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch
Added
@@ -0,0 +1,47 @@ +From 0929961b9dd57e0dd18e4cccc6ba760706e74f77 Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Wed, 17 Jan 2024 09:24:06 +0800 +Subject: [PATCH 115/188] LoongArch: doc: Add attribute descriptions defined in + the target-supports.exp. + +gcc/ChangeLog: + + * doc/sourcebuild.texi: Add attributes for keywords. +--- + gcc/doc/sourcebuild.texi | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 71c04841d..a1ab0a1cb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2292,6 +2292,26 @@ AArch64 target that is able to generate and execute armv8.3-a FJCVTZS + instruction. + @end table + ++@subsubsection LoongArch specific attributes ++ ++@table @code ++@item loongarch_sx ++LoongArch target that generates instructions for SX. ++ ++@item loongarch_asx ++LoongArch target that generates instructions for ASX. ++ ++@item loongarch_sx_hw ++LoongArch target that is able to generate and execute SX code. ++ ++@item loongarch_asx_hw ++LoongArch target that is able to generate and execute ASX code. ++ ++@item loongarch_call36_support ++LoongArch binutils supports call36 relocation. ++ ++@end table ++ + @subsubsection MIPS-specific attributes + + @table @code +-- +2.43.0 +
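For readers unfamiliar with these DejaGnu keywords, a hypothetical testcase gated on one of the attributes documented above might look like the following. The keyword loongarch_sx comes from the documentation being added; the option -mlsx and the function body are placeholders, not taken from the patch.

/* { dg-do compile } */
/* { dg-require-effective-target loongarch_sx } */
/* { dg-options "-mlsx" } */

int
foo (int x)
{
  /* Body irrelevant; the test only runs where the SX keyword holds. */
  return x + 1;
}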
View file
_service:tar_scm:0115-aarch64-Make-stack-smash-canary-protect-saved-registers.patch
Deleted
@@ -1,298 +0,0 @@ -From 75c37e031408262263442f5b4cdb83d3777b6422 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Tue, 12 Sep 2023 16:08:57 +0100 -Subject: [PATCH] aarch64: Make stack smash canary protect saved registers - -AArch64 normally puts the saved registers near the bottom of the frame, -immediately above any dynamic allocations. But this means that a -stack-smash attack on those dynamic allocations could overwrite the -saved registers without needing to reach as far as the stack smash -canary. - -The same thing could also happen for variable-sized arguments that are -passed by value, since those are allocated before a call and popped on -return. - -This patch avoids that by putting the locals (and thus the canary) below -the saved registers when stack smash protection is active. - -The patch fixes CVE-2023-4039. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p): - New function. - (aarch64_layout_frame): Use it to decide whether locals should - go above or below the saved registers. - (aarch64_expand_prologue): Update stack layout comment. - Emit a stack tie after the final adjustment. - -gcc/testsuite/ - * gcc.target/aarch64/stack-protector-8.c: New test. - * gcc.target/aarch64/stack-protector-9.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 46 +++++++-- - .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++ - .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++ - 3 files changed, 168 insertions(+), 6 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ac3d3b336a37..96c3f48fdc49 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8133,6 +8133,20 @@ aarch64_needs_frame_chain (void) - return aarch64_use_frame_pointer; - } - -+/* Return true if the current function should save registers above -+ the locals area, rather than below it. */ -+ -+static bool -+aarch64_save_regs_above_locals_p () -+{ -+ /* When using stack smash protection, make sure that the canary slot -+ comes between the locals and the saved registers. Otherwise, -+ it would be possible for a carefully sized smash attack to change -+ the saved registers (particularly LR and FP) without reaching the -+ canary. */ -+ return crtl->stack_protect_guard; -+} -+ - /* Mark the registers that need to be saved by the callee and calculate - the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). 
*/ -@@ -8144,6 +8158,7 @@ aarch64_layout_frame (void) - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); - bool frame_related_fp_reg_p = false; - aarch64_frame &frame = cfun->machine->frame; -+ poly_int64 top_of_locals = -1; - - frame.emit_frame_chain = aarch64_needs_frame_chain (); - -@@ -8210,9 +8225,16 @@ aarch64_layout_frame (void) - && !crtl->abi->clobbers_full_reg_p (regno)) - frame.reg_offset[regno] = SLOT_REQUIRED; - -+ bool regs_at_top_p = aarch64_save_regs_above_locals_p (); - - poly_int64 offset = crtl->outgoing_args_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); -+ if (regs_at_top_p) -+ { -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ top_of_locals = offset; -+ } - frame.bytes_below_saved_regs = offset; - frame.sve_save_and_probe = INVALID_REGNUM; - -@@ -8352,15 +8374,18 @@ aarch64_layout_frame (void) - at expand_prologue. */ - gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); - -- offset += get_frame_size (); -- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -- auto top_of_locals = offset; -- -+ if (!regs_at_top_p) -+ { -+ offset += get_frame_size (); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ top_of_locals = offset; -+ } - offset += frame.saved_varargs_size; - gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); - frame.frame_size = offset; - - frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; -+ gcc_assert (known_ge (top_of_locals, 0)); - frame.bytes_above_locals = frame.frame_size - top_of_locals; - - frame.initial_adjust = 0; -@@ -9615,10 +9640,10 @@ aarch64_epilogue_uses (int regno) - | for register varargs | - | | - +-------------------------------+ -- | local variables | <-- frame_pointer_rtx -+ | local variables (1) | <-- frame_pointer_rtx - | | - +-------------------------------+ -- | padding | -+ | padding (1) | - +-------------------------------+ - | callee-saved registers | - +-------------------------------+ -@@ -9630,6 +9655,10 @@ - +-------------------------------+ - | SVE predicate registers | - +-------------------------------+ -+ | local variables (2) | -+ +-------------------------------+ -+ | padding (2) | -+ +-------------------------------+ - | dynamic allocation | - +-------------------------------+ - | padding | -@@ -9639,6 +9668,9 @@ - +-------------------------------+ - | | <-- stack_pointer_rtx (aligned) - -+ The regions marked (1) and (2) are mutually exclusive. (2) is used -+ when aarch64_save_regs_above_locals_p is true. -+ - Dynamic stack allocations via alloca() decrease stack_pointer_rtx - but leave frame_pointer_rtx and hard_frame_pointer_rtx - unchanged. -@@ -9834,6 +9866,8 @@ aarch64_expand_prologue (void) - gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, - !frame_pointer_needed, true); -+ if (emit_frame_chain && maybe_ne (final_adjust, 0)) -+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - - /* Return TRUE if we can use a simple_return insn. 
-diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c -new file mode 100644 -index 000000000000..e71d820e3654 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c -@@ -0,0 +1,95 @@ -+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void g(void *); -+__SVBool_t *h(void *); -+ -+/* -+** test1: -+** sub sp, sp, #288 -+** stp x29, x30, \[sp, #?272\] -+** add x29, sp, #?272 -+** mrs (x[0-9]+), tpidr2_el0 -+** ldr (x[0-9]+), \[\1, #?16\] -+** str \2, \[sp, #?264\] -+** mov \2, #?0 -+** add x0, sp, #?8 -+** bl g -+** ... -+** mrs .* -+** ... -+** bne .* -+** ... -+** ldp x29, x30, \[sp, #?272\] -+** add sp, sp, #?288 -+** ret -+** bl __stack_chk_fail -+*/ -+int test1() { -+ int y[0x40]; -+ g(y); -+ return 1; -+} -+ -+/* -+** test2: -+** stp x29, x30, \[sp, #?-16\]! -+** mov x29, sp -+** sub sp, sp, #1040 -+** mrs (x[0-9]+), tpidr2_el0 -+** ldr (x[0-9]+), \[\1, #?16\] -+** str \2, \[sp, #?1032\] -+** mov \2, #?0 -+** add x0, sp, #?8 -+** bl g -+** ... -+** mrs .* -+** ... -+** bne .* -+** ... -+** add sp, sp, #?1040 -+** ldp x29, x30, \[sp\], #?16 -+** ret -+** bl __stack_chk_fail -+*/ -+int test2() { -+ int y[0x100]; -+ g(y); -+ return 1; -+} -+ -+#pragma GCC target "+sve" -+ -+/* -+** test3: -+** stp x29, x30, \[sp, #?-16\]! -+** mov x29, sp -+** addvl sp, sp, #-18 -+** ... -+** str p4, \[sp\] -+** ... -+** sub sp, sp, #272 -+** mrs (x[0-9]+), tpidr2_el0 -+** ldr (x[0-9]+), \[\1, #?16\] -+** str \2, \[sp, #?264\] -+** mov \2, #?0 -+** add x0, sp, #?8 -+** bl h -+** ... -+** mrs .* -+** ... -+** bne .* -+** ... -+** add sp, sp, #?272 -+** ... -+** ldr p4, \[sp\] -+** ... -+** addvl sp, sp, #18 -+** ldp x29, x30, \[sp\], #?16 -+** ret -+** bl __stack_chk_fail -+*/ -+__SVBool_t test3() { -+ int y[0x40]; -+ return *h(y); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c -new file mode 100644 -index 000000000000..58f322aa480a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c -@@ -0,0 +1,33 @@ -+/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* -+** main: -+** ... -+** stp x29, x30, \[sp, #?-[0-9]+\]! -+** ... -+** sub sp, sp, #[0-9]+ -+** ... -+** str x[0-9]+, \[x29, #?-8\] -+** ... -+*/ -+int f(const char *); -+void g(void *); -+int main(int argc, char* argv[]) -+{ -+ int a; -+ int b; -+ char c[2+f(argv[1])]; -+ int d[0x100]; -+ char y; -+ -+ y=42; a=4; b=10; -+ c[0] = 'h'; c[1] = '\0'; -+ -+ c[f(argv[2])] = '\0'; -+ -+ __builtin_printf("%d %d\n%s\n", a, b, c); -+ g(d); -+ -+ return 0; -+} --- -2.43.5 -
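Since several patches in this revision revolve around the same CVE, a short illustration may help. The function below is illustrative only, not taken from the patch: it has the frame shape the fix targets, a dynamically sized buffer that previously sat directly below the saved FP/LR, so an out-of-bounds write in buggy code could redirect the return address without ever touching the stack canary. With the fix, the canary slot sits between the dynamic area and the saved registers.

void consume (char *);

void
victim (int n, int bad_index, char byte)
{
  char buf[n];            /* VLA: the dynamic part of the frame */
  consume (buf);
  buf[bad_index] = byte;  /* before the patch, an OOB write here could
                             reach the saved LR/FP below the canary */
}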
View file
_service:tar_scm:0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
Added
@@ -0,0 +1,154 @@ +From c6698a5feb07fc0cda89a54a0ee4006295ac6dbe Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:53 +0100 +Subject: [PATCH 017/157] [Backport][SME] aarch64: Fix transitive closure of + features + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b754d32d3053a4ba2a82361ac0f2739797a811f1 + +aarch64-option-extensions.def requires us to maintain the transitive +closure of options by hand. This patch fixes a few cases where a +flag was missed. + ++noaes and +nosha2 now disable +crypto, which IMO makes more +sense and is consistent with the Clang behaviour. + +gcc/ + * config/aarch64/aarch64-option-extensions.def (dotprod): Depend + on fp as well as simd. + (sha3): Likewise. + (aes): Likewise. Make +noaes disable crypto. + (sha2): Likewise +nosha2. Also make +nosha2 disable sha3 and + sve2-sha3. + (sve2-sha3): Depend on sha2 as well as sha3. + +gcc/testsuite/ + * gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to + disable crypto but keep aes. + * gcc.target/aarch64/pragma_cpp_predefs_4.c: New test. +--- + .../aarch64/aarch64-option-extensions.def | 16 ++++--- + .../gcc.target/aarch64/options_set_6.c | 5 +- + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++++++++++++++++++ + 3 files changed, 58 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index b4d0ac8b6..b98008127 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ + + /* Enabling "dotprod" also enables "simd". + Disabling "dotprod" only disables "dotprod". */ +-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ ++AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \ + false, "asimddp") + + /* Enabling "aes" also enables "simd". + Disabling "aes" disables "aes" and "sve2-aes'. */ +-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ +- AARCH64_FL_SVE2_AES, false, "aes") ++AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \ ++ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes") + + /* Enabling "sha2" also enables "simd". + Disabling "sha2" just disables "sha2". */ +-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ +- "sha1 sha2") ++AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ ++ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") + + /* Enabling "sha3" enables "simd" and "sha2". + Disabling "sha3" disables "sha3" and "sve2-sha3". */ +-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ ++AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ + AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ + "sha3 sha512") + + /* Enabling "sm4" also enables "simd". + Disabling "sm4" disables "sm4" and "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ ++AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \ + AARCH64_FL_SVE2_SM4, false, "sm3 sm4") + + /* Enabling "fp16fml" also enables "fp" and "fp16". +@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ + /* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and + "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". 
*/ + AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SHA2 | \ + AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ + AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") + +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c +index 90a055928..2a1d7fe5b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c +@@ -6,7 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */ + +-/* Group as a whole was requested to be turned on, crypto itself is a bit and so +- just turning off one feature can't turn it off. */ ++/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +new file mode 100644 +index 000000000..0e6461fa4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c +@@ -0,0 +1,47 @@ ++#pragma GCC target "+nothing+dotprod" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+aes" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sha2" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sha3" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sm4" ++#ifndef __ARM_FEATURE_FMA ++#error Foo ++#endif ++ ++#pragma GCC target "+crypto+noaes" ++#ifdef __ARM_FEATURE_CRYPTO ++#error Foo ++#endif ++ ++#pragma GCC target "+crypto+nosha2" ++#ifdef __ARM_FEATURE_CRYPTO ++#error Foo ++#endif ++ ++#pragma GCC target "+nothing+sve2-sha3" ++#ifndef __ARM_FEATURE_SHA2 ++#error Foo ++#endif ++ ++#pragma GCC target "+sve2-sha3+nosha2" ++#ifdef __ARM_FEATURE_SHA3 ++#error Foo ++#endif ++#ifdef __ARM_FEATURE_SVE2_SHA3 ++#error Foo ++#endif +-- +2.33.0 +
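The "transitive closure by hand" the commit message refers to can be pictured with a few invented masks (these are not GCC's real bit assignments): each extension's on-mask must already contain the on-masks of everything it depends on, and each off-mask must list everything that stops being usable when the extension goes away.

#define FL_FP     (1u << 0)  /* invented bit values */
#define FL_SIMD   (1u << 1)
#define FL_AES    (1u << 2)
#define FL_SHA2   (1u << 3)
#define FL_CRYPTO (1u << 4)

/* On-masks, maintained as a transitive closure: aes and sha2 pull in
   fp+simd, so crypto, which pulls in aes and sha2, inherits fp+simd. */
#define ON_AES    (FL_AES  | FL_FP | FL_SIMD)
#define ON_SHA2   (FL_SHA2 | FL_FP | FL_SIMD)
#define ON_CRYPTO (FL_CRYPTO | ON_AES | ON_SHA2)

/* Off-masks after the patch: +noaes and +nosha2 also clear crypto,
   because crypto cannot exist without both of them. */
#define OFF_AES   (FL_AES  | FL_CRYPTO)
#define OFF_SHA2  (FL_SHA2 | FL_CRYPTO)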
View file
_service:tar_scm:0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch
Added
@@ -0,0 +1,70 @@ +From c0b63b89a03c11bf6383f0175b60614d73295463 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 22 Jan 2024 18:07:42 +0800 +Subject: [PATCH 116/188] LoongArch: Disable explicit reloc for TLS LD/GD with + -mexplicit-relocs=auto + +Binutils 2.42 supports TLS LD/GD relaxation which requires the assembler +macro. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + If la_opt_explicit_relocs is EXPLICIT_RELOCS_AUTO, return false + for SYMBOL_TLS_LDM and SYMBOL_TLS_GD. + (loongarch_call_tls_get_addr): Do not split symbols of + SYMBOL_TLS_LDM or SYMBOL_TLS_GD if la_opt_explicit_relocs is + EXPLICIT_RELOCS_AUTO. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Check + for la.tls.ld and la.tls.gd. +--- + gcc/config/loongarch/loongarch.cc | 10 +++++----- + .../loongarch/explicit-relocs-auto-tls-ld-gd.c | 3 ++- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7da00c132..5f22b9dd8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1967,11 +1967,11 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + { + case SYMBOL_TLS_IE: + case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL64: +- /* The linker don't know how to relax TLS accesses or 64-bit +- pc-relative accesses. */ ++ /* TLS IE cannot be relaxed. TLS LE relaxation is different from ++ the normal R_LARCH_RELAX-based relaxation and it **requires** ++ using the explicit %le_{lo12,hi20,add}_r relocs. The linker ++ does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: + /* The linker don't know how to relax GOT accesses in extreme +@@ -2785,7 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) + { + /* Split tls symbol to high and low. */ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +index 957ff98df..ca55fcfc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -6,4 +6,5 @@ extern __thread int b __attribute__((visibility("default"))); + + int test() { return a + b; } + +-/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.ld" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.gd" { target tls_native } } } */ +-- +2.43.0 +
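The patch's own testcase shows the intended outcome; an equivalent minimal reproducer, mirroring that testcase, would be the following (file name hypothetical; compile with -fPIC and -mexplicit-relocs=auto).

/* tls-gd.c - expect a la.tls.gd assembler macro in the output rather
   than a split explicit-reloc sequence, so binutils >= 2.42 can relax
   the access at link time. */
extern __thread int a __attribute__ ((visibility ("default")));

int
read_a (void)
{
  return a;  /* general-dynamic TLS access */
}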
View file
_service:tar_scm:0116-aarch64-Fix-return-register-handling-in-untyped_call.patch
Deleted
@@ -1,66 +0,0 @@ -From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Wed, 20 Sep 2023 11:13:19 +0100 -Subject: [PATCH] aarch64: Fix return register handling in untyped_call - -While working on another patch, I hit a problem with the aarch64 -expansion of untyped_call. The expander emits the usual: - - (set (mem ...) (reg resN)) - -instructions to store the result registers to memory, but it didn't -say in RTL where those resN results came from. This eventually led -to a failure of gcc.dg/torture/stackalign/builtin-return-2.c, -via regrename. - -This patch turns the untyped call from a plain call to a call_value, -to represent that the call returns (or might return) a useful value. -The patch also uses a PARALLEL return rtx to represent all the possible -return registers. - -gcc/ - * config/aarch64/aarch64.md (untyped_call): Emit a call_value - rather than a call. List each possible destination register - in the call pattern. - -(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f) ---- - gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++- - 1 file changed, 19 insertions(+), 1 deletion(-) - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index b9b3ba4f9164..cd568f3daa0f 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1159,9 +1159,27 @@ - { - int i; - -+ /* Generate a PARALLEL that contains all of the register results. -+ The offsets are somewhat arbitrary, since we don't know the -+ actual return type. The main thing we need to avoid is having -+ overlapping byte ranges, since those might give the impression -+ that two registers are known to have data in common. */ -+ rtvec rets = rtvec_alloc (XVECLEN (operands[2], 0)); -+ poly_int64 offset = 0; -+ for (i = 0; i < XVECLEN (operands[2], 0); i++) -+ { -+ rtx reg = SET_SRC (XVECEXP (operands[2], 0, i)); -+ gcc_assert (REG_P (reg)); -+ rtx offset_rtx = gen_int_mode (offset, Pmode); -+ rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx); -+ RTVEC_ELT (rets, i) = piece; -+ offset += GET_MODE_SIZE (GET_MODE (reg)); -+ } -+ rtx ret = gen_rtx_PARALLEL (VOIDmode, rets); -+ - /* Untyped calls always use the default ABI. It's only possible to use - ABI variants if we know the type of the target function. */ -- emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); -+ emit_call_insn (gen_call_value (ret, operands[0], const0_rtx, const0_rtx)); - - for (i = 0; i < XVECLEN (operands[2], 0); i++) - { --- -2.43.5 -
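untyped_call is reached from user code through GCC's __builtin_apply machinery; the short example below (illustrative, using the documented GCC extension syntax) is the kind of code that exercises the pattern.

/* The callee's result comes back in result registers whose identity is
   unknown at the expansion point - exactly the registers the PARALLEL
   return rtx added by this patch describes to the RTL passes. */
double add (int a, double b) { return a + b; }

void
forward (void)
{
  void *args = __builtin_apply_args ();
  __builtin_return (__builtin_apply ((void (*) ()) add, args, 64));
}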
View file
_service:tar_scm:0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
Added
@@ -0,0 +1,194 @@ +From 4a2d0bdf5c9a5f4ee615c1d0768cb2e8a3dfef4a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:54 +0100 +Subject: [PATCH 018/157] [Backport][SME] aarch64: Reorder an entry in + aarch64-option-extensions.def + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c067c474f85b1e9c56fb34dd51ef0eec9221b766 + +aarch64-option-extensions.def was topologically sorted except +for one case: crypto came before its aes and sha2 dependencies. +This patch moves crypto after sha2 instead. + +gcc/ + * config/aarch64/aarch64-option-extensions.def: Move crypto + after sha2. + +gcc/testsuite/ + * gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto + to come after +crc. + * gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise. + * gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise. + * gcc.target/aarch64/options_set_2.c: Likewise. + * gcc.target/aarch64/options_set_3.c: Likewise. + * gcc.target/aarch64/options_set_4.c: Likewise. +--- + .../aarch64/aarch64-option-extensions.def | 20 +++++++++---------- + .../aarch64/cpunative/native_cpu_0.c | 2 +- + .../aarch64/cpunative/native_cpu_13.c | 2 +- + .../aarch64/cpunative/native_cpu_16.c | 2 +- + .../aarch64/cpunative/native_cpu_17.c | 2 +- + .../aarch64/cpunative/native_cpu_6.c | 2 +- + .../aarch64/cpunative/native_cpu_7.c | 2 +- + .../gcc.target/aarch64/options_set_2.c | 2 +- + .../gcc.target/aarch64/options_set_3.c | 2 +- + .../gcc.target/aarch64/options_set_4.c | 4 ++-- + 10 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index b98008127..df2c8d19b 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ + AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ + false, "asimd") + +-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". +- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", +- "sve2-aes", "sve2-sha3", "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ +- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ +- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ +- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ +- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ +- "aes pmull sha1 sha2") +- + /* Enabling or disabling "crc" only changes "crc". */ + AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") + +@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ + AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ + AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") + ++/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". ++ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", ++ "sve2-aes", "sve2-sha3", "sve2-sm4". */ ++AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ ++ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ ++ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ ++ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ ++ "aes pmull sha1 sha2") ++ + /* Enabling "sha3" enables "simd" and "sha2". 
+ Disabling "sha3" disables "sha3" and "sve2-sha3". */ + AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +index f155f51ba..8499f87c3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +index b7b3a8e13..551669091 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ + + /* Test one with mixed order of feature bits. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +index a424e7c56..2f963bb23 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +index c269c5fef..c68a697aa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ + + /* Test a normal looking procinfo. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +index da72052e6..7608e8845 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +@@ -7,7 +7,7 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ + + /* Test one where the feature bits for crypto and fp16 are given in + same order as declared in options file. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +index 96ad4c14d..72b14b4f6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +@@ -7,7 +7,7 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ + + /* Test one where the crypto and fp16 options are specified in different + order from what is in the options file. 
*/ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c +index 3476febce..f82cb5f78 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c +@@ -6,6 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check to see if crc and crypto are maintained if crypto specified. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c +index 4558339f1..7d350cfa3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c +@@ -6,6 +6,6 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check if smallest set is maintained when outputting. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_4.c b/gcc/testsuite/gcc.target/aarch64/options_set_4.c +index 15514bfe9..5370e02e1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/options_set_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/options_set_4.c +@@ -6,7 +6,7 @@ int main () + return 0; + } + +-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ ++/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ + + /* Check if individual bits that make up a grouping is specified that only the +- grouping is kept. */ +\ No newline at end of file ++ grouping is kept. */ +-- +2.33.0 +
View file
_service:tar_scm:0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch
Added
@@ -0,0 +1,35 @@ +From 7e10f7b95a598e9471bd1bc77454af43a69eb506 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Tue, 23 Jan 2024 19:32:38 +0800 +Subject: [PATCH 117/188] LoongArch: testsuite: Disable stack protector for + got-load.C + +When building GCC with --enable-default-ssp, the stack protector is +enabled for got-load.C, causing additional GOT loads for +__stack_chk_guard. So mem/u will be matched more than 2 times and the +test will fail. + +Disable stack protector to fix this issue. + +gcc/testsuite: + + * g++.target/loongarch/got-load.C (dg-options): Add + -fno-stack-protector. +--- + gcc/testsuite/g++.target/loongarch/got-load.C | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C +index 20924c739..17870176a 100644 +--- a/gcc/testsuite/g++.target/loongarch/got-load.C ++++ b/gcc/testsuite/g++.target/loongarch/got-load.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */ ++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand -fno-stack-protector" } */ + /* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */ + + #include <bits/stdc++.h> +-- +2.43.0 +
_service:tar_scm:0117-aarch64-Fix-loose-ldpstp-check.patch
Deleted
@@ -1,119 +0,0 @@ -From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford <richard.sandiford@arm.com> -Date: Wed, 20 Sep 2023 11:13:20 +0100 -Subject: PATCH aarch64: Fix loose ldpstp check PR111411 - -aarch64_operands_ok_for_ldpstp contained the code: - - /* One of the memory accesses must be a mempair operand. - If it is not the first one, they need to be swapped by the - peephole. */ - if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) - && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) - return false; - -But the requirement isn't just that one of the accesses must be a -valid mempair operand. It's that the lower access must be, since -that's the access that will be used for the instruction operand. - -gcc/ - PR target/111411 - * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require - the lower memory access to a mem-pair operand. - -gcc/testsuite/ - PR target/111411 - * gcc.dg/rtl/aarch64/pr111411.c: New test. - -(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) ---- - gcc/config/aarch64/aarch64.cc | 8 ++- - gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ - 2 files changed, 60 insertions(+), 5 deletions(-) - create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 96c3f48fdc49..a979accd90a9 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, - gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), - GET_MODE_SIZE (GET_MODE (mem_2)))); - -- /* One of the memory accesses must be a mempair operand. -- If it is not the first one, they need to be swapped by the -- peephole. */ -- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) -- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) -+ /* The lower memory access must be a mem-pair operand. */ -+ rtx lower_mem = reversed ? 
mem_2 : mem_1; -+ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) - return false; - - if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) -diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c -new file mode 100644 -index 000000000000..ad07e9c6c893 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c -@@ -0,0 +1,57 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+/* { dg-require-effective-target lp64 } */ -+/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ -+ -+extern int data; -+ -+void __RTL (startwith ("ira")) foo (void *ptr) -+{ -+ (function "foo" -+ (param "ptr" -+ (DECL_RTL (reg/v:DI <0> ptr )) -+ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) -+ ) ;; param "ptr" -+ (insn-chain -+ (block 2 -+ (edge-from entry (flags "FALLTHRU")) -+ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) -+ (insn 4 (set (reg:DI <0>) (reg:DI x0))) -+ (insn 5 (set (reg:DI <1>) -+ (plus:DI (reg:DI <0>) (const_int 768)))) -+ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) -+ (const_int 508)) 1 &data+508 S4 A4) -+ (const_int 0))) -+ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) -+ (const_int -256)) 1 &data+512 S4 A4) -+ (const_int 0))) -+ (edge-to exit (flags "FALLTHRU")) -+ ) ;; block 2 -+ ) ;; insn-chain -+ ) ;; function -+} -+ -+void __RTL (startwith ("ira")) bar (void *ptr) -+{ -+ (function "bar" -+ (param "ptr" -+ (DECL_RTL (reg/v:DI <0> ptr )) -+ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) -+ ) ;; param "ptr" -+ (insn-chain -+ (block 2 -+ (edge-from entry (flags "FALLTHRU")) -+ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) -+ (insn 4 (set (reg:DI <0>) (reg:DI x0))) -+ (insn 5 (set (reg:DI <1>) -+ (plus:DI (reg:DI <0>) (const_int 768)))) -+ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) -+ (const_int -256)) 1 &data+512 S4 A4) -+ (const_int 0))) -+ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) -+ (const_int 508)) 1 &data+508 S4 A4) -+ (const_int 0))) -+ (edge-to exit (flags "FALLTHRU")) -+ ) ;; block 2 -+ ) ;; insn-chain -+ ) ;; function -+} --- -2.43.5 -
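The dropped patch above fixed PR111411 by requiring the lower of the two accesses, rather than "either access", to be a valid mem-pair operand, since the lower one is what becomes the instruction operand. Below is a self-contained model of that tightening, with toy types standing in for GCC's RTL; the encodable range used here (multiples of 4 in -256..252) matches 32-bit LDP/STP immediates, but everything else is illustrative.

#include <cassert>

/* Toy stand-in for an RTL mem: the immediate offset from its base register.  */
struct access { long imm; };

/* Toy LDP/STP predicate for 32-bit registers: signed 7-bit immediate,
   scaled by 4.  */
static bool mem_pair_operand (const access &m)
{
  return m.imm % 4 == 0 && m.imm >= -256 && m.imm <= 252;
}

/* The old, loose check: accept if either access is encodable.  */
static bool old_check (const access &m1, const access &m2)
{
  return mem_pair_operand (m1) || mem_pair_operand (m2);
}

/* The fix: only the lower access feeds the pair operand, so it alone
   must be encodable.  */
static bool new_check (const access &m1, const access &m2, bool reversed)
{
  const access &lower = reversed ? m2 : m1;
  return mem_pair_operand (lower);
}

int main ()
{
  /* Shape of the pr111411.c test: the lower access uses imm 508 (not
     encodable), the upper uses imm -256 from a different base (encodable).  */
  access lower{508}, upper{-256};
  assert (old_check (lower, upper));            /* old code accepted this pair */
  assert (!new_check (lower, upper, false));    /* fixed code rejects it */
  return 0;
}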
_service:tar_scm:0118-Backport-SME-aarch64-Simplify-feature-definitions.patch
Added
@@ -0,0 +1,1176 @@ +From deb18d5083d8f9edbdafac184c010a6720dc8dda Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:54 +0100 +Subject: PATCH 019/157 BackportSME aarch64: Simplify feature definitions + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11a113d501ff64fa4843e28d0a21b3f4e9d0d3de + +Currently the aarch64-option-extensions.def entries, the +aarch64-cores.def entries, and the AARCH64_FL_FOR_* macros +have a transitive closure of dependencies that is maintained by hand. +This is a bit error-prone and is becoming less tenable as more features +are added. The main point of this patch is to maintain the closure +automatically instead. + +For example, the +sve2-aes extension requires sve2 and aes. +This is now described using: + + AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), ...) + +If life was simple, we could just give the name of the feature +and the list of features that it requires/depends on. But sadly +things are more complicated. For example: + +- the legacy +crypto option enables aes and sha2 only, but +nocrypto + disables all crypto-related extensions, including sm4. + +- +fp16fml enables fp16, but armv8.4-a enables fp16fml without fp16. + fp16fml only has an effect when fp16 is also present; see the + comments for more details. + +- +bf16 enables simd, but +bf16+nosimd is valid and enables just the + scalar bf16 instructions. rdma behaves similarly. + +To handle cases like these, the option entries have extra fields to +specify what an explicit +foo enables and what an explicit +nofoo +disables, in addition to the absolute dependencies. + +The other main changes are: + +- AARCH64_FL_* are now defined automatically. + +- the feature list for each architecture level moves from aarch64.h + to aarch64-arches.def. + +As a consequence, we now have a (redundant) V8A feature flag. + +While there, the patch uses a new typedef, aarch64_feature_flags, +for the set of feature flags. This should make it easier to switch +to a class if we run out of bits in the uint64_t. + +For now the patch hardcodes the fact that crypto is the only +synthetic option. A later patch will remove this field. + +To test for things that might not be covered by the testsuite, +I made the driver print out the all_extensions, all_cores and +all_archs arrays before and after the patch, with the following +tweaks: + +- renumber the old AARCH64_FL_* bit assignments to match the .def order +- remove the new V8A flag when printing the new tables +- treat CRYPTO and CRYPTO | AES | SHA2 the same way when printing the + core tables + +(On the last point: some cores enabled just CRYPTO while others enabled +CRYPTO, AES and SHA2. This doesn't cause a difference in behaviour +because of how the dependent macros are defined. With the new scheme, +all entries with CRYPTO automatically get AES and SHA2 too.) + +The only difference is that +nofp now turns off dotprod. This was +another instance of an incomplete transitive closure, but unlike the +instances fixed in a previous patch, it had no observable effect. + +gcc/ + * config/aarch64/aarch64-option-extensions.def: Switch to a new format. + * config/aarch64/aarch64-cores.def: Use the same format to specify + lists of features. + * config/aarch64/aarch64-arches.def: Likewise, moving that information + from aarch64.h. + * config/aarch64/aarch64-opts.h (aarch64_feature_flags): New typedef. + * config/aarch64/aarch64.h (aarch64_feature): New class enum. 
+ Turn AARCH64_FL_* macros into constexprs, getting the definitions + from aarch64-option-extensions.def. Remove AARCH64_FL_FOR_* macros. + * common/config/aarch64/aarch64-common.cc: Include + aarch64-feature-deps.h. + (all_extensions): Update for new .def format. + (all_extensions_by_on, all_cores, all_architectures): Likewise. + * config/aarch64/driver-aarch64.cc: Include aarch64-feature-deps.h. + (aarch64_extensions): Update for new .def format. + (aarch64_cpu_data, aarch64_arches): Likewise. + * config/aarch64/aarch64.cc: Include aarch64-feature-deps.h. + (all_architectures, all_cores): Update for new .def format. + * config/aarch64/aarch64-sve-builtins.cc + (check_required_extensions): Likewise. +--- + gcc/common/config/aarch64/aarch64-common.cc | 29 +- + gcc/config/aarch64/aarch64-arches.def | 28 +- + gcc/config/aarch64/aarch64-cores.def | 130 +++---- + gcc/config/aarch64/aarch64-feature-deps.h | 121 +++++++ + .../aarch64/aarch64-option-extensions.def | 323 +++++++----------- + gcc/config/aarch64/aarch64-opts.h | 4 + + gcc/config/aarch64/aarch64-sve-builtins.cc | 5 +- + gcc/config/aarch64/aarch64.cc | 14 +- + gcc/config/aarch64/aarch64.h | 164 ++------- + gcc/config/aarch64/driver-aarch64.cc | 10 +- + 10 files changed, 374 insertions(+), 454 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-feature-deps.h + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index a965ac660..74729bb30 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -30,6 +30,7 @@ + #include "opts.h" + #include "flags.h" + #include "diagnostic.h" ++#include "config/aarch64/aarch64-feature-deps.h" + + #ifdef TARGET_BIG_ENDIAN_DEFAULT + #undef TARGET_DEFAULT_TARGET_FLAGS +@@ -214,9 +215,12 @@ struct aarch64_option_extension + /* ISA extensions in AArch64. */ + static const struct aarch64_option_extension all_extensions = + { +-#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, Z) \ +- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, ++#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ ++ {NAME, AARCH64_FL_##IDENT, \ ++ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ ++ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ ++ & ~AARCH64_FL_##IDENT, \ ++ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, + #include "config/aarch64/aarch64-option-extensions.def" + {NULL, 0, 0, 0, false} + }; +@@ -225,9 +229,12 @@ static const struct aarch64_option_extension all_extensions = + bits and extension turned on. Cached for efficiency. */ + static struct aarch64_option_extension all_extensions_by_on = + { +-#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, Z) \ +- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, ++#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ ++ {NAME, AARCH64_FL_##IDENT, \ ++ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ ++ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ ++ & ~AARCH64_FL_##IDENT, \ ++ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, + #include "config/aarch64/aarch64-option-extensions.def" + {NULL, 0, 0, 0, false} + }; +@@ -250,18 +257,18 @@ struct arch_to_arch_name + the default set of architectural feature flags they support. 
*/ + static const struct processor_name_to_arch all_cores = + { +-#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ +- {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, ++#define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \ ++ {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT}, + #include "config/aarch64/aarch64-cores.def" +- {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, ++ {"generic", AARCH64_ARCH_V8A, feature_deps::V8A ().enable}, + {"", aarch64_no_arch, 0} + }; + + /* Map architecture revisions to their string representation. */ + static const struct arch_to_arch_name all_architectures = + { +-#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH, FLAGS) \ +- {AARCH64_ARCH_##ARCH_IDENT, NAME, FLAGS}, ++#define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \ ++ {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable}, + #include "config/aarch64/aarch64-arches.def" + {aarch64_no_arch, "", 0} + }; +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index ece96e22a..9f8246618 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -30,19 +30,19 @@ + Due to the assumptions about the positions of these fields in config.gcc, + NAME should be kept as the first argument. */ + +-AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) +-AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) +-AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) +-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) +-AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) +-AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) +-AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) +-AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) +-AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) +-AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) +-AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) +-AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) +-AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) +-AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) ++AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) ++AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) ++AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH)) ++AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) ++AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) ++AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) ++AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, (V8_6A, LS64)) ++AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, (V8_7A, MOPS)) ++AARCH64_ARCH("armv8-r", generic, V8R , 8, (V8_4A)) ++AARCH64_ARCH("armv9-a", generic, V9A , 9, (V8_5A, SVE2)) ++AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, (V8_6A, V9A)) ++AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, (V8_7A, V9_1A)) ++AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, (V8_8A, V9_2A)) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index cf500d0a9..60299160b 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,132 +46,132 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) +-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) +-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) +-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) +-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, (CRC), cortexa35, 0x41, 0xd02, -1) ++AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, (CRC), cortexa35, 0x41, 0xd04, -1) ++AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, (CRC), cortexa53, 0x41, 0xd03, -1) ++AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, (CRC), cortexa57, 0x41, 0xd07, -1) ++AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, (CRC), cortexa72, 0x41, 0xd08, -1) ++AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, (CRC), cortexa73, 0x41, 0xd09, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("thunderx", thunderx, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a0, -1) + /* Do not swap around "thunderxt88p1" and "thunderxt88", + this order is required to handle variant correctly. */ +-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) +-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) ++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, (CRC, CRYPTO), thunderxt88, 0x43, 0x0a1, 0) ++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, (CRC, CRYPTO), thunderxt88, 0x43, 0x0a1, -1) + + /* OcteonTX is the official name for T81/T83. */ +-AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) +-AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("octeontx", octeontx, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a0, -1) ++AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1) + +-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) +-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) ++AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a2, -1) ++AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. 
*/ +-AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) ++AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3) + + /* APM ('P') cores. */ +-AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1) ++AARCH64_CORE("xgene1", xgene1, xgene1, V8A, (), xgene1, 0x50, 0x000, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +-AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("falkor", falkor, falkor, V8A, (CRC, CRYPTO, RDMA), qdf24xx, 0x51, 0xC00, -1) ++AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, (CRC, CRYPTO, RDMA), qdf24xx, 0x51, 0xC00, -1) + + /* Samsung ('S') cores. */ +-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) ++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, (CRC, CRYPTO), exynosm1, 0x53, 0x001, -1) + + /* HXT ('h') cores. */ +-AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) ++AARCH64_CORE("phecda", phecda, falkor, V8A, (CRC, CRYPTO), qdf24xx, 0x68, 0x000, -1) + + /* ARMv8.1-A Architecture Processors. */ + + /* Broadcom ('B') cores. */ +-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) +-AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, (CRYPTO), thunderx2t99, 0x42, 0x516, -1) ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, (CRYPTO), thunderx2t99, 0x42, 0x516, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, (CRYPTO), thunderx2t99, 0x43, 0x0af, -1) + + /* ARMv8.2-A Architecture Processors. */ + + /* ARM ('A') cores. 
*/ +-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) +-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) +-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) +-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) +-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) +-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) +-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) +-AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) +-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) ++AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, (F16, RCPC, DOTPROD), cortexa53, 0x41, 0xd05, -1) ++AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, (F16, RCPC, DOTPROD), cortexa73, 0x41, 0xd0a, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, (F16, RCPC, DOTPROD), neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS), neoversen1, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS), neoversen1, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd41, -1) ++AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd42, -1) ++AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE, FLAGM, PAUTH), neoversen1, 0x41, 0xd4b, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, (F16, RCPC, DOTPROD, SSBS), cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, (F16, RCPC, DOTPROD, SSBS), cortexa73, 0x41, 0xd43, -1) ++AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 
V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd44, -1) ++AARCH64_CORE("ares", ares, cortexa57, V8_2A, (F16, RCPC, DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, (F16, RCPC, DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1) ++AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, (F16, RCPC, DOTPROD, SSBS), cortexa73, 0x41, 0xd4a, -1) + + /* Cavium ('C') cores. */ +-AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) +-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) +-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b0, -1) ++AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b1, -1) ++AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b2, -1) + /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */ +-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) +-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) +-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) +-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) ++AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b2, -1) ++AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b3, -1) ++AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b4, -1) ++AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b5, -1) + + /* Fujitsu ('F') cores. */ +-AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) ++AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) + + /* HiSilicon ('H') cores. */ +-AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) + + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). */ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. 
*/ +-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) +-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) +-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) +-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) ++AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, (CRC), cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) ++AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, (CRC), cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) ++AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, (CRC), cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) ++AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, (CRC), cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) + + /* ARM DynamIQ big.LITTLE configurations. */ + +-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) +-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) ++AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, (F16, RCPC, DOTPROD), cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) ++AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, (F16, RCPC, DOTPROD), neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) + + /* Armv8-R Architecture Processors. 
*/ +-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, 0, cortexa53, 0x41, 0xd15, -1) ++AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15, -1) + + /* Armv9.0-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) ++AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1) + +-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) ++AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1) + +-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) ++AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1) + +-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1) + +-AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) ++AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) + + #undef AARCH64_CORE +diff --git a/gcc/config/aarch64/aarch64-feature-deps.h b/gcc/config/aarch64/aarch64-feature-deps.h +new file mode 100644 +index 000000000..3e33cb2ce +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-feature-deps.h +@@ -0,0 +1,121 @@ ++/* Feature dependency helpers for AArch64. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef AARCH64_FEATURE_DEPS_H ++#define AARCH64_FEATURE_DEPS_H 1 ++ ++namespace { ++namespace feature_deps { ++ ++/* Together, these definitions of get_flags take a list of ++ feature names (representing functions that are defined below) ++ and return the set of associated flags. 
*/ ++constexpr aarch64_feature_flags get_flags () { return 0; } ++ ++template<typename T1, typename ...Ts> ++constexpr aarch64_feature_flags ++get_flags (T1 i, Ts... args) ++{ ++ return i ().flag | get_flags (args...); ++} ++ ++/* Like get_flags, but return the transitive closure of those features ++ and the ones that they rely on. */ ++constexpr aarch64_feature_flags get_enable () { return 0; } ++ ++template<typename T1, typename ...Ts> ++constexpr aarch64_feature_flags ++get_enable (T1 i, Ts... args) ++{ ++ return i ().enable | get_enable (args...); ++} ++ ++/* Define info<FEATURE> such that it has the following static constant ++ variables: ++ ++ - flag: the aarch64_feature_flags bit associated with FEATURE ++ ++ - enable: the transitive closure of the features that FEATURE requires, ++ plus FLAG itself ++ ++ - explicit_on: the transitive closure of the features that an ++ explicit +FEATURE enables, including FLAG itself. This is ++ always a superset of ENABLE ++ ++ Also define a function FEATURE () that returns an info<FEATURE> ++ (which is an empty structure, since all members are static). ++ ++ Building up the list feature-by-feature ensures that the definition ++ files are in topological order. */ ++template<aarch64_feature> struct info; ++ ++#define HANDLE(IDENT, REQUIRES, EXPLICIT_ON) \ ++ template<> struct info<aarch64_feature::IDENT> { \ ++ static constexpr auto flag = AARCH64_FL_##IDENT; \ ++ static constexpr auto enable = flag | get_enable REQUIRES; \ ++ static constexpr auto explicit_on = enable | get_enable EXPLICIT_ON; \ ++ }; \ ++ constexpr info<aarch64_feature::IDENT> IDENT () \ ++ { \ ++ return info<aarch64_feature::IDENT> (); \ ++ } ++#define AARCH64_OPT_EXTENSION(A, IDENT, REQUIRES, EXPLICIT_ON, E, F) \ ++ HANDLE (IDENT, REQUIRES, EXPLICIT_ON) ++#define AARCH64_ARCH(A, B, IDENT, D, REQUIRES) HANDLE (IDENT, REQUIRES, ()) ++#include "config/aarch64/aarch64-option-extensions.def" ++#include "config/aarch64/aarch64-arches.def" ++#undef HANDLE ++ ++/* Return the set of all features that would need to be disabled if ++ the features in MASK are disabled. ++ ++ Note that the size of the expression varies linearly with the number ++ of features, which means that invoking this function once per feature ++ is quadratic in the number of features. However, collecting the same ++ information at compiler start-up is likely to be quadratic too, so ++ we're better off paying the cost once per compiler build rather than ++ once per compiler run. */ ++constexpr aarch64_feature_flags ++get_flags_off (aarch64_feature_flags mask) ++{ ++ return (0 ++#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) \ ++ | (feature_deps::IDENT ().enable & mask ? AARCH64_FL_##IDENT : 0) ++#include "config/aarch64/aarch64-option-extensions.def" ++ ); ++} ++ ++/* Define root_off_<IDENT> variables for each feature, giving the set of ++ features that must be turned off by +noIDENT. This set is not transitively ++ closed; use get_flags_off to complete the closure. */ ++#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, EXPLICIT_OFF, F) \ ++ constexpr auto root_off_##IDENT \ ++ = AARCH64_FL_##IDENT | get_flags EXPLICIT_OFF; ++#include "config/aarch64/aarch64-option-extensions.def" ++ ++/* Define cpu_<NAME> variables for each CPU, giving the transitive ++ closure of all the features that the CPU supports. 
*/ ++#define AARCH64_CORE(A, CORE_IDENT, C, ARCH_IDENT, FEATURES, F, G, H, I) \ ++ constexpr auto cpu_##CORE_IDENT = ARCH_IDENT ().enable | get_enable FEATURES; ++#include "config/aarch64/aarch64-cores.def" ++ ++} ++} ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index df2c8d19b..bdf4baf30 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -21,23 +21,34 @@ + + Before using #include to read this file, define a macro: + +- AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, +- SYNTHETIC, FEATURE_STRING) +- +- - EXT_NAME is the name of the extension, represented as a string constant. +- - FLAGS_CANONICAL is the canonical internal name for this flag. +- - FLAGS_ON are the bitwise-or of the features that enabling the extension +- adds, or zero if enabling this extension has no effect on other features. +- - FLAGS_OFF are the bitwise-or of the features that disabling the extension +- removes, or zero if disabling this extension has no effect on other +- features. +- - SYNTHETIC is a boolean to indicate whether the option is a purely synthetic +- grouping of options and that the option itself has no feature bit (e.g. +- crypto). This is used to determine when sum of the individual options in +- FLAGS_ON can be replaced by FLAG_CANONICAL in options minimization. If the +- group is synthetic then they can be replaced when all options in FLAGS_ON +- are enabled, otherwise they can only be replaced when +- FLAGS_ON | FLAG_CANONICAL are enabled. ++ AARCH64_OPT_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, ++ EXPLICIT_OFF, FEATURE_STRING) ++ ++ - NAME is the name of the extension, represented as a string constant. ++ ++ - IDENT is the canonical internal name for this flag. ++ ++ - REQUIRES is a list of features that must be enabled whenever this ++ feature is enabled. The relationship is implicitly transitive: ++ if A appears in B's REQUIRES and B appears in C's REQUIRES then ++ A and B must be enabled whenever C is. Thus, turning on C also ++ turns on A and B, while turning off A or B also turns off C. ++ ++ - EXPLICIT_ON is a list of features that are enabled by an explicit ++ +NAME specification, in addition to those listed in REQUIRES. ++ Usually this is an empty list; comments below explain the exceptions. ++ The list is implicitly transitively closed wrt REQUIRES (but *not* ++ to EXPLICIT_ON, since NAME is the only thing explicit in +NAME). ++ Thus if A is in B's REQUIRES and B is in C's EXPLICIT_ON, +C will ++ enable both B and A. B's EXPLICIT_ON has no effect on +C. ++ ++ - EXPLICIT_OFF is a list of features that are disabled by an explicit ++ +noNAME specification, in addition to the features that are transitively ++ dependent on NAME (according to REQUIRES). As with EXPLICIT_ON, ++ this is usually an empty list; comments below explain the exceptions. ++ If a feature A appears in this list then the list implicitly includes ++ any features that are transitively dependent on A (according to REQUIRES). ++ + - FEAT_STRING is a string containing the entries in the 'Features' field of + /proc/cpuinfo on a GNU/Linux system that correspond to this architecture + extension being available. Sometimes multiple entries are needed to enable +@@ -47,197 +58,95 @@ + that are required. Their order is not important. An empty string means + do not detect this feature during auto detection. 
+ +- NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in +- config.gcc. */ +- +-/* Enabling "fp" just enables "fp". +- Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", +- "sha3", sm3/sm4, "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", +- "sve2-bitperm", "i8mm", "f32mm", "f64mm", and "bf16". */ +-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | \ +- AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | \ +- AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | \ +- AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ +- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ +- AARCH64_FL_SVE2_BITPERM | AARCH64_FL_I8MM | \ +- AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_BF16, +- false, "fp") +- +-/* Enabling "simd" also enables "fp". +- Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3", +- "sm3/sm4", "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", +- "sve2-bitperm", "i8mm", "f32mm" and "f64mm". */ +-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ +- AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | \ +- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ +- AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | \ +- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ +- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM | \ +- AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ +- false, "asimd") +- +-/* Enabling or disabling "crc" only changes "crc". */ +-AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") +- +-/* Enabling or disabling "lse" only changes "lse". */ +-AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, false, "atomics") +- +-/* Enabling "fp16" also enables "fp". +- Disabling "fp16" disables "fp16", "fp16fml", "sve", "sve2", +- "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", "f32mm" and +- "f64mm". */ +-AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, \ +- AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F32MM | \ +- AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \ +- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ +- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, \ +- "fphp asimdhp") +- +-/* Enabling or disabling "rcpc" only changes "rcpc". */ +-AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, false, "lrcpc") +- +-/* Enabling "rdma" also enables "fp", "simd". +- Disabling "rdma" just disables "rdma". */ +-AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ +- AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") +- +-/* Enabling "dotprod" also enables "simd". +- Disabling "dotprod" only disables "dotprod". */ +-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \ +- false, "asimddp") +- +-/* Enabling "aes" also enables "simd". +- Disabling "aes" disables "aes" and "sve2-aes'. */ +-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \ +- AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes") +- +-/* Enabling "sha2" also enables "simd". +- Disabling "sha2" just disables "sha2". */ +-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ +- AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ +- AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") +- +-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". +- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", +- "sve2-aes", "sve2-sha3", "sve2-sm4". 
*/ +-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ +- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ +- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ +- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ +- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ ++ The list of features must follow topological order wrt REQUIRES ++ and EXPLICIT_ON. For example, if A is in B's REQUIRES list, A must ++ come before B. This is enforced by aarch64-feature-deps.h. ++ ++ NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in ++ config.gcc. */ ++ ++AARCH64_OPT_EXTENSION("fp", FP, (), (), (), "fp") ++ ++AARCH64_OPT_EXTENSION("simd", SIMD, (FP), (), (), "asimd") ++ ++AARCH64_OPT_EXTENSION("crc", CRC, (), (), (), "crc32") ++ ++AARCH64_OPT_EXTENSION("lse", LSE, (), (), (), "atomics") ++ ++/* +nofp16 disables an implicit F16FML, even though an implicit F16FML ++ does not imply F16. See F16FML for more details. */ ++AARCH64_OPT_EXTENSION("fp16", F16, (FP), (), (F16FML), "fphp asimdhp") ++ ++AARCH64_OPT_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") ++ ++/* An explicit +rdma implies +simd, but +rdma+nosimd still enables scalar ++ RDMA instructions. */ ++AARCH64_OPT_EXTENSION("rdma", RDMA, (), (SIMD), (), "asimdrdm") ++ ++AARCH64_OPT_EXTENSION("dotprod", DOTPROD, (SIMD), (), (), "asimddp") ++ ++AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes") ++ ++AARCH64_OPT_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2") ++ ++/* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them ++ (such as SHA3 and the SVE2 crypto extensions). */ ++AARCH64_OPT_EXTENSION("crypto", CRYPTO, (AES, SHA2), (), (AES, SHA2, SM4), + "aes pmull sha1 sha2") + +-/* Enabling "sha3" enables "simd" and "sha2". +- Disabling "sha3" disables "sha3" and "sve2-sha3". */ +-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ +- AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ +- "sha3 sha512") +- +-/* Enabling "sm4" also enables "simd". +- Disabling "sm4" disables "sm4" and "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \ +- AARCH64_FL_SVE2_SM4, false, "sm3 sm4") +- +-/* Enabling "fp16fml" also enables "fp" and "fp16". +- Disabling "fp16fml" just disables "fp16fml". */ +-AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, \ +- AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") +- +-/* Enabling "sve" also enables "fp16", "fp" and "simd". +- Disabling "sve" disables "sve", "f32mm", "f64mm", "sve2", "sve2-aes", +- "sve2-sha3", "sve2-sm4" and "sve2-bitperm". */ +-AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ +- AARCH64_FL_F16, AARCH64_FL_F32MM | AARCH64_FL_F64MM | \ +- AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ +- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ +- AARCH64_FL_SVE2_BITPERM, false, "sve") +- +-/* Enabling/Disabling "profile" does not enable/disable any other feature. */ +-AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") +- +-/* Enabling/Disabling "rng" only changes "rng". */ +-AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "rng") +- +-/* Enabling/Disabling "memtag" only changes "memtag". */ +-AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") +- +-/* Enabling/Disabling "sb" only changes "sb". */ +-AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "sb") +- +-/* Enabling/Disabling "ssbs" only changes "ssbs". 
*/ +-AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "ssbs") +- +-/* Enabling/Disabling "predres" only changes "predres". */ +-AARCH64_OPT_EXTENSION("predres", AARCH64_FL_PREDRES, 0, 0, false, "") +- +-/* Enabling "sve2" also enables "sve", "fp16", "fp", and "simd". +- Disabling "sve2" disables "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", and +- "sve2-bitperm". */ +-AARCH64_OPT_EXTENSION("sve2", AARCH64_FL_SVE2, AARCH64_FL_SVE | \ +- AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, \ +- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ +- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, "sve2") +- +-/* Enabling "sve2-sm4" also enables "sm4", "simd", "fp16", "fp", "sve", and +- "sve2". Disabling "sve2-sm4" just disables "sve2-sm4". */ +-AARCH64_OPT_EXTENSION("sve2-sm4", AARCH64_FL_SVE2_SM4, AARCH64_FL_SM4 | \ +- AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ +- AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesm4") +- +-/* Enabling "sve2-aes" also enables "aes", "simd", "fp16", "fp", "sve", and +- "sve2". Disabling "sve2-aes" just disables "sve2-aes". */ +-AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ +- AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ +- AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "sveaes") +- +-/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and +- "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */ +-AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ +- AARCH64_FL_SHA2 | \ +- AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ +- AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") +- +-/* Enabling "sve2-bitperm" also enables "simd", "fp16", "fp", "sve", and +- "sve2". Disabling "sve2-bitperm" just disables "sve2-bitperm". */ +-AARCH64_OPT_EXTENSION("sve2-bitperm", AARCH64_FL_SVE2_BITPERM, AARCH64_FL_SIMD | \ +- AARCH64_FL_F16 | AARCH64_FL_FP | AARCH64_FL_SVE | \ +- AARCH64_FL_SVE2, 0, false, "svebitperm") +- +-/* Enabling or disabling "tme" only changes "tme". */ +-AARCH64_OPT_EXTENSION("tme", AARCH64_FL_TME, 0, 0, false, "") +- +-/* Enabling "i8mm" also enables "simd" and "fp". +- Disabling "i8mm" only disables "i8mm". */ +-AARCH64_OPT_EXTENSION("i8mm", AARCH64_FL_I8MM, \ +- AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "i8mm") +- +-/* Enabling "f32mm" also enables "sve", "fp16", "fp", and "simd". +- Disabling "f32mm" only disables "f32mm". */ +-AARCH64_OPT_EXTENSION("f32mm", AARCH64_FL_F32MM, \ +- AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ +- AARCH64_FL_SIMD, 0, false, "f32mm") +- +-/* Enabling "f64mm" also enables "sve", "fp16", "fp", and "simd". +- Disabling "f64mm" only disables "f64mm". */ +-AARCH64_OPT_EXTENSION("f64mm", AARCH64_FL_F64MM, \ +- AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ +- AARCH64_FL_SIMD, 0, false, "f64mm") +- +-/* Enabling "bf16" also enables "simd" and "fp". +- Disabling "bf16" only disables "bf16". */ +-AARCH64_OPT_EXTENSION("bf16", AARCH64_FL_BF16, \ +- AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "bf16") +- +-/* Enabling/Disabling "flagm" only changes "flagm". */ +-AARCH64_OPT_EXTENSION("flagm", AARCH64_FL_FLAGM, 0, 0, false, "flagm") +- +-/* Enabling/Disabling "pauth" only changes "pauth". */ +-AARCH64_OPT_EXTENSION("pauth", AARCH64_FL_PAUTH, 0, 0, false, "paca pacg") +- +-/* Enabling/Disabling "ls64" only changes "ls64". */ +-AARCH64_OPT_EXTENSION("ls64", AARCH64_FL_LS64, 0, 0, false, "") +- +-/* Enabling/disabling "mops" only changes "mops". 
*/ +-AARCH64_OPT_EXTENSION("mops", AARCH64_FL_MOPS, 0, 0, false, "") ++AARCH64_OPT_EXTENSION("sha3", SHA3, (SHA2), (), (), "sha3 sha512") ++ ++AARCH64_OPT_EXTENSION("sm4", SM4, (SIMD), (), (), "sm3 sm4") ++ ++/* An explicit +fp16fml implies +fp16, but a dependence on it does not. ++ Thus -march=armv8.4-a implies F16FML but not F16. -march=armv8.4-a+fp16 ++ and -march=armv8.4-a+fp16fml are equivalent and enable both F16FML and F16. ++ -march=armv8.4-a+nofp16+fp16 enables F16 but not F16FML. */ ++AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), "asimdfhm") ++ ++AARCH64_OPT_EXTENSION("sve", SVE, (SIMD, F16), (), (), "sve") ++ ++AARCH64_OPT_EXTENSION("profile", PROFILE, (), (), (), "") ++ ++AARCH64_OPT_EXTENSION("rng", RNG, (), (), (), "rng") ++ ++AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") ++ ++AARCH64_OPT_EXTENSION("sb", SB, (), (), (), "sb") ++ ++AARCH64_OPT_EXTENSION("ssbs", SSBS, (), (), (), "ssbs") ++ ++AARCH64_OPT_EXTENSION("predres", PREDRES, (), (), (), "") ++ ++AARCH64_OPT_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2") ++ ++AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4") ++ ++AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes") ++ ++AARCH64_OPT_EXTENSION("sve2-sha3", SVE2_SHA3, (SVE2, SHA3), (), (), "svesha3") ++ ++AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (), ++ "svebitperm") ++ ++AARCH64_OPT_EXTENSION("tme", TME, (), (), (), "") ++ ++AARCH64_OPT_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm") ++ ++AARCH64_OPT_EXTENSION("f32mm", F32MM, (SVE), (), (), "f32mm") ++ ++AARCH64_OPT_EXTENSION("f64mm", F64MM, (SVE), (), (), "f64mm") ++ ++/* An explicit +bf16 implies +simd, but +bf16+nosimd still enables scalar BF16 ++ instructions. */ ++AARCH64_OPT_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16") ++ ++AARCH64_OPT_EXTENSION("flagm", FLAGM, (), (), (), "flagm") ++ ++AARCH64_OPT_EXTENSION("pauth", PAUTH, (), (), (), "paca pacg") ++ ++AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") ++ ++AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") + + #undef AARCH64_OPT_EXTENSION +diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h +index 421648a15..ba23c90c4 100644 +--- a/gcc/config/aarch64/aarch64-opts.h ++++ b/gcc/config/aarch64/aarch64-opts.h +@@ -22,6 +22,10 @@ + #ifndef GCC_AARCH64_OPTS_H + #define GCC_AARCH64_OPTS_H + ++#ifndef USED_FOR_TARGET ++typedef uint64_t aarch64_feature_flags; ++#endif ++ + /* The various cores that implement AArch64. 
*/ + enum aarch64_processor + { +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 12d9beee4..c06e99339 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -701,9 +701,8 @@ check_required_extensions (location_t location, tree fndecl, + return check_required_registers (location, fndecl); + + static const struct { uint64_t flag; const char *name; } extensions = { +-#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ +- SYNTHETIC, FEATURE_STRING) \ +- { FLAG_CANONICAL, EXT_NAME }, ++#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \ ++ { AARCH64_FL_##IDENT, EXT_NAME }, + #include "aarch64-option-extensions.def" + }; + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index f975aad07..1363873b1 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -81,6 +81,7 @@ + #include "rtlanal.h" + #include "tree-dfa.h" + #include "asan.h" ++#include "aarch64-feature-deps.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -2935,8 +2936,9 @@ struct processor + /* Architectures implementing AArch64. */ + static const struct processor all_architectures = + { +-#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ +- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL}, ++#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \ ++ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \ ++ feature_deps::ARCH_IDENT ().enable, NULL}, + #include "aarch64-arches.def" + {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; +@@ -2944,12 +2946,12 @@ static const struct processor all_architectures = + /* Processor cores implementing AArch64. */ + static const struct processor all_cores = + { +-#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ +- {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ +- AARCH64_FL_FOR_##ARCH | FLAGS, &COSTS##_tunings}, ++#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, E, COSTS, G, H, I) \ ++ {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ ++ feature_deps::cpu_##IDENT, &COSTS##_tunings}, + #include "aarch64-cores.def" + {"generic", generic, cortexa53, AARCH64_ARCH_V8A, +- AARCH64_FL_FOR_V8A, &generic_tunings}, ++ feature_deps::V8A ().enable, &generic_tunings}, + {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} + }; + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index f4e0cd148..50a2ef444 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -144,149 +144,27 @@ + + #define PCC_BITFIELD_TYPE_MATTERS 1 + +-/* Instruction tuning/selection flags. */ +- +-/* Bit values used to identify processor capabilities. */ +-#define AARCH64_FL_SIMD (1 << 0) /* Has SIMD instructions. */ +-#define AARCH64_FL_FP (1 << 1) /* Has FP. */ +-#define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ +-#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ +-/* ARMv8.1-A architecture extensions. */ +-#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ +-#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ +-#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */ +-/* Armv8-R. */ +-#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */ +-/* ARMv8.2-A architecture extensions. */ +-#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */ +-#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. 
*/ +-#define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */ +-/* ARMv8.3-A architecture extensions. */ +-#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */ +-#define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */ +-#define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */ +-/* New flags to split crypto into aes and sha2. */ +-#define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */ +-#define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */ +-/* ARMv8.4-A architecture extensions. */ +-#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */ +-#define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ +-#define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ +-#define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ +- +-/* Statistical Profiling extensions. */ +-#define AARCH64_FL_PROFILE (1 << 21) +- +-/* ARMv8.5-A architecture extensions. */ +-#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */ +-#define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ +-#define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging +- Extensions. */ +- +-/* Speculation Barrier instruction supported. */ +-#define AARCH64_FL_SB (1 << 25) +- +-/* Speculative Store Bypass Safe instruction supported. */ +-#define AARCH64_FL_SSBS (1 << 26) +- +-/* Execution and Data Prediction Restriction instructions supported. */ +-#define AARCH64_FL_PREDRES (1 << 27) +- +-/* SVE2 instruction supported. */ +-#define AARCH64_FL_SVE2 (1 << 28) +-#define AARCH64_FL_SVE2_AES (1 << 29) +-#define AARCH64_FL_SVE2_SM4 (1 << 30) +-#define AARCH64_FL_SVE2_SHA3 (1ULL << 31) +-#define AARCH64_FL_SVE2_BITPERM (1ULL << 32) +- +-/* Transactional Memory Extension. */ +-#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ +- +-/* Armv8.6-A architecture extensions. */ +-#define AARCH64_FL_V8_6A (1ULL << 34) +- +-/* 8-bit Integer Matrix Multiply (I8MM) extensions. */ +-#define AARCH64_FL_I8MM (1ULL << 35) +- +-/* Brain half-precision floating-point (BFloat16) Extension. */ +-#define AARCH64_FL_BF16 (1ULL << 36) +- +-/* 32-bit Floating-point Matrix Multiply (F32MM) extensions. */ +-#define AARCH64_FL_F32MM (1ULL << 37) +- +-/* 64-bit Floating-point Matrix Multiply (F64MM) extensions. */ +-#define AARCH64_FL_F64MM (1ULL << 38) +- +-/* Flag Manipulation Instructions (FLAGM) extension. */ +-#define AARCH64_FL_FLAGM (1ULL << 39) +- +-/* Pointer Authentication (PAUTH) extension. */ +-#define AARCH64_FL_PAUTH (1ULL << 40) +- +-/* Armv9.0-A. */ +-#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */ +- +-/* 64-byte atomic load/store extensions. */ +-#define AARCH64_FL_LS64 (1ULL << 42) +- +-/* Armv8.7-a architecture extensions. */ +-#define AARCH64_FL_V8_7A (1ULL << 43) +- +-/* Hardware memory operation instructions. */ +-#define AARCH64_FL_MOPS (1ULL << 44) +- +-/* Armv8.8-a architecture extensions. */ +-#define AARCH64_FL_V8_8A (1ULL << 45) +- +-/* Armv9.1-A. */ +-#define AARCH64_FL_V9_1A (1ULL << 46) +- +-/* Armv9.2-A. */ +-#define AARCH64_FL_V9_2A (1ULL << 47) +- +-/* Armv9.3-A. */ +-#define AARCH64_FL_V9_3A (1ULL << 48) +- +-/* Has FP and SIMD. */ +-#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) +- +-/* Has FP without SIMD. */ +-#define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD) +- +-/* Architecture flags that effect instruction selection. 
*/ +-#define AARCH64_FL_FOR_V8A (AARCH64_FL_FPSIMD) +-#define AARCH64_FL_FOR_V8_1A \ +- (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \ +- | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) +-#define AARCH64_FL_FOR_V8_2A \ +- (AARCH64_FL_FOR_V8_1A | AARCH64_FL_V8_2A) +-#define AARCH64_FL_FOR_V8_3A \ +- (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) +-#define AARCH64_FL_FOR_V8_4A \ +- (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ +- | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM) +-#define AARCH64_FL_FOR_V8_5A \ +- (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \ +- | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) +-#define AARCH64_FL_FOR_V8_6A \ +- (AARCH64_FL_FOR_V8_5A | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ +- | AARCH64_FL_I8MM | AARCH64_FL_BF16) +-#define AARCH64_FL_FOR_V8_7A \ +- (AARCH64_FL_FOR_V8_6A | AARCH64_FL_V8_7A | AARCH64_FL_LS64) +-#define AARCH64_FL_FOR_V8_8A \ +- (AARCH64_FL_FOR_V8_7A | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) +- +-#define AARCH64_FL_FOR_V8R \ +- (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8R) +-#define AARCH64_FL_FOR_V9A \ +- (AARCH64_FL_FOR_V8_5A | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ +- | AARCH64_FL_F16) +-#define AARCH64_FL_FOR_V9_1A \ +- (AARCH64_FL_FOR_V9A | AARCH64_FL_FOR_V8_6A | AARCH64_FL_V9_1A) +-#define AARCH64_FL_FOR_V9_2A \ +- (AARCH64_FL_FOR_V9_1A | AARCH64_FL_FOR_V8_7A | AARCH64_FL_V9_2A) +-#define AARCH64_FL_FOR_V9_3A \ +- (AARCH64_FL_FOR_V9_2A | AARCH64_FL_FOR_V8_8A | AARCH64_FL_V9_3A) ++#ifndef USED_FOR_TARGET ++ ++/* Define an enum of all features (architectures and extensions). */ ++enum class aarch64_feature : unsigned char { ++#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) IDENT, ++#define AARCH64_ARCH(A, B, IDENT, D, E) IDENT, ++#include "aarch64-option-extensions.def" ++#include "aarch64-arches.def" ++}; ++ ++/* Define unique flags for each of the above. */ ++#define HANDLE(IDENT) \ ++ constexpr auto AARCH64_FL_##IDENT \ ++ = aarch64_feature_flags (1) << int (aarch64_feature::IDENT); ++#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) HANDLE (IDENT) ++#define AARCH64_ARCH(A, B, IDENT, D, E) HANDLE (IDENT) ++#include "aarch64-option-extensions.def" ++#include "aarch64-arches.def" ++#undef HANDLE ++ ++#endif + + /* Macros to test ISA flags. 
*/
+
+diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
+index 97690de62..ddfc9451f 100644
+--- a/gcc/config/aarch64/driver-aarch64.cc
++++ b/gcc/config/aarch64/driver-aarch64.cc
+@@ -26,6 +26,7 @@
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "aarch64-protos.h"
++#include "aarch64-feature-deps.h"
+
+ struct aarch64_arch_extension
+ {
+@@ -34,9 +35,8 @@ struct aarch64_arch_extension
+ const char *feat_string;
+ };
+
+-#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \
+- SYNTHETIC, FEATURE_STRING) \
+- { EXT_NAME, FLAG_CANONICAL, FEATURE_STRING },
++#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, FEATURE_STRING) \
++ { EXT_NAME, AARCH64_FL_##IDENT, FEATURE_STRING },
+ static struct aarch64_arch_extension aarch64_extensions[] =
+ {
+ #include "aarch64-option-extensions.def"
+@@ -62,7 +62,7 @@ struct aarch64_core_data
+ #define DEFAULT_ARCH "8A"
+
+ #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
+-  { CORE_NAME, #ARCH, IMP, PART, VARIANT, AARCH64_FL_FOR_##ARCH | FLAGS },
++  { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
+
+ static struct aarch64_core_data aarch64_cpu_data[] =
+ {
+@@ -80,7 +80,7 @@ struct aarch64_arch_driver_info
+
+ /* Skip the leading "V" in the architecture name. */
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
+-  { #ARCH_IDENT + 1, NAME, FLAGS },
++  { #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable },
+
+ static struct aarch64_arch_driver_info aarch64_arches[] =
+ {
+--
+2.33.0
+
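For orientation, the X-macro scheme this patch series introduces — generating both an enum of features and one constexpr flag bit per feature from the same .def include — can be reduced to a few lines of standalone C++. The feature list below is invented for illustration; the real code includes aarch64-option-extensions.def and aarch64-arches.def:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the aarch64 .def files.
#define FEATURE_LIST(X) X (FP) X (SIMD) X (SVE)

// First expansion: one enumerator per feature.
enum class feature : unsigned char {
#define X(IDENT) IDENT,
  FEATURE_LIST (X)
#undef X
};

using feature_flags = uint64_t;

// Second expansion: a unique constexpr flag bit per feature,
// numbered automatically from the enum position.
#define X(IDENT) \
  constexpr feature_flags FL_##IDENT \
    = feature_flags (1) << int (feature::IDENT);
FEATURE_LIST (X)
#undef X

int main ()
{
  feature_flags flags = FL_FP | FL_SVE;
  std::printf ("%d\n", int ((flags & FL_SVE) != 0));  /* prints 1 */
}

Because the bit positions come from the enum, adding a feature to the list can never collide with a hand-numbered constant — which is exactly the maintenance problem the deleted AARCH64_FL_* block had.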
View file
_service:tar_scm:0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch
Added
@@ -0,0 +1,65 @@
+From 8cab312a44efc4711fb7adad65f1314349295591 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 23 Jan 2024 11:28:09 +0800
+Subject: [PATCH 118/188] LoongArch: Disable TLS type symbols from generating
+ non-zero offsets.
+
+TLS gd ld and ie type symbols will generate corresponding GOT entries,
+so non-zero offsets cannot be generated.
+The address of TLS le type symbol+addend is not implemented in binutils,
+so non-zero offset is not generated here for the time being.
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
+ For symbols of type tls, non-zero Offset is not generated.
+---
+ gcc/config/loongarch/loongarch.cc | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 5f22b9dd8..9cdd4ed15 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -1921,11 +1921,7 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
+ x = UNSPEC_ADDRESS (x);
+ }
+ else if (SYMBOL_REF_P (x) || LABEL_REF_P (x))
+- {
+- *symbol_type = loongarch_classify_symbol (x);
+- if (*symbol_type == SYMBOL_TLS)
+- return true;
+- }
++ *symbol_type = loongarch_classify_symbol (x);
+ else
+ return false;
+
+@@ -1936,17 +1932,21 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
+ relocations. */
+ switch (*symbol_type)
+ {
+- case SYMBOL_TLS_IE:
+- case SYMBOL_TLS_LE:
+- case SYMBOL_TLSGD:
+- case SYMBOL_TLSLDM:
+ case SYMBOL_PCREL:
+ case SYMBOL_PCREL64:
+ /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */
+ return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
+
++ /* The following symbol types do not allow non-zero offsets. */
+ case SYMBOL_GOT_DISP:
++ case SYMBOL_TLS_IE:
++ case SYMBOL_TLSGD:
++ case SYMBOL_TLSLDM:
+ case SYMBOL_TLS:
++ /* From an implementation perspective, tls_le symbols are allowed to
++ have non-zero offsets, but currently binutils has not added support,
++ so the generation of non-zero offsets is prohibited here. */
++ case SYMBOL_TLS_LE:
+ return false;
+ }
+ gcc_unreachable ();
+--
+2.43.0
+
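The decision the patch changes fits in a small standalone model: "symbol + offset" stays addressable for PC-relative symbols (within the GAS range), while anything resolved through a GOT entry — and, for now, tls_le — must keep a zero addend. Everything below is illustrative; the enum and sext32 are stand-ins for loongarch_symbol_type and sext_hwi (x, 32), not the real compiler code:

#include <cstdint>

enum symbol_type { SYM_PCREL, SYM_GOT_DISP, SYM_TLS_IE, SYM_TLS_GD,
                   SYM_TLS_LDM, SYM_TLS_LE };

// Sign-extend from 32 bits, mimicking sext_hwi (x, 32).
static int64_t sext32 (int64_t x) { return (int32_t) x; }

// Would "symbol + offset" still be a valid symbolic constant?
bool symbol_plus_offset_ok (symbol_type type, int64_t offset)
{
  switch (type)
    {
    case SYM_PCREL:
      return sext32 (offset) == offset;  // GAS rejects offsets outside [-2^31, 2^31-1]
    case SYM_GOT_DISP:
    case SYM_TLS_IE:
    case SYM_TLS_GD:
    case SYM_TLS_LDM:
      return offset == 0;                // the GOT entry names the bare symbol
    case SYM_TLS_LE:
      return offset == 0;                // binutils lacks le + addend support so far
    }
  return false;
}

int main () { return symbol_plus_offset_ok (SYM_TLS_GD, 0) ? 0 : 1; }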
View file
_service:tar_scm:0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
Added
@@ -0,0 +1,467 @@
+From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 020/157] [Backport][SME] aarch64: Simplify generation of .arch
+ strings
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d
+
+aarch64-common.cc has two arrays, one maintaining the original
+definition order and one sorted by population count. Sorting
+by population count was a way of ensuring topological ordering,
+taking advantage of the fact that the entries are partially
+ordered by the subset relation. However, the sorting is not
+needed now that the .def file is forced to have topological
+order from the outset.
+
+Other changes are:
+
+(1) The population count used:
+
+ uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
+ uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
+ int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
+ int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
+
+ where I think the & was supposed to be |. This meant that the
+ counts would always be 1 in practice, since flag_canonical is
+ a single bit. This led us to printing +nofp+nosimd even though
+ GCC "knows" (and GAS agrees) that +nofp disables simd.
+
+(2) The .arch output code converts +aes+sha2 to +crypto. I think
+ the main reason for doing this is to support assemblers that
+ predate the individual per-feature crypto flags. It therefore
+ seems more natural to treat it as a special case, rather than
+ as an instance of a general pattern. Hopefully we won't do
+ something similar in future!
+
+ (There is already special handling of CRC, for different reasons.)
+
+(3) Previously, if the /proc/cpuinfo code saw a feature like sve,
+ it would assume the presence of all the features that sve
+ depends on. It would be possible to keep that behaviour
+ if necessary, but it was simpler to assume the presence of
+ fp16 (say) only when fphp is present. There's an argument
+ that that's more conservatively correct too.
+
+gcc/
+ * common/config/aarch64/aarch64-common.cc
+ (TARGET_OPTION_INIT_STRUCT): Delete.
+ (aarch64_option_extension): Remove is_synthetic_flag.
+ (all_extensions): Update accordingly.
+ (all_extensions_by_on, opt_ext, opt_ext_cmp): Delete.
+ (aarch64_option_init_struct, aarch64_contains_opt): Delete.
+ (aarch64_get_extension_string_for_isa_flags): Rewrite to use
+ all_extensions instead of all_extensions_on.
+
+gcc/testsuite/
+ * gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve.
+ * gcc.target/aarch64/cpunative/info_9: Likewise svesm4.
+ * gcc.target/aarch64/cpunative/info_15: Likewise.
+ * gcc.target/aarch64/cpunative/info_16: Likewise sve2.
+ * gcc.target/aarch64/cpunative/info_17: Likewise.
+ * gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp
+ rather than +nofp+nosimd.
+ * gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise.
+ * gcc.target/aarch64/target_attr_15.c: Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 244 ++++--------------
+ .../gcc.target/aarch64/cpunative/info_15 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_16 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_17 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_8 | 2 +-
+ .../gcc.target/aarch64/cpunative/info_9 | 2 +-
+ .../aarch64/cpunative/native_cpu_10.c | 2 +-
+ .../aarch64/cpunative/native_cpu_2.c | 2 +-
+ .../gcc.target/aarch64/target_attr_15.c | 2 +-
+ 9 files changed, 55 insertions(+), 205 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 74729bb30..057dc094d 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -42,8 +42,6 @@
+
+ #undef TARGET_OPTION_OPTIMIZATION_TABLE
+ #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table
+-#undef TARGET_OPTION_INIT_STRUCT
+-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct
+
+ #define INVALID_IMP ((unsigned) -1)
+
+@@ -209,7 +207,6 @@ struct aarch64_option_extension
+ const uint64_t flag_canonical;
+ const uint64_t flags_on;
+ const uint64_t flags_off;
+- const bool is_synthetic;
+ };
+
+ /* ISA extensions in AArch64. */
+@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions[] =
+ {
+#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+ {NAME, AARCH64_FL_##IDENT, \
+ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+- & ~AARCH64_FL_##IDENT, \
+- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
++ & ~AARCH64_FL_##IDENT},
+ #include "config/aarch64/aarch64-option-extensions.def"
+- {NULL, 0, 0, 0, false}
+-};
+-
+-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of
+- bits and extension turned on. Cached for efficiency. */
+-static struct aarch64_option_extension all_extensions_by_on[] =
+-{
+-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+- {NAME, AARCH64_FL_##IDENT, \
+- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+- & ~AARCH64_FL_##IDENT, \
+- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
+-#include "config/aarch64/aarch64-option-extensions.def"
+- {NULL, 0, 0, 0, false}
++ {NULL, 0, 0, 0}
+ };
+
+ struct processor_name_to_arch
+ {
+@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
+ candidates->safe_push (opt->name);
+ }
+
+-/* Comparer to sort aarch64's feature extensions by population count. Largest
+- first. */
+-
+-typedef const struct aarch64_option_extension opt_ext;
+-
+-int opt_ext_cmp (const void* a, const void* b)
+-{
+- opt_ext *opt_a = (opt_ext *)a;
+- opt_ext *opt_b = (opt_ext *)b;
+-
+- /* We consider the total set of bits an options turns on to be the union of
+- the singleton set containing the option itself and the set of options it
+- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and
+- FL_SIMD. As such the set of bits represented by this option is
+- {FL_DOTPROD, FL_SIMD}. */
+- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
+- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
+- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
+- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
+- int order = popcnt_b - popcnt_a;
+-
+- /* If they have the same amount of bits set, give it a more
+- deterministic ordering by using the value of the bits themselves.
*/
+- if (order != 0)
+- return order;
+-
+- if (total_flags_a != total_flags_b)
+- return total_flags_a < total_flags_b ? 1 : -1;
+-
+- return 0;
+-}
+-
+-/* Implement TARGET_OPTION_INIT_STRUCT. */
+-
+-static void
+-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED)
+-{
+- /* Sort the extensions based on how many bits they set, order the larger
+- counts first. We sort the list because this makes processing the
+- feature bits O(n) instead of O(n^2). While n is small, the function
+- to calculate the feature strings is called on every options push,
+- pop and attribute change (arm_neon headers, lto etc all cause this to
+- happen quite frequently). It is a trade-off between time and space and
+- so time won. */
+- int n_extensions
+- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension);
+- qsort (&all_extensions_by_on[0], n_extensions,
+- sizeof (struct aarch64_option_extension), opt_ext_cmp);
+-}
+-
+-/* Checks to see if enough bits from the option OPT are enabled in
+- ISA_FLAG_BITS to be able to replace the individual options with the
+- canonicalized version of the option. This is done based on two rules:
+-
+- 1) Synthetic groups, such as +crypto we only care about the bits that are
+- turned on. e.g. +aes+sha2 can be replaced with +crypto.
+-
+- 2) Options that themselves have a bit, such as +rdma, in this case, all the
+- feature bits they turn on must be available and the bit for the option
+- itself must be. In this case it's effectively a reduction rather than a
+- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would
+- need +rdma+fp+simd which is reduced down to +rdma.
+-*/
+-
+-static bool
+-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt)
+-{
+- uint64_t flags_check
+- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical;
+-
+- return (isa_flag_bits & flags_check) == flags_check;
+-}
+-
+ /* Return a string representation of ISA_FLAGS. DEFAULT_ARCH_FLAGS
+ gives the default set of flags which are implied by whatever -march
+ we'd put out. Our job is to figure out the minimal set of "+" and
+@@ -436,118 +345,59 @@ std::string
+ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
+ uint64_t default_arch_flags)
+ {
+- const struct aarch64_option_extension *opt = NULL;
+ std::string outstr = "";
+
+- uint64_t isa_flag_bits = isa_flags;
+-
+- /* Pass one: Minimize the search space by reducing the set of options
+- to the smallest set that still turns on the same features as before in
+- conjunction with the bits that are turned on by default for the selected
+- architecture. */
+- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
++ aarch64_feature_flags current_flags = default_arch_flags;
++
++ /* As a special case, do not assume that the assembler will enable CRC
++ even if it is the default for the architecture. This is required
++ because some CPUs had an incorrect specification in older assemblers:
++ even though CRC should be the default for these cases the -mcpu
++ values would not turn it on.
++
++ However, assemblers with Armv8-R AArch64 support should not have this
++ issue, so we don't need this fix when targeting Armv8-R. */
++ auto explicit_flags = (!(current_flags & AARCH64_FL_V8R)
++ ? AARCH64_FL_CRC : 0);
++
++ /* Add the features in isa_flags & ~current_flags using the smallest
++ possible number of extensions. We can do this by iterating over the
++ array in reverse order, since the array is sorted topologically.
++ But in order to make the output more readable, it seems better
++ to add the strings in definition order. */
++ aarch64_feature_flags added = 0;
++ for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; )
+ {
+- /* If the bit is on by default, then all the options it turns on are also
+- on by default due to the transitive dependencies.
+-
+- If the option is enabled explicitly in the set then we need to emit
+- an option for it. Since this list is sorted by extensions setting the
+- largest number of featers first, we can be sure that nothing else will
+- ever need to set the bits we already set. Consider the following
+- situation:
+-
+- Feat1 = A + B + C
+- Feat2 = A + B
+- Feat3 = A + D
+- Feat4 = B + C
+- Feat5 = C
+-
+- The following results are expected:
+-
+- A + C = A + Feat5
+- B + C = Feat4
+- Feat4 + A = Feat1
+- Feat2 + Feat5 = Feat1
+- Feat1 + C = Feat1
+- Feat3 + Feat4 = Feat1 + D
+-
+- This search assumes that all invidual feature bits are use visible,
+- in other words the user must be able to do +A, +B, +C and +D. */
+- if (aarch64_contains_opt (isa_flag_bits | default_arch_flags, opt))
+- {
+- /* We remove all the dependent bits, to prevent them from being turned
+- on twice. This only works because we assume that all there are
+- individual options to set all bits standalone. */
+-
+- /* PR target/94396.
+-
+- For flags which would already imply a bit that's on by default (e.g
+- fp16fml which implies +fp,+fp16) we must emit the flags that are not
+- on by default. i.e. in Armv8.4-a +fp16fml is default if +fp16. So
+- if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit
+- +fp16. But if +fp16fml is used in an architecture where it is
+- completely optional we only have to emit the canonical flag. */
+- uint64_t toggle_bits = opt->flags_on & default_arch_flags;
+- /* Now check to see if the canonical flag is on by default. If it
+- is not then enabling it will enable all bits in flags_on. */
+- if ((opt->flag_canonical & default_arch_flags) == 0)
+- toggle_bits = opt->flags_on;
+-
+- isa_flag_bits &= ~toggle_bits;
+- isa_flag_bits |= opt->flag_canonical;
+- }
+- }
++ auto &opt = all_extensions[i];
+
+- /* By toggling bits on and off, we may have set bits on that are already
+- enabled by default. So we mask the default set out so we don't emit an
+- option for them. Instead of checking for this each time during Pass One
+- we just mask all default bits away at the end. */
+- isa_flag_bits &= ~default_arch_flags;
+-
+- /* We now have the smallest set of features we need to process. A subsequent
+- linear scan of the bits in isa_flag_bits will allow us to print the ext
+- names. However as a special case if CRC was enabled before, always print
+- it. This is required because some CPUs have an incorrect specification
+- in older assemblers. Even though CRC should be the default for these
+- cases the -mcpu values won't turn it on.
+-
+- Note that assemblers with Armv8-R AArch64 support should not have this
+- issue, so we don't need this fix when targeting Armv8-R. */
+- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
+- isa_flag_bits |= AARCH64_ISA_CRC;
+-
+- /* Pass Two:
+- Print the option names that we're sure we must turn on. These are only
+- optional extension names. Mandatory ones have already been removed and
+- ones we explicitly want off have been too.
*/ +- for (opt = all_extensions_by_on; opt->name != NULL; opt++) +- { +- if (isa_flag_bits & opt->flag_canonical) +- { +- outstr += "+"; +- outstr += opt->name; +- } +- } ++ /* As a special case, emit +crypto rather than +aes+sha2, ++ in order to support assemblers that predate the separate ++ per-feature crypto flags. */ ++ auto flags = opt.flag_canonical; ++ if (flags == AARCH64_FL_CRYPTO) ++ flags = AARCH64_FL_AES | AARCH64_FL_SHA2; + +- /* Pass Three: +- Print out a +no for any mandatory extension that we are +- turning off. By this point aarch64_parse_extension would have ensured +- that any optional extensions are turned off. The only things left are +- things that can't be turned off usually, e.g. something that is on by +- default because it's mandatory and we want it off. For turning off bits +- we don't guarantee the smallest set of flags, but instead just emit all +- options the user has specified. +- +- The assembler requires all +<opts> to be printed before +no<opts>. */ +- for (opt = all_extensions_by_on; opt->name != NULL; opt++) +- { +- if ((~isa_flags) & opt->flag_canonical +- && !((~default_arch_flags) & opt->flag_canonical)) ++ if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags) + { +- outstr += "+no"; +- outstr += opt->name; ++ current_flags |= opt.flag_canonical | opt.flags_on; ++ added |= opt.flag_canonical; + } + } ++ for (auto &opt : all_extensions) ++ if (added & opt.flag_canonical) ++ { ++ outstr += "+"; ++ outstr += opt.name; ++ } ++ ++ /* Remove the features in current_flags & ~isa_flags. */ ++ for (auto &opt : all_extensions) ++ if (opt.flag_canonical & current_flags & ~isa_flags) ++ { ++ current_flags &= ~(opt.flag_canonical | opt.flags_off); ++ outstr += "+no"; ++ outstr += opt.name; ++ } + + return outstr; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 +index bc6453945..6b425ea20 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp ++Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp sve sve2 fphp asimdhp sm3 sm4 + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 +index 2c04ff19c..26f01c496 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 ++Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp + CPU implementer : 0xfe + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 +index 2c04ff19c..26f01c496 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 ++Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp + CPU 
implementer : 0xfe + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 +index d6d9d03a2..76da16c57 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : asimd sve fp ++Features : asimd sve fp fphp asimdhp + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 +index c9aa4a9a0..14703dd1d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 +@@ -1,6 +1,6 @@ + processor : 0 + BogoMIPS : 100.00 +-Features : asimd fp svesm4 ++Features : asimd fp svesm4 sve sve2 fphp asimdhp sm3 sm4 + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c +index 6a753965c..ddb06b822 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ + + /* Test one with no entry in feature list. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c +index aad71f434..edbdb5626 100644 +--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c +@@ -7,6 +7,6 @@ int main() + return 0; + } + +-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */ ++/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ + + /* Test one where asimd is provided byt no fp. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c +index 108b372e4..069a00108 100644 +--- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c ++++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c +@@ -10,4 +10,4 @@ foo (int a) + return a + 1; + } + +-/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\\+nosimd\n" 1 } } */ ++/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\n" 1 } } */ +-- +2.33.0 +
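The rewritten aarch64_get_extension_string_for_isa_flags is easier to follow against a toy model: because the table is topologically sorted and flags_on/flags_off hold transitive closures, one backwards "add" pass and one forwards "remove" pass suffice. The sketch below follows that structure under the same assumptions; the four features and their dependencies are made up:

#include <cstdint>
#include <cstdio>
#include <string>

struct ext { const char *name; uint64_t flag, on, off; };

constexpr uint64_t FP = 1, SIMD = 2, FP16 = 4, SVE = 8;
// Topological order; on/off are transitive closures including the bit itself.
constexpr ext table[] = {
  {"fp",   FP,   FP,                     FP | SIMD | FP16 | SVE},
  {"simd", SIMD, FP | SIMD,              SIMD | SVE},
  {"fp16", FP16, FP | FP16,              FP16 | SVE},
  {"sve",  SVE,  FP | SIMD | FP16 | SVE, SVE},
};

std::string ext_string (uint64_t isa, uint64_t deflt)
{
  uint64_t current = deflt, added = 0;
  std::string out;
  // Add pass: most-dependent first, so one name covers its implied features.
  for (int i = 4; i-- > 0; )
    if ((table[i].flag & isa & ~current) == table[i].flag)
      {
        current |= table[i].on;
        added |= table[i].flag;
      }
  for (const ext &e : table)       // emit in definition order for readability
    if (added & e.flag)
      (out += "+") += e.name;
  // Remove pass: "+no" entries for defaults the caller does not want.
  for (const ext &e : table)
    if (e.flag & current & ~isa)
      {
        current &= ~e.off;
        (out += "+no") += e.name;
      }
  return out;
}

int main ()
{
  std::printf ("%s\n", ext_string (FP | SIMD | FP16 | SVE, FP | SIMD).c_str ()); // +sve
  std::printf ("%s\n", ext_string (FP, FP | SIMD).c_str ());                     // +nosimd
}

Note how "+sve" alone covers fp16 (its dependency), and how "+nofp+nosimd" collapses to "+nosimd" when only simd must go — the two behaviours the patch's testsuite changes check for.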
View file
_service:tar_scm:0119-LoongArch-Remove-vec_concatz-mode-pattern.patch
Added
@@ -0,0 +1,75 @@
+From e19c5ba24839d7446f1874b0b33bd61e27e36905 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Wed, 24 Jan 2024 17:19:13 +0800
+Subject: [PATCH 119/188] LoongArch: Remove vec_concatz<mode> pattern.
+
+It is incorrect to use vld/vori to implement the vec_concatz<mode> because when the LSX
+instruction is used to update the value of the vector register, the upper 128 bits of
+the vector register will not be zeroed.
+
+gcc/ChangeLog:
+
+ * config/loongarch/lasx.md (@vec_concatz<mode>): Remove this define_insn pattern.
+ * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): Use vec_concat<mode>.
+---
+ gcc/config/loongarch/lasx.md | 15 ---------------
+ gcc/config/loongarch/loongarch.cc | 17 ++++++-----------
+ 2 files changed, 6 insertions(+), 26 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index fdfd65e4a..a5128c30c 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -582,21 +582,6 @@
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+-(define_insn "@vec_concatz<mode>"
+- [(set (match_operand:LASX 0 "register_operand" "=f")
+- (vec_concat:LASX
+- (match_operand:<VHMODE256_ALL> 1 "nonimmediate_operand")
+- (match_operand:<VHMODE256_ALL> 2 "const_0_operand")))]
+- "ISA_HAS_LASX"
+-{
+- if (MEM_P (operands[1]))
+- return "vld\t%w0,%1";
+- else
+- return "vori.b\t%w0,%w1,0";
+-}
+- [(set_attr "type" "simd_splat")
+- (set_attr "mode" "<MODE>")])
+-
+ (define_insn "vec_concat<mode>"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+ (vec_concat:LASX
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9cdd4ed15..9bd931549 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -9912,17 +9912,12 @@ loongarch_expand_vector_group_init (rtx target, rtx vals)
+ gcc_unreachable ();
+ }
+
+- if (high == CONST0_RTX (half_mode))
+- emit_insn (gen_vec_concatz (vmode, target, low, high));
+- else
+- {
+- if (!register_operand (low, half_mode))
+- low = force_reg (half_mode, low);
+- if (!register_operand (high, half_mode))
+- high = force_reg (half_mode, high);
+- emit_insn (gen_rtx_SET (target,
+- gen_rtx_VEC_CONCAT (vmode, low, high)));
+- }
++ if (!register_operand (low, half_mode))
++ low = force_reg (half_mode, low);
++ if (!register_operand (high, half_mode))
++ high = force_reg (half_mode, high);
++ emit_insn (gen_rtx_SET (target,
++ gen_rtx_VEC_CONCAT (vmode, low, high)));
+ }
+
+ /* Expand initialization of a vector which has all same elements. */
+--
+2.43.0
+
View file
_service:tar_scm:0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch
Added
@@ -0,0 +1,43 @@
+From 7096be1673a10da5218a8620fb40b4b26e61c1d4 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 021/157] [Backport][SME] aarch64: Avoid std::string in static
+ data
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=13af9e9fda391f4f0566ad8f0b4d0448a7e984d0
+
+Just a minor patch to avoid having to construct std::strings
+in static data.
+
+gcc/
+ * common/config/aarch64/aarch64-common.cc (processor_name_to_arch)
+ (arch_to_arch_name): Use const char * instead of std::string.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 057dc094d..2bdf51b8b 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -223,7 +223,7 @@ static const struct aarch64_option_extension all_extensions =
+
+ struct processor_name_to_arch
+ {
+- const std::string processor_name;
++ const char *const processor_name;
+ const enum aarch64_arch arch;
+ const uint64_t flags;
+ };
+@@ -231,7 +231,7 @@ struct processor_name_to_arch
+ struct arch_to_arch_name
+ {
+ const enum aarch64_arch arch;
+- const std::string arch_name;
++ const char *const arch_name;
+ const uint64_t flags;
+ };
+
+--
+2.33.0
+
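The motivation fits in a six-line illustration: a std::string member drags a static table into dynamic (startup-time) initialization, while const char * permits constant initialization — which constexpr can then enforce, as the next patch in this series does. The struct names below are invented:

#include <string>

struct with_string { std::string name; int arch; };
struct with_cstr   { const char *name; int arch; };

// Requires a constructor run at program startup (dynamic initialization).
static const with_string table_a[] = { {"armv8-a", 8} };

// Constant-initialized; marking it constexpr makes the compiler verify that.
static constexpr with_cstr table_b[] = { {"armv8-a", 8} };

int main () { return table_b[0].arch - table_a[0].arch; }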
View file
_service:tar_scm:0120-LoongArch-Optimize-implementation-of-single-precisio.patch
Added
@@ -0,0 +1,107 @@
+From cb9180ef1fb7e7b97a60adc3d3908b9684771cd8 Mon Sep 17 00:00:00 2001
+From: Li Wei <liwei@loongson.cn>
+Date: Wed, 24 Jan 2024 17:44:17 +0800
+Subject: [PATCH 120/188] LoongArch: Optimize implementation of
+ single-precision floating-point approximate division.
+
+We found that in the spec17 521.wrf program, some loop invariant code generated
+from single-precision floating-point approximate division calculation failed to
+propose a loop. This is because the pseudo-register that stores the
+intermediate temporary calculation results is rewritten in the implementation
+of single-precision floating-point approximate division, failing to propose
+invariants in the loop2_invariant pass. To this end, the intermediate temporary
+calculation results are stored in new pseudo-registers without destroying the
+read-write dependency, so that they could be recognized as loop invariants in
+the loop2_invariant pass.
+After optimization, the number of instructions of 521.wrf is reduced by 0.18%
+compared with before optimization (1716612948501 -> 1713471771364).
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.cc (loongarch_emit_swdivsf): Adjust.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/loongarch/invariant-recip.c: New test.
+---
+ gcc/config/loongarch/loongarch.cc | 19 +++++++----
+ .../gcc.target/loongarch/invariant-recip.c | 33 +++++++++++++++++++
+ 2 files changed, 46 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 9bd931549..5877b0acf 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -10842,16 +10842,23 @@ void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
+ /* x0 = 1./b estimate. */
+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ unspec)));
+- /* 2.0 - b * x0 */
++ /* e0 = 2.0 - b * x0. */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, b), x0, mtwo)));
+
+- /* x0 = a * x0 */
+ if (a != CONST1_RTX (mode))
+- emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0)));
+-
+- /* res = e0 * x0 */
+- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
++ {
++ rtx e1 = gen_reg_rtx (mode);
++ /* e1 = a * x0. */
++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, a, x0)));
++ /* res = e0 * e1. */
++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, e1)));
++ }
++ else
++ {
++ /* res = e0 * x0.
*/
++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
++ }
+ }
+
+ static bool
+diff --git a/gcc/testsuite/gcc.target/loongarch/invariant-recip.c b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+new file mode 100644
+index 000000000..2f64f6ed5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-options "-Ofast -march=loongarch64 -mabi=lp64d -mrecip -mfrecipe -fdump-rtl-loop2_invariant " } */
++/* { dg-final { scan-rtl-dump "Decided to move dependent invariant" "loop2_invariant" } } */
++
++void
++nislfv_rain_plm (int im, int km, float dzl[im][km], float rql[im][km],
++ float dt)
++{
++ int i, k;
++ float con1, decfl;
++ float dz[km], qn[km], wi[km + 1];
++
++ for (i = 0; i < im; i++)
++ {
++ for (k = 0; k < km; k++)
++ {
++ dz[k] = dzl[i][k];
++ }
++ con1 = 0.05;
++ for (k = km - 1; k >= 0; k--)
++ {
++ decfl = (wi[k + 1] - wi[k]) * dt / dz[k];
++ if (decfl > con1)
++ {
++ wi[k] = wi[k + 1] - con1 * dz[k] / dt;
++ }
++ }
++ for (k = 0; k < km; k++)
++ {
++ rql[i][k] = qn[k];
++ }
++ }
++}
+--
+2.43.0
+
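The emitted RTL is one Newton-Raphson refinement of a hardware reciprocal estimate, and the patch's point is purely about register reuse. A scalar C++ model of the new sequence (recip_estimate stands in for frecipe.s and is exact here, so this sketches the dataflow, not the numerics):

#include <cstdio>

// Stand-in for the frecipe.s reciprocal-estimate instruction.
static float recip_estimate (float b) { return 1.0f / b; }

float approx_div (float a, float b)
{
  float x0 = recip_estimate (b);  // x0 = 1/b estimate
  float e0 = 2.0f - b * x0;       // Newton-Raphson correction factor
  float e1 = a * x0;              // new: fresh temporary, x0 is not overwritten
  return e0 * e1;                 // res = a/b after one refinement
}

int main () { std::printf ("%f\n", approx_div (1.0f, 3.0f)); }

Because x0 and e0 now depend only on b, a loop that divides many values of a by the same b leaves them untouched across iterations, so loop2_invariant can hoist them — which the old code prevented by writing a * x0 back into x0.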
View file
_service:tar_scm:0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
Added
@@ -0,0 +1,195 @@
+From 99c5eb58e898417632b6d9a7b2b3d288b50e9b65 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:55 +0100
+Subject: [PATCH 022/157] [Backport][SME] aarch64: Tweak constness of
+ option-related data
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60dee638c8a7ae59c033868de7e7638c88b38ed2
+
+Some of the option structures have all-const member variables.
+That doesn't seem necessary: we can just use const on the objects
+that are supposed to be read-only.
+
+Also, with the new, more C++-heavy option handling, it seems
+better to use constexpr for the static data, to make sure that
+we're not adding unexpected overhead.
+
+gcc/
+ * common/config/aarch64/aarch64-common.cc (aarch64_option_extension)
+ (processor_name_to_arch, arch_to_arch_name): Remove const from
+ member variables.
+ (all_extensions, all_cores, all_architectures): Make a constexpr.
+ * config/aarch64/aarch64.cc (processor): Remove const from
+ member variables.
+ (all_architectures): Make a constexpr.
+ * config/aarch64/driver-aarch64.cc (aarch64_core_data)
+ (aarch64_arch_driver_info): Remove const from member variables.
+ (aarch64_cpu_data, aarch64_arches): Make a constexpr.
+ (get_arch_from_id): Return a pointer to const.
+ (host_detect_local_cpu): Update accordingly.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 26 ++++++++++-----------
+ gcc/config/aarch64/aarch64.cc | 14 +++++------
+ gcc/config/aarch64/driver-aarch64.cc | 15 ++++++------
+ 3 files changed, 27 insertions(+), 28 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 2bdf51b8b..ac3486d71 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -203,14 +203,14 @@ aarch64_handle_option (struct gcc_options *opts,
+ /* An ISA extension in the co-processor and main instruction set space. */
+ struct aarch64_option_extension
+ {
+- const char *const name;
+- const uint64_t flag_canonical;
+- const uint64_t flags_on;
+- const uint64_t flags_off;
++ const char *name;
++ uint64_t flag_canonical;
++ uint64_t flags_on;
++ uint64_t flags_off;
+ };
+
+ /* ISA extensions in AArch64. */
+-static const struct aarch64_option_extension all_extensions[] =
++static constexpr aarch64_option_extension all_extensions[] =
+ {
+ #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+ {NAME, AARCH64_FL_##IDENT, \
+@@ -223,21 +223,21 @@ static const struct aarch64_option_extension all_extensions[] =
+
+ struct processor_name_to_arch
+ {
+- const char *const processor_name;
+- const enum aarch64_arch arch;
+- const uint64_t flags;
++ const char *processor_name;
++ aarch64_arch arch;
++ uint64_t flags;
+ };
+
+ struct arch_to_arch_name
+ {
+- const enum aarch64_arch arch;
+- const char *const arch_name;
+- const uint64_t flags;
++ aarch64_arch arch;
++ const char *arch_name;
++ uint64_t flags;
+ };
+
+ /* Map processor names to the architecture revision they implement and
+ the default set of architectural feature flags they support. */
+-static const struct processor_name_to_arch all_cores[] =
++static constexpr processor_name_to_arch all_cores[] =
+ {
+ #define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \
+ {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
+@@ -247,7 +247,7 @@ static const struct processor_name_to_arch all_cores[] =
+ };
+
+ /* Map architecture revisions to their string representation.
*/
+-static const struct arch_to_arch_name all_architectures[] =
++static constexpr arch_to_arch_name all_architectures[] =
+ {
+ #define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \
+ {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1363873b1..71db7ace1 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -2925,16 +2925,16 @@ aarch64_tuning_override_functions =
+ /* A processor implementing AArch64. */
+ struct processor
+ {
+- const char *const name;
+- enum aarch64_processor ident;
+- enum aarch64_processor sched_core;
+- enum aarch64_arch arch;
+- const uint64_t flags;
+- const struct tune_params *const tune;
++ const char *name;
++ aarch64_processor ident;
++ aarch64_processor sched_core;
++ aarch64_arch arch;
++ uint64_t flags;
++ const tune_params *tune;
+ };
+
+ /* Architectures implementing AArch64. */
+-static const struct processor all_architectures[] =
++static constexpr processor all_architectures[] =
+ {
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \
+ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \
+diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
+index ddfc9451f..ee9cb65a5 100644
+--- a/gcc/config/aarch64/driver-aarch64.cc
++++ b/gcc/config/aarch64/driver-aarch64.cc
+@@ -50,7 +50,7 @@ struct aarch64_core_data
+ unsigned char implementer_id; /* Exactly 8 bits */
+ unsigned int part_no; /* 12 bits + 12 bits */
+ unsigned variant;
+- const uint64_t flags;
++ uint64_t flags;
+ };
+
+ #define AARCH64_BIG_LITTLE(BIG, LITTLE) \
+@@ -64,7 +64,7 @@ struct aarch64_core_data
+ #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
+ { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
+
+-static struct aarch64_core_data aarch64_cpu_data[] =
++static constexpr aarch64_core_data aarch64_cpu_data[] =
+ {
+ #include "aarch64-cores.def"
+ { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 }
+@@ -75,14 +75,14 @@ struct aarch64_arch_driver_info
+ {
+ const char* id;
+ const char* name;
+- const uint64_t flags;
++ uint64_t flags;
+ };
+
+ /* Skip the leading "V" in the architecture name. */
+ #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
+ { #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable },
+
+-static struct aarch64_arch_driver_info aarch64_arches[] =
++static constexpr aarch64_arch_driver_info aarch64_arches[] =
+ {
+ #include "aarch64-arches.def"
+ {NULL, NULL, 0}
+@@ -92,7 +92,7 @@ static struct aarch64_arch_driver_info aarch64_arches[] =
+ /* Return an aarch64_arch_driver_info for the architecture described
+ by ID, or NULL if ID describes something we don't know about. */
+
+-static struct aarch64_arch_driver_info*
++static const aarch64_arch_driver_info *
+ get_arch_from_id (const char* id)
+ {
+ unsigned int i = 0;
+@@ -396,8 +396,7 @@ host_detect_local_cpu (int argc, const char **argv)
+
+ if (aarch64_cpu_data[i].name == NULL)
+ {
+- aarch64_arch_driver_info* arch_info
+- = get_arch_from_id (DEFAULT_ARCH);
++ auto arch_info = get_arch_from_id (DEFAULT_ARCH);
+
+ gcc_assert (arch_info);
+
+@@ -407,7 +406,7 @@ host_detect_local_cpu (int argc, const char **argv)
+ else if (arch)
+ {
+ const char *arch_id = aarch64_cpu_data[i].arch;
+- aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
++ auto arch_info = get_arch_from_id (arch_id);
+
+ /* We got some arch indentifier that's not in aarch64-arches.def?
*/ + if (!arch_info) +-- +2.33.0 +
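In miniature, the get_arch_from_id change pairs a constexpr table with a pointer-to-const return type, so lookups can never mutate the shared static data. The two-entry table below is invented:

#include <cstring>

struct arch_info { const char *id; const char *name; };

static constexpr arch_info arches[] = {
  {"8A", "armv8-a"}, {"9A", "armv9-a"}, {nullptr, nullptr}
};

// Returning const * keeps the constexpr table genuinely read-only for callers.
static const arch_info *get_arch (const char *id)
{
  for (const arch_info *p = arches; p->id; ++p)
    if (!std::strcmp (p->id, id))
      return p;
  return nullptr;
}

int main () { return get_arch ("9A") ? 0 : 1; }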
View file
_service:tar_scm:0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch
Added
@@ -0,0 +1,71 @@
+From a2baa4807fdfd381c543eb7ea85edf14dc6c8a20 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Tue, 16 Jan 2024 10:32:31 +0800
+Subject: [PATCH 121/188] LoongArch: Define LOGICAL_OP_NON_SHORT_CIRCUIT
+
+Define LOGICAL_OP_NON_SHORT_CIRCUIT as 0, for a short-circuit branch, use the
+short-circuit operation instead of the non-short-circuit operation.
+
+SPEC2017 performance evaluation shows 1% performance improvement for fprate
+GEOMEAN and no obvious regression for others. Especially, 526.blender_r +10.6%
+on 3A6000.
+
+This modification will introduce the following FAIL items:
+
+FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Conditional combines static and invariant" 1
+FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Will duplicate bb" 2
+FAIL: gcc.dg/tree-ssa/update-threading.c scan-tree-dump-times optimized "Invalid sum" 0
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Define.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/loongarch/short-circuit.c: New test.
+---
+ gcc/config/loongarch/loongarch.h | 1 +
+ .../gcc.target/loongarch/short-circuit.c | 19 +++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/short-circuit.c
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index f54b078b1..15261fdc0 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -869,6 +869,7 @@ typedef struct {
+ 1 is the default; other values are interpreted relative to that. */
+
+ #define BRANCH_COST(speed_p, predictable_p) la_branch_cost
++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+
+ /* Return the asm template for a conditional branch instruction.
+ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
+diff --git a/gcc/testsuite/gcc.target/loongarch/short-circuit.c b/gcc/testsuite/gcc.target/loongarch/short-circuit.c
+new file mode 100644
+index 000000000..bed585ee1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/short-circuit.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -fdump-tree-gimple" } */
++
++int
++short_circuit (float *a)
++{
++ float t1x = a[0];
++ float t2x = a[1];
++ float t1y = a[2];
++ float t2y = a[3];
++ float t1z = a[4];
++ float t2z = a[5];
++
++ if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z)
++ return 0;
++
++ return 1;
++}
++/* { dg-final { scan-tree-dump-times "if" 6 "gimple" } } */
+--
+2.43.0
+
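What the macro toggles can be shown on an invented condition. With LOGICAL_OP_NON_SHORT_CIRCUIT defined to 0, GCC keeps the chain of early-exit branches (six "if"s in the test's gimple dump); with a non-zero value it may evaluate both comparisons and branch once on the combined result, roughly like the second function below:

// Both functions compute the same value; only control flow differs.
bool early_exit (float x, float lo, float hi)
{
  // Short-circuit form: the second compare is skipped when the first fires.
  return x < lo || x > hi;
}

bool merged (float x, float lo, float hi)
{
  // Non-short-circuit style: both compares execute, one branch on the OR.
  return (x < lo) | (x > hi);
}

int main () { return early_exit (1, 0, 2) == merged (1, 0, 2) ? 0 : 1; }

Which form wins depends on branch-prediction cost versus the cost of always evaluating both operands; the commit's SPEC2017 numbers are the evidence that the branchy form is cheaper on 3A6000.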
View file
_service:tar_scm:0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
Added
@@ -0,0 +1,394 @@
+From bdb91009cf250fb22c21ae7f5072263492f2b08c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:56 +0100
+Subject: [PATCH 023/157] [Backport][SME] aarch64: Make more use of
+ aarch64_feature_flags
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fed55a60e5b230bc159617f26e33611073c672fd
+
+A previous patch added a aarch64_feature_flags typedef, to abstract
+the representation of the feature flags. This patch makes existing
+code use the typedef too. Hope I've caught them all!
+
+gcc/
+ * common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags
+ for feature flags throughout.
+ * config/aarch64/aarch64-protos.h: Likewise.
+ * config/aarch64/aarch64-sve-builtins.h: Likewise.
+ * config/aarch64/aarch64-sve-builtins.cc: Likewise.
+ * config/aarch64/aarch64.cc: Likewise.
+ * config/aarch64/aarch64.opt: Likewise.
+ * config/aarch64/driver-aarch64.cc: Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 19 +++++++-------
+ gcc/config/aarch64/aarch64-protos.h | 5 ++--
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 29 ++++++++++++---------
+ gcc/config/aarch64/aarch64-sve-builtins.h | 9 ++++---
+ gcc/config/aarch64/aarch64.cc | 29 +++++++++++----------
+ gcc/config/aarch64/aarch64.opt | 2 +-
+ gcc/config/aarch64/driver-aarch64.cc | 10 +++----
+ 7 files changed, 56 insertions(+), 47 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index ac3486d71..3efa57b26 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -204,9 +204,9 @@ aarch64_handle_option (struct gcc_options *opts,
+ struct aarch64_option_extension
+ {
+ const char *name;
+- uint64_t flag_canonical;
+- uint64_t flags_on;
+- uint64_t flags_off;
++ aarch64_feature_flags flag_canonical;
++ aarch64_feature_flags flags_on;
++ aarch64_feature_flags flags_off;
+ };
+
+ /* ISA extensions in AArch64. */
+@@ -225,14 +225,14 @@ struct processor_name_to_arch
+ {
+ const char *processor_name;
+ aarch64_arch arch;
+- uint64_t flags;
++ aarch64_feature_flags flags;
+ };
+
+ struct arch_to_arch_name
+ {
+ aarch64_arch arch;
+ const char *arch_name;
+- uint64_t flags;
++ aarch64_feature_flags flags;
+ };
+
+ /* Map processor names to the architecture revision they implement and
+@@ -262,7 +262,7 @@ static constexpr arch_to_arch_name all_architectures[] =
+ a copy of the string is created and stored to INVALID_EXTENSION. */
+
+ enum aarch64_parse_opt_result
+-aarch64_parse_extension (const char *str, uint64_t *isa_flags,
++aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
+ std::string *invalid_extension)
+ {
+ /* The extension string is parsed left to right. */
+@@ -342,8 +342,9 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
+ that all the "+" flags come before the "+no" flags.
*/ + + std::string +-aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, +- uint64_t default_arch_flags) ++aarch64_get_extension_string_for_isa_flags ++ (aarch64_feature_flags isa_flags, ++ aarch64_feature_flags default_arch_flags) + { + std::string outstr = ""; + +@@ -451,7 +452,7 @@ aarch64_rewrite_selected_cpu (const char *name) + || a_to_an->arch == aarch64_no_arch) + fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name); + +- uint64_t extensions = p_to_a->flags; ++ aarch64_feature_flags extensions = p_to_a->flags; + aarch64_parse_extension (extension_str.c_str (), &extensions, NULL); + + std::string outstr = a_to_an->arch_name +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index e60ce3c36..ef84df731 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -1037,10 +1037,11 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); + enum aarch64_parse_opt_result aarch64_parse_extension (const char *, +- uint64_t *, ++ aarch64_feature_flags *, + std::string *); + void aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates); +-std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); ++std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags, ++ aarch64_feature_flags); + + rtl_opt_pass *make_pass_fma_steering (gcc::context *); + rtl_opt_pass *make_pass_track_speculation (gcc::context *); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index c06e99339..b927a886e 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -82,7 +82,7 @@ public: + + /* The architecture extensions that the function requires, as a set of + AARCH64_FL_* flags. */ +- uint64_t required_extensions; ++ aarch64_feature_flags required_extensions; + + /* True if the decl represents an overloaded function that needs to be + resolved by function_resolver. */ +@@ -694,13 +694,16 @@ check_required_registers (location_t location, tree fndecl) + Report an error against LOCATION if not. 
*/
+ static bool
+ check_required_extensions (location_t location, tree fndecl,
+- uint64_t required_extensions)
++ aarch64_feature_flags required_extensions)
+ {
+- uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags;
++ auto missing_extensions = required_extensions & ~aarch64_isa_flags;
+ if (missing_extensions == 0)
+ return check_required_registers (location, fndecl);
+
+- static const struct { uint64_t flag; const char *name; } extensions[] = {
++ static const struct {
++ aarch64_feature_flags flag;
++ const char *name;
++ } extensions[] = {
+ #define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
+ { AARCH64_FL_##IDENT, EXT_NAME },
+ #include "aarch64-option-extensions.def"
+@@ -992,7 +995,7 @@ function_builder::get_attributes (const function_instance &instance)
+ registered_function &
+ function_builder::add_function (const function_instance &instance,
+ const char *name, tree fntype, tree attrs,
+- uint64_t required_extensions,
++ aarch64_feature_flags required_extensions,
+ bool overloaded_p,
+ bool placeholder_p)
+ {
+@@ -1034,11 +1037,12 @@ function_builder::add_function (const function_instance &instance,
+ one-to-one mapping between "short" and "full" names, and if standard
+ overload resolution therefore isn't necessary. */
+ void
+-function_builder::add_unique_function (const function_instance &instance,
+- tree return_type,
+- vec<tree> &argument_types,
+- uint64_t required_extensions,
+- bool force_direct_overloads)
++function_builder::
++add_unique_function (const function_instance &instance,
++ tree return_type,
++ vec<tree> &argument_types,
++ aarch64_feature_flags required_extensions,
++ bool force_direct_overloads)
+ {
+ /* Add the function under its full (unique) name. */
+ char *name = get_name (instance, false);
+@@ -1081,8 +1085,9 @@ function_builder::add_unique_function (const function_instance &instance,
+ features are available as part of resolving the function to the
+ relevant unique function. */
+ void
+-function_builder::add_overloaded_function (const function_instance &instance,
+- uint64_t required_extensions)
++function_builder::
++add_overloaded_function (const function_instance &instance,
++ aarch64_feature_flags required_extensions)
+ {
+ char *name = get_name (instance, true);
+ if (registered_function **map_value = m_overload_names.get (name))
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 24594d584..63d1db776 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -263,7 +263,7 @@ struct function_group_info
+
+ /* The architecture extensions that the functions require, as a set of
+ AARCH64_FL_* flags. */
+- uint64_t required_extensions;
++ aarch64_feature_flags required_extensions;
+ };
+
+ /* Describes a single fully-resolved function (i.e.
one that has a +@@ -321,8 +321,9 @@ public: + ~function_builder (); + + void add_unique_function (const function_instance &, tree, +- vec<tree> &, uint64_t, bool); +- void add_overloaded_function (const function_instance &, uint64_t); ++ vec<tree> &, aarch64_feature_flags, bool); ++ void add_overloaded_function (const function_instance &, ++ aarch64_feature_flags); + void add_overloaded_functions (const function_group_info &, + mode_suffix_index); + +@@ -338,7 +339,7 @@ private: + + registered_function &add_function (const function_instance &, + const char *, tree, tree, +- uint64_t, bool, bool); ++ aarch64_feature_flags, bool, bool); + + /* The function type to use for functions that are resolved by + function_resolver. */ +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 71db7ace1..8cb820767 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -2929,7 +2929,7 @@ struct processor + aarch64_processor ident; + aarch64_processor sched_core; + aarch64_arch arch; +- uint64_t flags; ++ aarch64_feature_flags flags; + const tune_params *tune; + }; + +@@ -17428,7 +17428,8 @@ static void initialize_aarch64_code_model (struct gcc_options *); + + static enum aarch64_parse_opt_result + aarch64_parse_arch (const char *to_parse, const struct processor **res, +- uint64_t *isa_flags, std::string *invalid_extension) ++ aarch64_feature_flags *isa_flags, ++ std::string *invalid_extension) + { + const char *ext; + const struct processor *arch; +@@ -17451,7 +17452,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, + if (strlen (arch->name) == len + && strncmp (arch->name, to_parse, len) == 0) + { +- uint64_t isa_temp = arch->flags; ++ auto isa_temp = arch->flags; + + if (ext != NULL) + { +@@ -17483,7 +17484,8 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, + + static enum aarch64_parse_opt_result + aarch64_parse_cpu (const char *to_parse, const struct processor **res, +- uint64_t *isa_flags, std::string *invalid_extension) ++ aarch64_feature_flags *isa_flags, ++ std::string *invalid_extension) + { + const char *ext; + const struct processor *cpu; +@@ -17505,8 +17507,7 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res, + { + if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0) + { +- uint64_t isa_temp = cpu->flags; +- ++ auto isa_temp = cpu->flags; + + if (ext != NULL) + { +@@ -18137,7 +18138,7 @@ aarch64_print_hint_for_extensions (const std::string &str) + + static bool + aarch64_validate_mcpu (const char *str, const struct processor **res, +- uint64_t *isa_flags) ++ aarch64_feature_flags *isa_flags) + { + std::string invalid_extension; + enum aarch64_parse_opt_result parse_res +@@ -18351,7 +18352,7 @@ aarch64_validate_mbranch_protection (const char *const_str) + + static bool + aarch64_validate_march (const char *str, const struct processor **res, +- uint64_t *isa_flags) ++ aarch64_feature_flags *isa_flags) + { + std::string invalid_extension; + enum aarch64_parse_opt_result parse_res +@@ -18441,8 +18442,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) + static void + aarch64_override_options (void) + { +- uint64_t cpu_isa = 0; +- uint64_t arch_isa = 0; ++ aarch64_feature_flags cpu_isa = 0; ++ aarch64_feature_flags arch_isa = 0; + aarch64_isa_flags = 0; + + const struct processor *cpu = NULL; +@@ -18890,7 +18891,7 @@ static bool + aarch64_handle_attr_isa_flags (char *str) + { + enum aarch64_parse_opt_result parse_res; +- 
uint64_t isa_flags = aarch64_isa_flags;
++ auto isa_flags = aarch64_isa_flags;
+
+ /* We allow "+nothing" in the beginning to clear out all architectural
+ features if the user wants to handpick specific features. */
+@@ -19162,7 +19163,7 @@ aarch64_process_target_attr (tree args)
+ {
+ /* Check if token is possibly an arch extension without
+ leading '+'. */
+- uint64_t isa_temp = 0;
++ aarch64_feature_flags isa_temp = 0;
+ auto with_plus = std::string ("+") + token;
+ enum aarch64_parse_opt_result ext_res
+ = aarch64_parse_extension (with_plus.c_str (), &isa_temp, nullptr);
+@@ -22771,7 +22772,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
+ const struct processor *this_arch
+ = aarch64_get_arch (targ_options->x_selected_arch);
+
+- uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
++ auto isa_flags = targ_options->x_aarch64_isa_flags;
+ std::string extension
+ = aarch64_get_extension_string_for_isa_flags (isa_flags,
+ this_arch->flags);
+@@ -22901,7 +22902,7 @@ aarch64_start_file (void)
+
+ const struct processor *default_arch
+ = aarch64_get_arch (default_options->x_selected_arch);
+- uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
++ auto default_isa_flags = default_options->x_aarch64_isa_flags;
+ std::string extension
+ = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
+ default_arch->flags);
+diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
+index 836a3c784..47ec7824f 100644
+--- a/gcc/config/aarch64/aarch64.opt
++++ b/gcc/config/aarch64/aarch64.opt
+@@ -28,7 +28,7 @@ TargetVariable
+ enum aarch64_arch selected_arch = aarch64_no_arch
+
+ TargetVariable
+-uint64_t aarch64_isa_flags = 0
++aarch64_feature_flags aarch64_isa_flags = 0
+
+ TargetVariable
+ unsigned aarch64_enable_bti = 2
+diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
+index ee9cb65a5..2ae47c020 100644
+--- a/gcc/config/aarch64/driver-aarch64.cc
++++ b/gcc/config/aarch64/driver-aarch64.cc
+@@ -31,7 +31,7 @@
+ struct aarch64_arch_extension
+ {
+ const char *ext;
+- uint64_t flag;
++ aarch64_feature_flags flag;
+ const char *feat_string;
+ };
+
+@@ -50,7 +50,7 @@ struct aarch64_core_data
+ unsigned char implementer_id; /* Exactly 8 bits */
+ unsigned int part_no; /* 12 bits + 12 bits */
+ unsigned variant;
+- uint64_t flags;
++ aarch64_feature_flags flags;
+ };
+
+@@ -75,7 +75,7 @@ struct aarch64_arch_driver_info
+ {
+ const char* id;
+ const char* name;
+- uint64_t flags;
++ aarch64_feature_flags flags;
+ };
+
+ /* Skip the leading "V" in the architecture name. */
+@@ -261,8 +261,8 @@ host_detect_local_cpu (int argc, const char **argv)
+ unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
+ unsigned int n_variants = 0;
+ bool processed_exts = false;
+- uint64_t extension_flags = 0;
+- uint64_t default_flags = 0;
++ aarch64_feature_flags extension_flags = 0;
++ aarch64_feature_flags default_flags = 0;
+ std::string buf;
+ size_t sep_pos = -1;
+ char *fcpu_info;
+--
+2.33.0
+
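The whole point of routing every interface through the typedef is that no caller spells out the underlying integer type, so the representation can later grow past 64 bits without touching them. A compressed sketch (one invented flag and parser; the typedef here maps to uint64_t as the real one currently does):

#include <cstdint>
#include <cstring>

// Today an integer; free to become a wider bitset-like class later.
using aarch64_feature_flags = uint64_t;

constexpr aarch64_feature_flags FL_SVE = aarch64_feature_flags (1) << 0;

aarch64_feature_flags add_extension (aarch64_feature_flags flags, const char *name)
{
  if (!std::strcmp (name, "sve"))  // invented one-entry parser
    flags |= FL_SVE;               // callers never see the raw integer type
  return flags;
}

int main () { return add_extension (0, "sve") == FL_SVE ? 0 : 1; }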
_service:tar_scm:0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch
Added
@@ -0,0 +1,84 @@
+From 5cab5d1a9fb9cfaa0d12d229aa0ee19e0dd55cc5 Mon Sep 17 00:00:00 2001
+From: Jiahao Xu <xujiahao@loongson.cn>
+Date: Tue, 16 Jan 2024 10:23:20 +0800
+Subject: [PATCH 122/188] LoongArch: Split vec_selects of bottom elements into
+ simple move
+
+The pattern below can be treated as a simple move, because floating point
+and vector share a common register on loongarch64.
+
+(set (reg/v:SF 32 $f0 [orig:93 res ] [93])
+     (vec_select:SF (reg:V8SF 32 $f0 [115])
+         (parallel [
+                 (const_int 0 [0])
+             ])))
+
+gcc/ChangeLog:
+
+	* config/loongarch/lasx.md (vec_extract<mode>_0):
+	New define_insn_and_split pattern.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vect-extract.c: New test.
+---
+ gcc/config/loongarch/lasx.md                  | 15 ++++++++++++++
+ .../gcc.target/loongarch/vect-extract.c       | 20 +++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-extract.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index a5128c30c..946811e1a 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -746,6 +746,21 @@
+   DONE;
+ })
+ 
++(define_insn_and_split "vec_extract<mode>_0"
++  [(set (match_operand:<UNITMODE> 0 "register_operand" "=f")
++	(vec_select:<UNITMODE>
++	  (match_operand:FLASX 1 "register_operand" "f")
++	  (parallel [(const_int 0)])))]
++  "ISA_HAS_LSX"
++  "#"
++  "&& reload_completed"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[1] = gen_rtx_REG (<UNITMODE>mode, REGNO (operands[1]));
++}
++  [(set_attr "move_type" "fmove")
++   (set_attr "mode" "<UNITMODE>")])
++
+ (define_expand "vec_perm<mode>"
+   [(match_operand:LASX 0 "register_operand")
+    (match_operand:LASX 1 "register_operand")
+diff --git a/gcc/testsuite/gcc.target/loongarch/vect-extract.c b/gcc/testsuite/gcc.target/loongarch/vect-extract.c
+new file mode 100644
+index 000000000..ce126e3a4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/vect-extract.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math -mlasx -fno-vect-cost-model -fno-unroll-loops" } */
++/* { dg-final { scan-assembler-not "xvpickve.w" } } */
++/* { dg-final { scan-assembler-not "xvpickve.d" } } */
++
++float
++sum_float (float *a, int n) {
++  float res = 0.0;
++  for (int i = 0; i < n; i++)
++    res += a[i];
++  return res;
++}
++
++double
++sum_double (double *a, int n) {
++  double res = 0.0;
++  for (int i = 0; i < n; i++)
++    res += a[i];
++  return res;
++}
+-- 
+2.43.0
+
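The split above is easiest to see from the user side. A minimal sketch in plain GNU C (the vector type and function are illustrative, not taken from the patch): with -O2 -mlasx, lane 0 of a 256-bit vector value already sits in the low bits of the vector register, which the scalar FP register aliases on loongarch64, so the extraction can become a bare register move instead of an xvpickve instruction.

typedef float v8sf __attribute__ ((vector_size (32)));

float
first_lane (v8sf v)
{
  /* Lane 0 lives in the low bits of the vector register; after the
     split this is a simple fmov-style move, not an extract insn.  */
  return v[0];
}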
_service:tar_scm:0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch
Added
@@ -0,0 +1,70 @@
+From eb92c185c1c71edcbd83b1c66fe4f9e7d52a98b3 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:56 +0100
+Subject: [PATCH 024/157] [Backport][SME] aarch64: Tweak contents of
+ flags_on/off fields
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb7f43b62a58a0f0326fd3060f0bd43e6f3ef971
+
+After previous changes, it's more convenient if the flags_on and
+flags_off fields of all_extensions include the feature flag itself.
+
+gcc/
+	* common/config/aarch64/aarch64-common.cc (all_extensions):
+	Include the feature flag in flags_on and flags_off.
+	(aarch64_parse_extension): Update accordingly.
+	(aarch64_get_extension_string_for_isa_flags): Likewise.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 3efa57b26..752ba5632 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -213,10 +213,8 @@ struct aarch64_option_extension
+ static constexpr aarch64_option_extension all_extensions[] =
+ {
+ #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
+-  {NAME, AARCH64_FL_##IDENT, \
+-   feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
+-   feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
+-   & ~AARCH64_FL_##IDENT},
++  {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
++   feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
+ #include "config/aarch64/aarch64-option-extensions.def"
+   {NULL, 0, 0, 0}
+ };
+@@ -304,9 +302,9 @@ aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
+ 	{
+ 	  /* Add or remove the extension.  */
+ 	  if (adding_ext)
+-	    *isa_flags |= (opt->flags_on | opt->flag_canonical);
++	    *isa_flags |= opt->flags_on;
+ 	  else
+-	    *isa_flags &= ~(opt->flags_off | opt->flag_canonical);
++	    *isa_flags &= ~opt->flags_off;
+ 	  break;
+ 	}
+     }
+@@ -380,7 +378,7 @@ aarch64_get_extension_string_for_isa_flags
+ 
+       if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
+ 	{
+-	  current_flags |= opt.flag_canonical | opt.flags_on;
++	  current_flags |= opt.flags_on;
+ 	  added |= opt.flag_canonical;
+ 	}
+     }
+@@ -395,7 +393,7 @@ aarch64_get_extension_string_for_isa_flags
+   for (auto &opt : all_extensions)
+     if (opt.flag_canonical & current_flags & ~isa_flags)
+       {
+-	current_flags &= ~(opt.flag_canonical | opt.flags_off);
++	current_flags &= ~opt.flags_off;
+ 	outstr += "+no";
+ 	outstr += opt.name;
+       }
+-- 
+2.33.0
+
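To see why folding the canonical bit into flags_on/flags_off simplifies the callers, here is a toy model with invented flag bits (none of this is GCC's real data; it only mirrors the shape of all_extensions): enabling an extension becomes a single OR of flags_on, and disabling a single AND-NOT of flags_off.

#include <assert.h>
#include <stdint.h>

/* Invented bits: SVE depends on SIMD, which depends on FP.  */
enum { FL_FP = 1u << 0, FL_SIMD = 1u << 1, FL_SVE = 1u << 2 };

struct ext { const char *name; uint32_t flag_canonical, flags_on, flags_off; };

/* After the patch, flags_on/flags_off already contain FL_SVE itself.  */
static const struct ext sve = { "sve", FL_SVE, FL_SVE | FL_SIMD | FL_FP, FL_SVE };

int
main (void)
{
  uint32_t isa = 0;
  isa |= sve.flags_on;          /* "+sve": no extra "| flag_canonical" */
  assert (isa == (FL_SVE | FL_SIMD | FL_FP));
  isa &= ~sve.flags_off;        /* "+nosve" */
  assert (!(isa & FL_SVE));
  return 0;
}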
_service:tar_scm:0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch
Added
@@ -0,0 +1,112 @@ +From c4815d70715bed71b8e89888ef19eb43e9171229 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Tue, 30 Jan 2024 15:02:32 +0800 +Subject: PATCH 123/188 LoongArch: Modify the address calculation logic for + obtaining array element values through fp. + +Modify address calculation logic from (((a x C) + fp) + offset) to ((fp + offset) + a x C). +Thereby modifying the register dependencies and optimizing the code. +The value of C is 2 4 or 8. + +The following is the assembly code before and after a loop modification in spec2006 401.bzip: + + old | new + 735 .L71: | 735 .L71: + 736 slli.d $r12,$r15,2 | 736 slli.d $r12,$r15,2 + 737 ldx.w $r13,$r22,$r12 | 737 ldx.w $r13,$r22,$r12 + 738 addi.d $r15,$r15,-1 | 738 addi.d $r15,$r15,-1 + 739 slli.w $r16,$r15,0 | 739 slli.w $r16,$r15,0 + 740 addi.w $r13,$r13,-1 | 740 addi.w $r13,$r13,-1 + 741 slti $r14,$r13,0 | 741 slti $r14,$r13,0 + 742 add.w $r12,$r26,$r13 | 742 add.w $r12,$r26,$r13 + 743 maskeqz $r12,$r12,$r14 | 743 maskeqz $r12,$r12,$r14 + 744 masknez $r14,$r13,$r14 | 744 masknez $r14,$r13,$r14 + 745 or $r12,$r12,$r14 | 745 or $r12,$r12,$r14 + 746 ldx.bu $r14,$r30,$r12 | 746 ldx.bu $r14,$r30,$r12 + 747 lu12i.w $r13,4096>>12 | 747 alsl.d $r14,$r14,$r18,2 + 748 ori $r13,$r13,432 | 748 ldptr.w $r13,$r14,0 + 749 add.d $r13,$r13,$r3 | 749 addi.w $r17,$r13,-1 + 750 alsl.d $r14,$r14,$r13,2 | 750 stptr.w $r17,$r14,0 + 751 ldptr.w $r13,$r14,-1968 | 751 slli.d $r13,$r13,2 + 752 addi.w $r17,$r13,-1 | 752 stx.w $r12,$r22,$r13 + 753 st.w $r17,$r14,-1968 | 753 ldptr.w $r12,$r19,0 + 754 slli.d $r13,$r13,2 | 754 blt $r12,$r16,.L71 + 755 stx.w $r12,$r22,$r13 | 755 .align 4 + 756 ldptr.w $r12,$r18,-2048 | 756 + 757 blt $r12,$r16,.L71 | 757 + 758 .align 4 | 758 + +This patch is ported from riscv's commit r14-3111. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New function. + (loongarch_legitimize_address): Add logical transformation code. +--- + gcc/config/loongarch/loongarch.cc | 43 +++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5877b0acf..612a9c138 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3215,6 +3215,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + return true; + } + ++/* Helper loongarch_legitimize_address. Given X, return true if it ++ is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. ++ ++ This respectively represent canonical shift-add rtxs or scaled ++ memory addresses. */ ++static bool ++mem_shadd_or_shadd_rtx_p (rtx x) ++{ ++ return ((GET_CODE (x) == ASHIFT ++ || GET_CODE (x) == MULT) ++ && CONST_INT_P (XEXP (x, 1)) ++ && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) ++ || (GET_CODE (x) == MULT ++ && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3)))); ++} ++ + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can + be legitimized in a way that the generic machinery might not expect, + return a new address, otherwise return NULL. MODE is the mode of +@@ -3238,6 +3254,33 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + loongarch_split_plus (x, &base, &offset); + if (offset != 0) + { ++ /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. 
*/ ++ if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) ++ && IMM12_OPERAND (offset)) ++ { ++ rtx index = XEXP (base, 0); ++ rtx fp = XEXP (base, 1); ++ ++ if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) ++ { ++ /* If we were given a MULT, we must fix the constant ++ as we're going to create the ASHIFT form. */ ++ int shift_val = INTVAL (XEXP (index, 1)); ++ if (GET_CODE (index) == MULT) ++ shift_val = exact_log2 (shift_val); ++ ++ rtx reg1 = gen_reg_rtx (Pmode); ++ rtx reg3 = gen_reg_rtx (Pmode); ++ loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); ++ loongarch_emit_binary (PLUS, reg3, ++ gen_rtx_ASHIFT (Pmode, XEXP (index, 0), ++ GEN_INT (shift_val)), ++ reg1); ++ ++ return reg3; ++ } ++ } ++ + if (!loongarch_valid_base_register_p (base, mode, false)) + base = copy_to_mode_reg (Pmode, base); + addr = loongarch_add_offset (NULL, base, offset); +-- +2.43.0 +
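In source terms, the access shape this patch targets looks roughly like the hypothetical function below (illustrative only, not from the patch): the loop-invariant (fp + offset) can now be formed once as a base register, leaving one shift-add per element access instead of rebuilding the full address every iteration.

int
sum_frame_array (void)
{
  int buf[256];                 /* lives at a frame-pointer offset */
  for (int i = 0; i < 256; i++)
    buf[i] = i;                 /* frame accesses of the targeted shape */
  int s = 0;
  for (int i = 0; i < 256; i++)
    s += buf[i];                /* (fp + offset) + (i << 2) after the patch */
  return s;
}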
_service:tar_scm:0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
Added
@@ -0,0 +1,370 @@
+From 91f7471cbc7dec42673b58a1896330d64eb6be2a Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:57 +0100
+Subject: [PATCH 025/157] [Backport][SME] aarch64: Tweak handling of
+ -mgeneral-regs-only
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a269bda9e7b8f9353699d0c965e7e9246500aa0
+
+-mgeneral-regs-only is effectively "+nofp for the compiler without
+changing the assembler's ISA flags".  Currently that's implemented
+by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on
+!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP
+registers depend (directly or indirectly) on one of those three TARGET
+macros.  The problem is that it's easy to forget to do the last bit.
+
+This patch instead represents the distinction between "assembler
+ISA flags" and "compiler ISA flags" more directly, funnelling
+all updates through a new function that sets both sets of flags
+together.
+
+gcc/
+	* config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable.
+	* config/aarch64/aarch64.h (aarch64_asm_isa_flags)
+	(aarch64_isa_flags): Redefine as read-only macros.
+	(TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on
+	!TARGET_GENERAL_REGS_ONLY.
+	* common/config/aarch64/aarch64-common.cc
+	(aarch64_set_asm_isa_flags): New function.
+	(aarch64_handle_option): Call it when updating -mgeneral-regs.
+	* config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace
+	m_old_isa_flags with m_old_asm_isa_flags.
+	(aarch64_set_asm_isa_flags): Declare.
+	* config/aarch64/aarch64-builtins.cc
+	(aarch64_simd_switcher::aarch64_simd_switcher)
+	(aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore
+	aarch64_asm_isa_flags instead of aarch64_isa_flags.
+	* config/aarch64/aarch64-sve-builtins.cc
+	(check_required_extensions): Use aarch64_asm_isa_flags instead
+	of aarch64_isa_flags.
+	* config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function.
+	(aarch64_override_options, aarch64_handle_attr_arch)
+	(aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use
+	aarch64_set_asm_isa_flags to set the ISA flags.
+	(aarch64_option_print, aarch64_declare_function_name)
+	(aarch64_start_file): Use aarch64_asm_isa_flags instead
+	of aarch64_isa_flags.
+	(aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as
+	aarch64_isa_flags.
+---
+ gcc/common/config/aarch64/aarch64-common.cc | 12 ++++++
+ gcc/config/aarch64/aarch64-builtins.cc      |  6 +--
+ gcc/config/aarch64/aarch64-protos.h         |  5 ++-
+ gcc/config/aarch64/aarch64-sve-builtins.cc  |  2 +-
+ gcc/config/aarch64/aarch64.cc               | 45 ++++++++++++++-------
+ gcc/config/aarch64/aarch64.h                | 17 ++++++--
+ gcc/config/aarch64/aarch64.opt              |  3 ++
+ 7 files changed, 68 insertions(+), 22 deletions(-)
+
+diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
+index 752ba5632..c64b4987e 100644
+--- a/gcc/common/config/aarch64/aarch64-common.cc
++++ b/gcc/common/config/aarch64/aarch64-common.cc
+@@ -137,6 +137,17 @@ reset_tsv110_option ()
+     }
+ }
+ 
++/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update
++   OPTS->x_aarch64_isa_flags accordingly.  */
++void
++aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags)
++{
++  opts->x_aarch64_asm_isa_flags = flags;
++  opts->x_aarch64_isa_flags = flags;
++  if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
++    opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
++}
++
+ /* Implement TARGET_HANDLE_OPTION.
+
+ This function handles the target specific options for CPU/target selection. + +@@ -174,6 +185,7 @@ aarch64_handle_option (struct gcc_options *opts, + + case OPT_mgeneral_regs_only: + opts->x_target_flags |= MASK_GENERAL_REGS_ONLY; ++ aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags); + return true; + + case OPT_mfix_cortex_a53_835769: +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 42276e7ca..015e9d975 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -1336,20 +1336,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t) + /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD + set. */ + aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags) +- : m_old_isa_flags (aarch64_isa_flags), ++ : m_old_asm_isa_flags (aarch64_asm_isa_flags), + m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY) + { + /* Changing the ISA flags should be enough here. We shouldn't need to + pay the compile-time cost of a full target switch. */ +- aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags; + global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; ++ aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags); + } + + aarch64_simd_switcher::~aarch64_simd_switcher () + { + if (m_old_general_regs_only) + global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY; +- aarch64_isa_flags = m_old_isa_flags; ++ aarch64_set_asm_isa_flags (m_old_asm_isa_flags); + } + + /* Implement #pragma GCC aarch64 "arm_neon.h". */ +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index ef84df731..86e444a60 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -747,7 +747,7 @@ public: + ~aarch64_simd_switcher (); + + private: +- unsigned long m_old_isa_flags; ++ unsigned long m_old_asm_isa_flags; + bool m_old_general_regs_only; + }; + +@@ -1032,7 +1032,10 @@ extern bool aarch64_classify_address (struct aarch64_address_info *, rtx, + machine_mode, bool, + aarch64_addr_query_type = ADDR_QUERY_M); + ++void aarch64_set_asm_isa_flags (aarch64_feature_flags); ++ + /* Defined in common/config/aarch64-common.cc. */ ++void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags); + bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index b927a886e..a70e3a6b4 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -696,7 +696,7 @@ static bool + check_required_extensions (location_t location, tree fndecl, + aarch64_feature_flags required_extensions) + { +- auto missing_extensions = required_extensions & ~aarch64_isa_flags; ++ auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags; + if (missing_extensions == 0) + return check_required_registers (location, fndecl); + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8cb820767..3e83e48ec 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18432,10 +18432,19 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) + return (int) value / 64; + } + ++/* Set the global aarch64_asm_isa_flags to FLAGS and update ++ aarch64_isa_flags accordingly. 
*/ ++ ++void ++aarch64_set_asm_isa_flags (aarch64_feature_flags flags) ++{ ++ aarch64_set_asm_isa_flags (&global_options, flags); ++} ++ + /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning + and is used to parse the -m{cpu,tune,arch} strings and setup the initial + tuning structs. In particular it must set selected_tune and +- aarch64_isa_flags that define the available ISA features and tuning ++ aarch64_asm_isa_flags that define the available ISA features and tuning + decisions. It must also set selected_arch as this will be used to + output the .arch asm tags for each function. */ + +@@ -18444,7 +18453,7 @@ aarch64_override_options (void) + { + aarch64_feature_flags cpu_isa = 0; + aarch64_feature_flags arch_isa = 0; +- aarch64_isa_flags = 0; ++ aarch64_set_asm_isa_flags (0); + + const struct processor *cpu = NULL; + const struct processor *arch = NULL; +@@ -18484,25 +18493,25 @@ aarch64_override_options (void) + } + + selected_arch = arch->arch; +- aarch64_isa_flags = arch_isa; ++ aarch64_set_asm_isa_flags (arch_isa); + } + else if (cpu) + { + selected_arch = cpu->arch; +- aarch64_isa_flags = cpu_isa; ++ aarch64_set_asm_isa_flags (cpu_isa); + } + else if (arch) + { + cpu = &all_coresarch->ident; + selected_arch = arch->arch; +- aarch64_isa_flags = arch_isa; ++ aarch64_set_asm_isa_flags (arch_isa); + } + else + { + /* No -mcpu or -march specified, so use the default CPU. */ + cpu = &all_coresTARGET_CPU_DEFAULT; + selected_arch = cpu->arch; +- aarch64_isa_flags = cpu->flags; ++ aarch64_set_asm_isa_flags (cpu->flags); + } + + selected_tune = tune ? tune->ident : cpu->ident; +@@ -18644,7 +18653,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) + = aarch64_get_tune_cpu (ptr->x_selected_tune); + const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch); + std::string extension +- = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags, ++ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_asm_isa_flags, + arch->flags); + + fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name); +@@ -18752,13 +18761,15 @@ aarch64_handle_attr_arch (const char *str) + { + const struct processor *tmp_arch = NULL; + std::string invalid_extension; ++ aarch64_feature_flags tmp_flags; + enum aarch64_parse_opt_result parse_res +- = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension); ++ = aarch64_parse_arch (str, &tmp_arch, &tmp_flags, &invalid_extension); + + if (parse_res == AARCH64_PARSE_OK) + { + gcc_assert (tmp_arch); + selected_arch = tmp_arch->arch; ++ aarch64_set_asm_isa_flags (tmp_flags); + return true; + } + +@@ -18790,14 +18801,16 @@ aarch64_handle_attr_cpu (const char *str) + { + const struct processor *tmp_cpu = NULL; + std::string invalid_extension; ++ aarch64_feature_flags tmp_flags; + enum aarch64_parse_opt_result parse_res +- = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension); ++ = aarch64_parse_cpu (str, &tmp_cpu, &tmp_flags, &invalid_extension); + + if (parse_res == AARCH64_PARSE_OK) + { + gcc_assert (tmp_cpu); + selected_tune = tmp_cpu->ident; + selected_arch = tmp_cpu->arch; ++ aarch64_set_asm_isa_flags (tmp_flags); + return true; + } + +@@ -18891,7 +18904,7 @@ static bool + aarch64_handle_attr_isa_flags (char *str) + { + enum aarch64_parse_opt_result parse_res; +- auto isa_flags = aarch64_isa_flags; ++ auto isa_flags = aarch64_asm_isa_flags; + + /* We allow "+nothing" in the beginning to clear out all architectural + features if the user wants to 
handpick specific features. */ +@@ -18906,7 +18919,7 @@ aarch64_handle_attr_isa_flags (char *str) + + if (parse_res == AARCH64_PARSE_OK) + { +- aarch64_isa_flags = isa_flags; ++ aarch64_set_asm_isa_flags (isa_flags); + return true; + } + +@@ -19328,8 +19341,12 @@ aarch64_can_inline_p (tree caller, tree callee) + : target_option_default_node); + + /* Callee's ISA flags should be a subset of the caller's. */ ++ if ((caller_opts->x_aarch64_asm_isa_flags ++ & callee_opts->x_aarch64_asm_isa_flags) ++ != callee_opts->x_aarch64_asm_isa_flags) ++ return false; + if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) +- != callee_opts->x_aarch64_isa_flags) ++ != callee_opts->x_aarch64_isa_flags) + return false; + + /* Allow non-strict aligned functions inlining into strict +@@ -22772,7 +22789,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, + const struct processor *this_arch + = aarch64_get_arch (targ_options->x_selected_arch); + +- auto isa_flags = targ_options->x_aarch64_isa_flags; ++ auto isa_flags = targ_options->x_aarch64_asm_isa_flags; + std::string extension + = aarch64_get_extension_string_for_isa_flags (isa_flags, + this_arch->flags); +@@ -22902,7 +22919,7 @@ aarch64_start_file (void) + + const struct processor *default_arch + = aarch64_get_arch (default_options->x_selected_arch); +- auto default_isa_flags = default_options->x_aarch64_isa_flags; ++ auto default_isa_flags = default_options->x_aarch64_asm_isa_flags; + std::string extension + = aarch64_get_extension_string_for_isa_flags (default_isa_flags, + default_arch->flags); +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 50a2ef444..521031efe 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -22,6 +22,17 @@ + #ifndef GCC_AARCH64_H + #define GCC_AARCH64_H + ++/* Make these flags read-only so that all uses go via ++ aarch64_set_asm_isa_flags. */ ++#ifndef GENERATOR_FILE ++#undef aarch64_asm_isa_flags ++#define aarch64_asm_isa_flags \ ++ ((aarch64_feature_flags) global_options.x_aarch64_asm_isa_flags) ++#undef aarch64_isa_flags ++#define aarch64_isa_flags \ ++ ((aarch64_feature_flags) global_options.x_aarch64_isa_flags) ++#endif ++ + /* Target CPU builtins. */ + #define TARGET_CPU_CPP_BUILTINS() \ + aarch64_cpu_cpp_builtins (pfile) +@@ -51,8 +62,8 @@ + + /* AdvSIMD is supported in the default configuration, unless disabled by + -mgeneral-regs-only or by the +nosimd extension. */ +-#define TARGET_SIMD (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SIMD) +-#define TARGET_FLOAT (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_FP) ++#define TARGET_SIMD (AARCH64_ISA_SIMD) ++#define TARGET_FLOAT (AARCH64_ISA_FP) + + #define UNITS_PER_WORD 8 + +@@ -242,7 +253,7 @@ enum class aarch64_feature : unsigned char { + #define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD) + + /* SVE instructions, enabled through +sve. */ +-#define TARGET_SVE (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SVE) ++#define TARGET_SVE (AARCH64_ISA_SVE) + + /* SVE2 instructions, enabled through +sve2. 
*/ + #define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2) +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 47ec7824f..5f507abd4 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -27,6 +27,9 @@ enum aarch64_processor selected_tune = aarch64_none + TargetVariable + enum aarch64_arch selected_arch = aarch64_no_arch + ++TargetVariable ++aarch64_feature_flags aarch64_asm_isa_flags = 0 ++ + TargetVariable + aarch64_feature_flags aarch64_isa_flags = 0 + +-- +2.33.0 +
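A toy model of the two flag sets this patch introduces, with invented bit values (GCC's real flags come from aarch64-option-extensions.def): the single setter maintains the invariant that the compiler's view is the assembler's view minus everything FP-dependent whenever -mgeneral-regs-only is in force.

#include <assert.h>
#include <stdint.h>

enum { FL_FP = 1u << 0, FL_SIMD = 1u << 1, FL_SVE = 1u << 2,
       FL_LSE = 1u << 3 };                       /* LSE needs no FP regs */
#define FLAGS_OFF_FP (FL_FP | FL_SIMD | FL_SVE)  /* closure of "+nofp" */

struct opts { uint32_t asm_isa_flags, isa_flags; int general_regs_only; };

static void
set_asm_isa_flags (struct opts *o, uint32_t flags)
{
  o->asm_isa_flags = flags;
  o->isa_flags = flags;
  if (o->general_regs_only)
    o->isa_flags &= ~FLAGS_OFF_FP;
}

int
main (void)
{
  struct opts o = { 0, 0, 1 };
  set_asm_isa_flags (&o, FL_FP | FL_SIMD | FL_LSE);
  assert (o.asm_isa_flags & FL_SIMD);   /* assembler still accepts SIMD */
  assert (!(o.isa_flags & FL_SIMD));    /* compiler won't generate it */
  assert (o.isa_flags & FL_LSE);        /* non-FP features survive */
  return 0;
}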
_service:tar_scm:0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch
Added
@@ -0,0 +1,214 @@ +From 3f45bbfe924ffe38832b2ad0050589b9f188422e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 25 Jan 2024 14:44:39 +0800 +Subject: PATCH 124/188 LoongArch: Merge template got_load_tls_{ld/gd/le/ie}. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_load_tls): + Load all types of tls symbols through one function. + (loongarch_got_load_tls_gd): Delete. + (loongarch_got_load_tls_ld): Delete. + (loongarch_got_load_tls_ie): Delete. + (loongarch_got_load_tls_le): Delete. + (loongarch_call_tls_get_addr): Modify the called function name. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@got_load_tls_gd<mode>): Delete. + (@load_tls<mode>): New template. + (@got_load_tls_ld<mode>): Delete. + (@got_load_tls_le<mode>): Delete. + (@got_load_tls_ie<mode>): Delete. +--- + gcc/config/loongarch/loongarch.cc | 47 +++++------------------- + gcc/config/loongarch/loongarch.md | 59 ++++++++++++------------------- + 2 files changed, 30 insertions(+), 76 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 612a9c138..ced7e58c2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2732,36 +2732,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + /* The __tls_get_attr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + +-/* Load an entry from the GOT for a TLS GD access. */ ++/* Load an entry for a TLS access. */ + + static rtx +-loongarch_got_load_tls_gd (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym) + { +- return gen_got_load_tls_gd (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS LD access. */ +- +-static rtx +-loongarch_got_load_tls_ld (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ld (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS IE access. */ +- +-static rtx +-loongarch_got_load_tls_ie (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ie (Pmode, dest, sym); +-} +- +-/* Add in the thread pointer for a TLS LE access. */ +- +-static rtx +-loongarch_got_load_tls_le (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_le (Pmode, dest, sym); ++ return gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2805,14 +2781,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- { +- if (type == SYMBOL_TLSLDM) +- emit_insn (loongarch_got_load_tls_ld (a0, loc)); +- else if (type == SYMBOL_TLSGD) +- emit_insn (loongarch_got_load_tls_gd (a0, loc)); +- else +- gcc_unreachable (); +- } ++ emit_insn (loongarch_load_tls (a0, loc)); + + if (flag_plt) + { +@@ -2949,10 +2918,10 @@ loongarch_legitimize_tls_address (rtx loc) + /* la.tls.ie; tp-relative add. 
*/ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -2975,7 +2944,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_got_load_tls_ie (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3007,11 +2976,11 @@ loongarch_legitimize_tls_address (rtx loc) + + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -3039,7 +3008,7 @@ loongarch_legitimize_tls_address (rtx loc) + } + } + else +- emit_insn (loongarch_got_load_tls_le (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23d8dc126..4f9a92334 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -51,10 +51,7 @@ + UNSPEC_BITREV_8B + + ;; TLS +- UNSPEC_TLS_GD +- UNSPEC_TLS_LD +- UNSPEC_TLS_LE +- UNSPEC_TLS_IE ++ UNSPEC_TLS + + ;; Stack tie + UNSPEC_TIE +@@ -2701,45 +2698,33 @@ + + ;; Thread-Local Storage + +-(define_insn "@got_load_tls_gd<mode>" ++(define_insn "@load_tls<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P + (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_GD)) ++ UNSPEC_TLS)) + "" +- "la.tls.gd\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) +- +-(define_insn "@got_load_tls_ld<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_LD)) +- "" +- "la.tls.ld\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++{ ++ enum loongarch_symbol_type symbol_type; ++ gcc_assert (loongarch_symbolic_constant_p (operands1, &symbol_type)); + +-(define_insn "@got_load_tls_le<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_LE)) +- "" +- "la.tls.le\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++ switch (symbol_type) ++ { ++ case SYMBOL_TLS_LE: ++ return "la.tls.le\t%0,%1"; ++ case SYMBOL_TLS_IE: ++ return "la.tls.ie\t%0,%1"; ++ case SYMBOL_TLSLDM: ++ return "la.tls.ld\t%0,%1"; ++ case SYMBOL_TLSGD: ++ return "la.tls.gd\t%0,%1"; + +-(define_insn "@got_load_tls_ie<mode>" +- (set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- (match_operand:P 1 "symbolic_operand" "") +- UNSPEC_TLS_IE)) +- "" +- "la.tls.ie\t%0,%1" +- (set_attr "got" "load") +- (set_attr "mode" "<MODE>")) ++ default: ++ gcc_unreachable (); ++ } ++} ++ (set_attr "mode" "<MODE>") ++ (set_attr "insn_count" "2")) + + ;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the + ;; value in the low word. +-- +2.43.0 +
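From the user's side, the four access kinds that the merged @load_tls template now emits (la.tls.le/ie/ld/gd, selected from the symbol's classification at output time) can be exercised with the standard tls_model attribute. A small self-contained example, mirroring the shape of the TLS tests used elsewhere in this series:

__thread int t_le __attribute__ ((tls_model ("local-exec")));
__thread int t_ie __attribute__ ((tls_model ("initial-exec")));
__thread int t_ld __attribute__ ((tls_model ("local-dynamic")));
__thread int t_gd __attribute__ ((tls_model ("global-dynamic")));

int
sum_tls (void)
{
  /* Each variable forces one of the four TLS access sequences.  */
  return t_le + t_ie + t_ld + t_gd;
}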
_service:tar_scm:0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
Added
@@ -0,0 +1,453 @@ +From 77a86d955dd1c9cd8c7fc35e6caf0cb707799129 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 29 Sep 2022 11:32:57 +0100 +Subject: PATCH 026/157 BackportSME aarch64: Remove redundant TARGET_* + checks + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a31641840af2c40cf36036fa472df34d4a4402c3 + +After previous patches, it's possible to remove TARGET_* +options that are redundant due to (IMO) obvious dependencies. + +gcc/ + * config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4) + (TARGET_DOTPROD): Don't depend on TARGET_SIMD. + (TARGET_AES, TARGET_SHA2): Likewise. Remove TARGET_CRYPTO test. + (TARGET_FP_F16INST): Don't depend on TARGET_FLOAT. + (TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend + on TARGET_SVE. + (TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3) + (TARGET_SVE2_SM4): Don't depend on TARGET_SVE2. + (TARGET_F32MM, TARGET_F64MM): Delete. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard + float macros with just TARGET_FLOAT rather than TARGET_FLOAT + || TARGET_SIMD. + * config/aarch64/aarch64-simd.md (copysign<mode>3): Depend + only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD. + (aarch64_crypto_aes<aes_op>v16qi): Depend only on TARGET_AES, + rather than TARGET_SIMD && TARGET_AES. + (aarch64_crypto_aes<aesmc_op>v16qi): Likewise. + (*aarch64_crypto_aese_fused): Likewise. + (*aarch64_crypto_aesd_fused): Likewise. + (aarch64_crypto_pmulldi): Likewise. + (aarch64_crypto_pmullv2di): Likewise. + (aarch64_crypto_sha1hsi): Likewise TARGET_SHA2. + (aarch64_crypto_sha1hv4si): Likewise. + (aarch64_be_crypto_sha1hv4si): Likewise. + (aarch64_crypto_sha1su1v4si): Likewise. + (aarch64_crypto_sha1<sha1_op>v4si): Likewise. + (aarch64_crypto_sha1su0v4si): Likewise. + (aarch64_crypto_sha256h<sha256_op>v4si): Likewise. + (aarch64_crypto_sha256su0v4si): Likewise. + (aarch64_crypto_sha256su1v4si): Likewise. + (aarch64_crypto_sha512h<sha512_op>qv2di): Likewise TARGET_SHA3. + (aarch64_crypto_sha512su0qv2di): Likewise. + (aarch64_crypto_sha512su1qv2di, eor3q<mode>4): Likewise. + (aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq<mode>4): Likewise. + (aarch64_sm3ss1qv4si): Likewise TARGET_SM4. + (aarch64_sm3tt<sm3tt_op>qv4si): Likewise. + (aarch64_sm3partw<sm3part_op>qv4si): Likewise. + (aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise. + * config/aarch64/aarch64.md (<FLOATUORS:optab>dihf2) + (copysign<GPF:mode>3, copysign<GPF:mode>3_insn) + (xorsign<mode>3): Remove redundant TARGET_FLOAT condition. 
+--- + gcc/config/aarch64/aarch64-c.cc | 2 +- + gcc/config/aarch64/aarch64-simd.md | 56 +++++++++++++++--------------- + gcc/config/aarch64/aarch64.h | 30 ++++++++-------- + gcc/config/aarch64/aarch64.md | 8 ++--- + 4 files changed, 47 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 18c9b975b..2dfe2b8f8 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + + aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile); + +- if (TARGET_FLOAT || TARGET_SIMD) ++ if (TARGET_FLOAT) + { + builtin_define_with_int_value ("__ARM_FP", 0x0E); + builtin_define ("__ARM_FP16_FORMAT_IEEE"); +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index de92802f5..a47b39281 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -693,7 +693,7 @@ + (match_operand:VHSDF 0 "register_operand") + (match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand") +- "TARGET_FLOAT && TARGET_SIMD" ++ "TARGET_SIMD" + { + rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; +@@ -8352,7 +8352,7 @@ + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w")) + CRYPTO_AES)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "aes<aes_op>\\t%0.16b, %2.16b" + (set_attr "type" "crypto_aese") + ) +@@ -8361,7 +8361,7 @@ + (set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI (match_operand:V16QI 1 "register_operand" "w") + CRYPTO_AESMC)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "aes<aesmc_op>\\t%0.16b, %1.16b" + (set_attr "type" "crypto_aesmc") + ) +@@ -8380,7 +8380,7 @@ + (match_operand:V16QI 2 "register_operand" "w")) + UNSPEC_AESE) + UNSPEC_AESMC)) +- "TARGET_SIMD && TARGET_AES ++ "TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" + (set_attr "type" "crypto_aese") +@@ -8401,7 +8401,7 @@ + (match_operand:V16QI 2 "register_operand" "w")) + UNSPEC_AESD) + UNSPEC_AESIMC)) +- "TARGET_SIMD && TARGET_AES ++ "TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" + (set_attr "type" "crypto_aese") +@@ -8415,7 +8415,7 @@ + (unspec:SI (match_operand:SI 1 + "register_operand" "w") + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8425,7 +8425,7 @@ + (unspec:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "w") + (parallel (const_int 0))) + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN" ++ "TARGET_SHA2 && !BYTES_BIG_ENDIAN" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8435,7 +8435,7 @@ + (unspec:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "w") + (parallel (const_int 3))) + UNSPEC_SHA1H)) +- "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN" ++ "TARGET_SHA2 && BYTES_BIG_ENDIAN" + "sha1h\\t%s0, %s1" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8445,7 +8445,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SHA1SU1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1su1\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sha1_fast") + ) +@@ -8456,7 +8456,7 @@ + (match_operand:SI 2 "register_operand" "w") + 
(match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SHA1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1<sha1_op>\\t%q0, %s2, %3.4s" + (set_attr "type" "crypto_sha1_slow") + ) +@@ -8467,7 +8467,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SHA1SU0)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + (set_attr "type" "crypto_sha1_xor") + ) +@@ -8480,7 +8480,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SHA256)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256h<sha256_op>\\t%q0, %q2, %3.4s" + (set_attr "type" "crypto_sha256_slow") + ) +@@ -8490,7 +8490,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SHA256SU0)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256su0\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sha256_fast") + ) +@@ -8501,7 +8501,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SHA256SU1)) +- "TARGET_SIMD && TARGET_SHA2" ++ "TARGET_SHA2" + "sha256su1\\t%0.4s, %2.4s, %3.4s" + (set_attr "type" "crypto_sha256_slow") + ) +@@ -8514,7 +8514,7 @@ + (match_operand:V2DI 2 "register_operand" "w") + (match_operand:V2DI 3 "register_operand" "w") + CRYPTO_SHA512)) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "sha512h<sha512_op>\\t%q0, %q2, %3.2d" + (set_attr "type" "crypto_sha512") + ) +@@ -8524,7 +8524,7 @@ + (unspec:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "register_operand" "w") + UNSPEC_SHA512SU0)) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "sha512su0\\t%0.2d, %2.2d" + (set_attr "type" "crypto_sha512") + ) +@@ -8535,7 +8535,7 @@ + (match_operand:V2DI 2 "register_operand" "w") + (match_operand:V2DI 3 "register_operand" "w") + UNSPEC_SHA512SU1)) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "sha512su1\\t%0.2d, %2.2d, %3.2d" + (set_attr "type" "crypto_sha512") + ) +@@ -8549,7 +8549,7 @@ + (match_operand:VQ_I 2 "register_operand" "w") + (match_operand:VQ_I 3 "register_operand" "w")) + (match_operand:VQ_I 1 "register_operand" "w"))) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b" + (set_attr "type" "crypto_sha3") + ) +@@ -8561,7 +8561,7 @@ + (match_operand:V2DI 2 "register_operand" "w") + (const_int 1)) + (match_operand:V2DI 1 "register_operand" "w"))) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "rax1\\t%0.2d, %1.2d, %2.2d" + (set_attr "type" "crypto_sha3") + ) +@@ -8573,7 +8573,7 @@ + (match_operand:V2DI 1 "register_operand" "%w") + (match_operand:V2DI 2 "register_operand" "w")) + (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd"))) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "xar\\t%0.2d, %1.2d, %2.2d, %3" + (set_attr "type" "crypto_sha3") + ) +@@ -8585,7 +8585,7 @@ + (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w")) + (match_operand:VQ_I 2 "register_operand" "w")) + (match_operand:VQ_I 1 "register_operand" "w"))) +- "TARGET_SIMD && TARGET_SHA3" ++ "TARGET_SHA3" + "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" + (set_attr "type" "crypto_sha3") + ) +@@ -8598,7 +8598,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + UNSPEC_SM3SS1)) +- "TARGET_SIMD && TARGET_SM4" ++ "TARGET_SM4" + "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s" + (set_attr "type" "crypto_sm3") + ) +@@ -8611,7 +8611,7 @@ + (match_operand:V4SI 3 
"register_operand" "w") + (match_operand:SI 4 "aarch64_imm2" "Ui2") + CRYPTO_SM3TT)) +- "TARGET_SIMD && TARGET_SM4" ++ "TARGET_SM4" + "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s%4" + (set_attr "type" "crypto_sm3") + ) +@@ -8622,7 +8622,7 @@ + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w") + CRYPTO_SM3PART)) +- "TARGET_SIMD && TARGET_SM4" ++ "TARGET_SM4" + "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s" + (set_attr "type" "crypto_sm3") + ) +@@ -8634,7 +8634,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SM4E)) +- "TARGET_SIMD && TARGET_SM4" ++ "TARGET_SM4" + "sm4e\\t%0.4s, %2.4s" + (set_attr "type" "crypto_sm4") + ) +@@ -8644,7 +8644,7 @@ + (unspec:V4SI (match_operand:V4SI 1 "register_operand" "w") + (match_operand:V4SI 2 "register_operand" "w") + UNSPEC_SM4EKEY)) +- "TARGET_SIMD && TARGET_SM4" ++ "TARGET_SM4" + "sm4ekey\\t%0.4s, %1.4s, %2.4s" + (set_attr "type" "crypto_sm4") + ) +@@ -9230,7 +9230,7 @@ + (unspec:TI (match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w") + UNSPEC_PMULL)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "pmull\\t%0.1q, %1.1d, %2.1d" + (set_attr "type" "crypto_pmull") + ) +@@ -9240,7 +9240,7 @@ + (unspec:TI (match_operand:V2DI 1 "register_operand" "w") + (match_operand:V2DI 2 "register_operand" "w") + UNSPEC_PMULL2)) +- "TARGET_SIMD && TARGET_AES" ++ "TARGET_AES" + "pmull2\\t%0.1q, %1.2d, %2.2d" + (set_attr "type" "crypto_pmull") + ) +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 521031efe..2a9d2d031 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -222,19 +222,19 @@ enum class aarch64_feature : unsigned char { + #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) + + /* Crypto is an optional extension to AdvSIMD. */ +-#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) ++#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO) + + /* SHA2 is an optional extension to AdvSIMD. */ +-#define TARGET_SHA2 ((TARGET_SIMD && AARCH64_ISA_SHA2) || TARGET_CRYPTO) ++#define TARGET_SHA2 (AARCH64_ISA_SHA2) + + /* SHA3 is an optional extension to AdvSIMD. */ +-#define TARGET_SHA3 (TARGET_SIMD && AARCH64_ISA_SHA3) ++#define TARGET_SHA3 (AARCH64_ISA_SHA3) + + /* AES is an optional extension to AdvSIMD. */ +-#define TARGET_AES ((TARGET_SIMD && AARCH64_ISA_AES) || TARGET_CRYPTO) ++#define TARGET_AES (AARCH64_ISA_AES) + + /* SM is an optional extension to AdvSIMD. */ +-#define TARGET_SM4 (TARGET_SIMD && AARCH64_ISA_SM4) ++#define TARGET_SM4 (AARCH64_ISA_SM4) + + /* FP16FML is an optional extension to AdvSIMD. */ + #define TARGET_F16FML (TARGET_SIMD && AARCH64_ISA_F16FML && TARGET_FP_F16INST) +@@ -246,29 +246,29 @@ enum class aarch64_feature : unsigned char { + #define TARGET_LSE (AARCH64_ISA_LSE) + + /* ARMv8.2-A FP16 support that can be enabled through the +fp16 extension. */ +-#define TARGET_FP_F16INST (TARGET_FLOAT && AARCH64_ISA_F16) ++#define TARGET_FP_F16INST (AARCH64_ISA_F16) + #define TARGET_SIMD_F16INST (TARGET_SIMD && AARCH64_ISA_F16) + + /* Dot Product is an optional extension to AdvSIMD enabled through +dotprod. */ +-#define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD) ++#define TARGET_DOTPROD (AARCH64_ISA_DOTPROD) + + /* SVE instructions, enabled through +sve. */ + #define TARGET_SVE (AARCH64_ISA_SVE) + + /* SVE2 instructions, enabled through +sve2. 
*/ +-#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2) ++#define TARGET_SVE2 (AARCH64_ISA_SVE2) + + /* SVE2 AES instructions, enabled through +sve2-aes. */ +-#define TARGET_SVE2_AES (TARGET_SVE2 && AARCH64_ISA_SVE2_AES) ++#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES) + + /* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */ +-#define TARGET_SVE2_BITPERM (TARGET_SVE2 && AARCH64_ISA_SVE2_BITPERM) ++#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM) + + /* SVE2 SHA3 instructions, enabled through +sve2-sha3. */ +-#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3) ++#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3) + + /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ +-#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) ++#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) + + /* ARMv8.3-A features. */ + #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) +@@ -296,12 +296,10 @@ enum class aarch64_feature : unsigned char { + #define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM) + + /* F32MM instructions are enabled through +f32mm. */ +-#define TARGET_F32MM (AARCH64_ISA_F32MM) +-#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM) ++#define TARGET_SVE_F32MM (AARCH64_ISA_F32MM) + + /* F64MM instructions are enabled through +f64mm. */ +-#define TARGET_F64MM (AARCH64_ISA_F64MM) +-#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM) ++#define TARGET_SVE_F64MM (AARCH64_ISA_F64MM) + + /* BF16 instructions are enabled through +bf16. */ + #define TARGET_BF16_FP (AARCH64_ISA_BF16) +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index c0c64a798..7ee26284d 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -6417,7 +6417,7 @@ + (define_expand "<optab>dihf2" + (set (match_operand:HF 0 "register_operand") + (FLOATUORS:HF (match_operand:DI 1 "register_operand"))) +- "TARGET_FLOAT && (TARGET_FP_F16INST || TARGET_SIMD)" ++ "TARGET_FP_F16INST || TARGET_SIMD" + { + if (TARGET_FP_F16INST) + emit_insn (gen_aarch64_fp16_<optab>dihf2 (operands0, operands1)); +@@ -6676,7 +6676,7 @@ + (match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") +- "TARGET_FLOAT && TARGET_SIMD" ++ "TARGET_SIMD" + { + rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); + emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U +@@ -6693,7 +6693,7 @@ + (match_operand:GPF 2 "register_operand" "w,w,0,0") + (match_operand:<V_INT_EQUIV> 3 "register_operand" "0,w,w,X") + UNSPEC_COPYSIGN)) +- "TARGET_FLOAT && TARGET_SIMD" ++ "TARGET_SIMD" + "@ + bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> + bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> +@@ -6714,7 +6714,7 @@ + (match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") +- "TARGET_FLOAT && TARGET_SIMD" ++ "TARGET_SIMD" + { + + machine_mode imode = <V_INT_EQUIV>mode; +-- +2.33.0 +
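The reason the "&& TARGET_SIMD"-style guards became removable is an invariant, not a behavior change: once every flag update goes through the setter, a dependent feature bit can never be present without its prerequisites, so re-testing the prerequisite in each TARGET_* macro buys nothing. A toy model with invented bits (not GCC's real flag handling):

#include <assert.h>
#include <stdint.h>

enum { FL_FP = 1, FL_SIMD = 2, FL_AES = 4 };

static uint32_t isa_flags;

static void
set_isa_flags (uint32_t flags)
{
  /* Dependency closure: AES implies SIMD implies FP.  */
  if (flags & FL_AES)
    flags |= FL_SIMD;
  if (flags & FL_SIMD)
    flags |= FL_FP;
  isa_flags = flags;
}

#define TARGET_AES (isa_flags & FL_AES)   /* no "&& TARGET_SIMD" needed */

int
main (void)
{
  set_isa_flags (FL_AES);
  assert (TARGET_AES && (isa_flags & FL_SIMD) && (isa_flags & FL_FP));
  return 0;
}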
_service:tar_scm:0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch
Added
@@ -0,0 +1,453 @@ +From cd177538c2a0f5248e9e7af6247b4d1ba6fe55db Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 25 Jan 2024 19:10:46 +0800 +Subject: PATCH 125/188 LoongArch: Add the macro implementation of + mcmodel=extreme. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p): + Add function declaration. + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend" + is not allowed + (loongarch_load_tls): Added macro support in extreme mode. + (loongarch_call_tls_get_addr): Likewise. + (loongarch_legitimize_tls_address): Likewise. + (loongarch_force_address): Likewise. + (loongarch_legitimize_move): Likewise. + (loongarch_output_mi_thunk): Likewise. + (loongarch_option_override_internal): Remove the code that detects + explicit relocs status. + (loongarch_handle_model_attribute): Likewise. + * config/loongarch/loongarch.md (movdi_symbolic_off64): New template. + * config/loongarch/predicates.md (symbolic_off64_operand): New predicate. + (symbolic_off64_or_reg_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-5.c: New test. + * gcc.target/loongarch/func-call-extreme-5.c: New test. + * gcc.target/loongarch/func-call-extreme-6.c: New test. + * gcc.target/loongarch/tls-extreme-macro.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 110 +++++++++++------- + gcc/config/loongarch/loongarch.md | 48 +++++++- + gcc/config/loongarch/predicates.md | 12 ++ + .../gcc.target/loongarch/attr-model-5.c | 8 ++ + .../loongarch/func-call-extreme-5.c | 7 ++ + .../loongarch/func-call-extreme-6.c | 7 ++ + .../gcc.target/loongarch/tls-extreme-macro.c | 35 ++++++ + 8 files changed, 184 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 5060efbb6..87b94e8b0 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); + extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); + extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); ++extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ced7e58c2..9cfe5bfb2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1932,8 +1932,13 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_PCREL: + case SYMBOL_PCREL64: ++ /* When the code model is extreme, the non-zero offset situation ++ has not been handled well, so it is disabled here now. */ ++ if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64)) ++ return false; ++ /* fall through */ ++ case SYMBOL_PCREL: + /* GAS rejects offsets outside the range -2^31, 2^31-1. 
*/ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + +@@ -2735,9 +2740,15 @@ static GTY (()) rtx loongarch_tls_symbol; + /* Load an entry for a TLS access. */ + + static rtx +-loongarch_load_tls (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type) + { +- return gen_load_tls (Pmode, dest, sym); ++ /* TLS LE gets a 32 or 64 bit offset here, so one register can do it. */ ++ if (type == SYMBOL_TLS_LE) ++ return gen_load_tls (Pmode, dest, sym); ++ ++ return loongarch_symbol_extreme_p (type) ++ ? gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode)) ++ : gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2769,8 +2780,6 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp1 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +@@ -2781,7 +2790,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- emit_insn (loongarch_load_tls (a0, loc)); ++ emit_insn (loongarch_load_tls (a0, loc, type)); + + if (flag_plt) + { +@@ -2848,22 +2857,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_EXTREME: + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); +- +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), +- loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) ++ { ++ rtx tmp1 = gen_reg_rtx (Pmode); ++ rtx high = gen_reg_rtx (Pmode); ++ ++ loongarch_emit_move (high, ++ gen_rtx_HIGH (Pmode, ++ loongarch_tls_symbol)); ++ loongarch_emit_move (tmp1, ++ gen_rtx_LO_SUM (Pmode, ++ gen_rtx_REG (Pmode, 0), ++ loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); ++ loongarch_emit_move (dest, ++ gen_rtx_MEM (Pmode, ++ gen_rtx_PLUS (Pmode, ++ high, tmp1))); ++ } ++ else ++ emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, ++ gen_reg_rtx (DImode))); + } + break; + +@@ -2928,8 +2943,6 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp3 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp3, + gen_rtx_REG (Pmode, 0), tmp2)); +@@ -2944,7 +2957,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3001,14 +3014,12 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2)); + emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2)); + } + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn 
(loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_LE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3081,7 +3092,7 @@ loongarch_force_address (rtx x, machine_mode mode) + return x; + } + +-static bool ++bool + loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + { + switch (type) +@@ -3402,6 +3413,21 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + return true; + } + ++ /* Obtain the address of the symbol through the macro instruction ++ of two registers. */ ++ enum loongarch_symbol_type symbol_type; ++ if (TARGET_64BIT && register_operand (dest, mode) ++ && loongarch_symbolic_constant_p (src, &symbol_type) ++ && loongarch_symbol_extreme_p (symbol_type)) ++ { ++ gcc_assert (can_create_pseudo_p ()); ++ rtx tmp_reg = gen_reg_rtx (DImode); ++ emit_insn (gen_movdi_symbolic_off64 (dest, src, tmp_reg)); ++ set_unique_reg_note (get_last_insn (), REG_UNUSED, tmp_reg); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, src); ++ return true; ++ } ++ + return false; + } + +@@ -7458,12 +7484,22 @@ loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + allowed, otherwise load the address into a register first. */ + if (use_sibcall_p) + { +- insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ if (TARGET_CMODEL_EXTREME) ++ { ++ emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); ++ insn = emit_call_insn (gen_sibcall_internal (temp1, const0_rtx)); ++ } ++ else ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + } + else + { +- loongarch_emit_move (temp1, fnaddr); ++ if (TARGET_CMODEL_EXTREME) ++ emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); ++ else ++ loongarch_emit_move (temp1, fnaddr); ++ + emit_jump_insn (gen_indirect_jump (temp1)); + } + +@@ -7568,10 +7604,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + switch (la_target.cmodel) + { + case CMODEL_EXTREME: +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) +- error ("code model %qs is not compatible with %s", +- "extreme", "-mexplicit-relocs=none"); +- + if (opts->x_flag_plt) + { + if (global_options_set.x_flag_plt) +@@ -7989,14 +8021,6 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + *no_add_attrs = true; + return NULL_TREE; + } +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) +- { +- error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute is not compatible with %s", name, +- "-mexplicit-relocs=none"); +- *no_add_attrs = true; +- return NULL_TREE; +- } + + arg = TREE_VALUE (arg); + if (TREE_CODE (arg) != STRING_CST) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4f9a92334..add55e0af 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -82,6 +82,8 @@ + + UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1 + UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1 ++ ++ UNSPEC_LOAD_SYMBOL_OFFSET64 + ) + + (define_c_enum "unspecv" +@@ -2182,6 +2184,46 @@ + (set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "DI")) + ++;; Use two registers to get the global symbol address from the got table. 
++;; la.global rd, rt, sym
++
++(define_insn_and_split "movdi_symbolic_off64"
++ [(set (match_operand:DI 0 "register_operand" "=r,r")
++ (match_operand:DI 1 "symbolic_off64_or_reg_operand" "Yd,r"))
++ (unspec:DI [(const_int 0)]
++ UNSPEC_LOAD_SYMBOL_OFFSET64)
++ (clobber (match_operand:DI 2 "register_operand" "=&r,r"))]
++ "TARGET_64BIT && TARGET_CMODEL_EXTREME"
++{
++ if (which_alternative == 1)
++ return "#";
++
++ enum loongarch_symbol_type symbol_type;
++ gcc_assert (loongarch_symbolic_constant_p (operands[1], &symbol_type));
++
++ switch (symbol_type)
++ {
++ case SYMBOL_PCREL64:
++ return "la.local\t%0,%2,%1";
++ case SYMBOL_GOT_DISP:
++ return "la.global\t%0,%2,%1";
++ case SYMBOL_TLS_IE:
++ return "la.tls.ie\t%0,%2,%1";
++ case SYMBOL_TLSGD:
++ return "la.tls.gd\t%0,%2,%1";
++ case SYMBOL_TLSLDM:
++ return "la.tls.ld\t%0,%2,%1";
++
++ default:
++ gcc_unreachable ();
++ }
++}
++ "&& REG_P (operands[1]) && find_reg_note (insn, REG_UNUSED, operands[2]) != 0"
++ [(set (match_dup 0) (match_dup 1))]
++ ""
++ [(set_attr "mode" "DI")
++ (set_attr "insn_count" "5")])
++
+ ;; 32-bit Integer moves
+
+ (define_expand "movsi"
+@@ -2724,7 +2766,11 @@
+ }
+ }
+ [(set_attr "mode" "<MODE>")
+- (set_attr "insn_count" "2")])
++ (set (attr "insn_count")
++ (if_then_else
++ (match_test "TARGET_CMODEL_EXTREME")
++ (const_int 4)
++ (const_int 2)))])
+
+ ;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the
+ ;; value in the low word.
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 824a85b36..1d9a30695 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -576,6 +576,18 @@
+ || symbolic_pcrel_offset_operand (op, Pmode));
+ })
+
++(define_predicate "symbolic_off64_operand"
++ (match_code "const,symbol_ref,label_ref")
++{
++ enum loongarch_symbol_type type;
++ return loongarch_symbolic_constant_p (op, &type)
++ && loongarch_symbol_extreme_p (type);
++})
++
++(define_predicate "symbolic_off64_or_reg_operand"
++ (ior (match_operand 0 "register_operand")
++ (match_operand 0 "symbolic_off64_operand")))
++
+ (define_predicate "equality_operator"
+ (match_code "eq,ne"))
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-5.c b/gcc/testsuite/gcc.target/loongarch/attr-model-5.c
+new file mode 100644
+index 000000000..5f2c3ec9e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-5.c
+@@ -0,0 +1,8 @@
++/* { dg-do compile } */
++/* { dg-options "-mexplicit-relocs=none -mcmodel=extreme -O2 -fno-pic" } */
++/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,x" } } */
++/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]\+,y" } } */
++/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,counter" } } */
++
++#define ATTR_MODEL_TEST
++#include "attr-model-test.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
+new file mode 100644
+index 000000000..b1bd9d236
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=none -mcmodel=extreme" } */
++/* { dg-final { scan-assembler "test:.*la.global\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,g" } } */
++/* { dg-final { scan-assembler "test1:.*la.global\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,f" } } */
++/* { dg-final { scan-assembler "test2:.*la.local\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,l" } } */
++
++#include "func-call-extreme-1.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
+new file mode 100644
+index 000000000..6e6ad5c9f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
+@@ -0,0 +1,7 @@
++/* { dg-do compile } */
++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=none -mcmodel=extreme" } */
++/* { dg-final { scan-assembler "test:.*la.global\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,g" } } */
++/* { dg-final { scan-assembler "test1:.*la.local\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,f" } } */
++/* { dg-final { scan-assembler "test2:.*la.local\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,l" } } */
++
++#include "func-call-extreme-1.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c
+new file mode 100644
+index 000000000..4341f8212
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c
+@@ -0,0 +1,35 @@
++/* { dg-do compile } */
++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=none" } */
++/* { dg-final { scan-assembler "test_le:.*la.tls.le\t\\\$r\[0-9\]\+,\\\.L" { target tls_native } } } */
++/* { dg-final { scan-assembler "test_ie:.*la.tls.ie\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,\\\.L" { target tls_native } } } */
++/* { dg-final { scan-assembler "test_ld:.*la.tls.ld\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,\\\.L.*la.global\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,__tls_get_addr" { target tls_native } } } */
++/* { dg-final { scan-assembler "test_gd:.*la.tls.gd\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,\\\.L.*la.global\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,__tls_get_addr" { target tls_native } } } */
++
++__thread int c __attribute__ ((tls_model ("local-exec")));
++__thread int d __attribute__ ((tls_model ("initial-exec")));
++__thread int e __attribute__ ((tls_model ("local-dynamic")));
++__thread int f __attribute__ ((tls_model ("global-dynamic")));
++
++int
++test_le (void)
++{
++ return c;
++}
++
++int
++test_ie (void)
++{
++ return d;
++}
++
++int
++test_ld (void)
++{
++ return e;
++}
++
++int
++test_gd (void)
++{
++ return f;
++}
+--
+2.43.0
+
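Taken together, the hunks above make -mcmodel=extreme usable without explicit relocs: extreme-model symbol accesses are funneled through the new movdi_symbolic_off64 pattern, which prints the two-register la.local/la.global/la.tls.* macro instructions. A minimal sketch of the user-visible effect follows; the file name, symbol, and register numbers are illustrative assumptions, not part of the patch.

/* extreme-sketch.c -- with a LoongArch64 compiler carrying this patch:
   loongarch64-linux-gnu-gcc -S -O2 -mcmodel=extreme -mexplicit-relocs=none extreme-sketch.c
   The assembly should contain a two-register macro access such as
   "la.global $r12,$r13,g" instead of rejecting the option combination.  */
extern int g;

int
use_g (void)
{
  return g;
}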
_service:tar_scm:0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
Added
@@ -0,0 +1,132 @@ +From 53a858c0c371cbea27ed4170a94fb3918b9fcdcf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 4 Oct 2022 16:39:18 +0100 +Subject: PATCH 027/157 BackportSME aarch64: Define __ARM_FEATURE_RCPC + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1b0a767f04a8ccbaff2a7b71d5c817cdb469630 + +https://github.com/ARM-software/acle/pull/199 adds a new feature +macro for RCPC, for use in things like inline assembly. This patch +adds the associated support to GCC. + +Also, RCPC is required for Armv8.3-A and later, but the armv8.3-a +entry didn't include it. This was probably harmless in practice +since GCC simply ignored the extension until now. (The GAS +definition is OK.) + +gcc/ + * config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New macro. + * config/aarch64/aarch64-arches.def (armv8.3-a): Include RCPC. + * config/aarch64/aarch64-cores.def (thunderx3t110, zeus, neoverse-v1) + (neoverse-512tvb, saphira): Remove RCPC from these Armv8.3-A+ cores. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define + __ARM_FEATURE_RCPC when appropriate. + +gcc/testsuite/ + * gcc.target/aarch64/pragma_cpp_predefs_1.c: Add RCPC tests. +--- + gcc/config/aarch64/aarch64-arches.def | 2 +- + gcc/config/aarch64/aarch64-c.cc | 1 + + gcc/config/aarch64/aarch64-cores.def | 10 +++++----- + gcc/config/aarch64/aarch64.h | 1 + + .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 +++++++++++++++++++ + 5 files changed, 28 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index 9f8246618..5a9eff336 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -33,7 +33,7 @@ + AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) + AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) + AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) +-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH)) ++AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC)) + AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) + AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) + AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 2dfe2b8f8..4085ad840 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -202,6 +202,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_LS64, + "__ARM_FEATURE_LS64", pfile); ++ aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 60299160b..b50628d6b 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -133,17 +133,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0 + /* ARMv8.3-A Architecture Processors. */ + + /* Marvell cores (TX3). 
*/ +-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) ++AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) + + /* ARMv8.4-A Architecture Processors. */ + + /* Arm ('A') cores. */ +-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) +-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) ++AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) + + /* Qualcomm ('Q') cores. */ +-AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1) ++AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO), saphira, 0x51, 0xC01, -1) + + /* ARMv8-A big.LITTLE implementations. */ + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 2a9d2d031..19b82b4f3 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -201,6 +201,7 @@ enum class aarch64_feature : unsigned char { + #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) ++#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A) + #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +index bfb044f5d..307fa3d67 100644 +--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c +@@ -248,6 +248,26 @@ + #error "__ARM_FEATURE_CRC32 is not defined but should be!" + #endif + ++#pragma GCC target ("arch=armv8.2-a") ++#ifdef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.2-a+rcpc") ++#ifndef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is not defined but should be!" ++#endif ++ ++#pragma GCC target ("+norcpc") ++#ifdef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is defined but should not be!" ++#endif ++ ++#pragma GCC target ("arch=armv8.3-a") ++#ifndef __ARM_FEATURE_RCPC ++#error "__ARM_FEATURE_RCPC is not defined but should be!" ++#endif ++ + int + foo (int a) + { +-- +2.33.0 +
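The new __ARM_FEATURE_RCPC macro is aimed at exactly the inline-assembly use case the commit message mentions. A hedged sketch of user code keying off it (the helper name is invented for illustration):

/* With this patch, -march=armv8.3-a or any arch with +rcpc defines
   __ARM_FEATURE_RCPC, so code can pick an LDAPR-based load-acquire.  */
#ifdef __ARM_FEATURE_RCPC
static inline int
load_acquire_int (int *p)
{
  int v;
  /* LDAPR: RCPC weaker load-acquire.  */
  __asm__ volatile ("ldapr %w0, [%1]" : "=r" (v) : "r" (p) : "memory");
  return v;
}
#else
static inline int
load_acquire_int (int *p)
{
  /* Portable fallback using the acquire builtin.  */
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
}
#endif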
_service:tar_scm:0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch
Added
@@ -0,0 +1,126 @@ +From 1ccf16353b2be4308c79f3b011cb800bfa6f94f4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 26 Jan 2024 10:46:51 +0800 +Subject: PATCH 126/188 LoongArch: Enable explicit reloc for extreme TLS + GD/LD with -mexplicit-relocs=auto. + +Binutils does not support relaxation using four instructions to obtain +symbol addresses + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + When the code model of the symbol is extreme and -mexplicit-relocs=auto, + the macro instruction loading symbol address is not applicable. + (loongarch_call_tls_get_addr): Adjust code. + (loongarch_legitimize_tls_address): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New test. + * gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 19 +++++++++---------- + .../explicit-relocs-extreme-auto-tls-ld-gd.c | 5 +++++ + .../explicit-relocs-medium-auto-tls-ld-gd.c | 5 +++++ + 3 files changed, 19 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cfe5bfb2..84b949021 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1968,6 +1968,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + ++ /* The linker don't know how to relax accesses in extreme code model. */ ++ if (loongarch_symbol_extreme_p (type)) ++ return true; ++ + switch (type) + { + case SYMBOL_TLS_IE: +@@ -1979,11 +1983,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: +- /* The linker don't know how to relax GOT accesses in extreme +- code model. */ +- if (TARGET_CMODEL_EXTREME) +- return true; +- + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -2772,7 +2771,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) ++ if (loongarch_explicit_relocs_p (type)) + { + /* Split tls symbol to high and low. 
*/
+ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
+@@ -2805,7 +2804,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+ case CMODEL_MEDIUM:
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+- if (TARGET_EXPLICIT_RELOCS)
++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ {
+ emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
+ rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
+ loongarch_tls_symbol,
+ const0_rtx);
+ insn = emit_call_insn (call);
+@@ -2841,7 +2840,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+ case CMODEL_NORMAL:
+ case CMODEL_MEDIUM:
+ {
+- if (TARGET_EXPLICIT_RELOCS)
++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP))
+ {
+ rtx high = gen_reg_rtx (Pmode);
+ loongarch_emit_move (high,
+@@ -2935,7 +2934,7 @@ loongarch_legitimize_tls_address (rtx loc)
+ tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
+ dest = gen_reg_rtx (Pmode);
+- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE))
+ {
+ tmp3 = gen_reg_rtx (Pmode);
+ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
+@@ -2992,7 +2991,7 @@
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
+ dest = gen_reg_rtx (Pmode);
+
+- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE))
+ {
+ tmp3 = gen_reg_rtx (Pmode);
+ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
+diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
+new file mode 100644
+index 000000000..35bd4570a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } */
++/* { dg-final { scan-assembler-not "la.tls.\[lg\]d" { target tls_native } } } */
++
++#include "./explicit-relocs-auto-tls-ld-gd.c"
+diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c
+new file mode 100644
+index 000000000..47bffae8a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */
++/* { dg-final { scan-assembler-not "la.global" { target tls_native } } } */
++
++#include "./explicit-relocs-auto-tls-ld-gd.c"
+--
+2.43.0
+
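Both new tests compile a shared TLS source under the extreme and medium models. A standalone sketch of the kind of access they exercise (hypothetical file, not part of the patch):

/* With -O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme, the change above
   forces the GD/LD sequence to use explicit relocs, so no la.tls.gd or
   la.tls.ld macro instruction should appear in the assembly.  */
__thread int counter;          /* global-dynamic model under -fPIC */

int
next (void)
{
  return ++counter;
}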
_service:tar_scm:0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch
Added
@@ -0,0 +1,29 @@ +From f6b2917888292c694bae1debe8abb0d6c2c6f59e Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Tue, 20 Feb 2024 11:03:47 +0800 +Subject: PATCH 028/157 BackportSME Add Ampere-1 and Ampere-1A core + definition in aarch64-cores.def + +From commit db2f5d661239737157cf131de7d4df1c17d8d88d and +590a06afbf0e96813b5879742f38f3665512c854 +--- + gcc/config/aarch64/aarch64-cores.def | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index b50628d6b..f069c81cf 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -69,7 +69,8 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thu + AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1) + + /* Ampere Computing ('\xC0') cores. */ +-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1) ++AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1) + /* Do not swap around "emag" and "xgene1", + this order is required to handle variant correctly. */ + AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3) +-- +2.33.0 +
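A quick way to see what the widened feature lists change; the command line and exact macro set below are an assumption based on the flags named in the diff, not something the patch states.

/* Probe the builtin macros, e.g.:
     gcc -mcpu=ampere1a -dM -E - </dev/null | grep __ARM_FEATURE
   With (F16, RNG, AES, SHA3, MEMTAG) on ampere1a, macros such as
   __ARM_FEATURE_SHA3 and __ARM_FEATURE_MEMORY_TAGGING should appear.  */
#if defined (__ARM_FEATURE_SHA3) && defined (__ARM_FEATURE_MEMORY_TAGGING)
int ampere1a_feature_set_present;
#endif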
_service:tar_scm:0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch
Added
@@ -0,0 +1,72 @@ +From d802fd5eb24bba0c274edeea5aff33e794927aaa Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 26 Jan 2024 11:14:00 +0800 +Subject: PATCH 127/188 LoongArch: Added support for loading __get_tls_addr + symbol address using call36. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr): + Add support for call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++----- + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 5 +++++ + 2 files changed, 21 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 84b949021..0050813df 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2803,17 +2803,27 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_MEDIUM: + { +- rtx reg = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); +- rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +- loongarch_tls_symbol, +- const0_rtx); +- insn = emit_call_insn (call); ++ rtx call; ++ ++ if (HAVE_AS_SUPPORT_CALL36) ++ call = gen_call_value_internal (v0, loongarch_tls_symbol, ++ const0_rtx); ++ else ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_insn (gen_pcalau12i (Pmode, reg, ++ loongarch_tls_symbol)); ++ call = gen_call_value_internal_1 (Pmode, v0, reg, ++ loongarch_tls_symbol, ++ const0_rtx); ++ } ++ insn = emit_call_insn (call); + } + else + { ++ rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, loongarch_tls_symbol); + insn = emit_call_insn (gen_call_value_internal (v0, + reg, +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..d1a482083 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler "pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && loongarch_call36_support } } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 +
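The new test only checks the call instruction itself; below is a hedged sketch of the source it reduces to (file name invented, shown for orientation):

/* tls-call36-sketch.c -- with an assembler that supports %call36 and
   -O2 -fPIC -mcmodel=medium -mexplicit-relocs=auto, the call to
   __tls_get_addr should be emitted as
   "pcaddu18i $r1,%call36(__tls_get_addr)" followed by jirl, instead of
   the older pcalau12i + jirl pair.  */
__thread int t;   /* global-dynamic model under -fPIC */

int
get_t (void)
{
  return t;
}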
_service:tar_scm:0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
Added
@@ -0,0 +1,968 @@ +From 81a4b464d01cf00f8b355115588e67bf2c021acd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 7 Sep 2022 10:52:04 +0100 +Subject: PATCH 029/157 BackportSME aarch64: Fix +nosimd handling of FPR + moves + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d6106132907f6bd01109f2616d20a87edecc6fc6 + +8-bit and 16-bit FPR moves would ICE for +nosimd+fp, and some other +moves would handle FPR<-zero inefficiently. This is very much a +niche case at the moment, but something like it becomes more +important with SME streaming mode. + +The si, di and vector tests already passed, they're just included for +completeness. + +We're a bit inconsistent about whether alternatives involving FPRs +are marked with arch==fp or arch=* (i.e. default). E.g. FPR loads +and stores are sometimes * and sometimes fp. + +IMO * makes more sense. FPRs should not be used at all without +TARGET_FLOAT, so TARGET_FLOAT represents the base architecture +when FPRs are enabled. I think it's more useful if non-default +arches represent a genuine restriction. + +gcc/ + * config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Extend + w<-w, r<-w and w<-r alternatives to !simd, using 32-bit moves + in that case. Extend w<-r to w<-Z. + (*mov<HFBF:mode>_aarch64): Likewise, but with Y instead of Z. + (*movti_aarch64): Use an FMOV from XZR for w<-Z if MOVI is not + available. + (define_split): Do not apply the floating-point immediate-to-register + split to zeros, even if MOVI is not available. + +gcc/testsuite/ + * gcc.target/aarch64/movqi_1.c: New test. + * gcc.target/aarch64/movhi_1.c: Likewise. + * gcc.target/aarch64/movsi_1.c: Likewise. + * gcc.target/aarch64/movdi_2.c: Likewise. + * gcc.target/aarch64/movti_2.c: Likewise. + * gcc.target/aarch64/movhf_1.c: Likewise. + * gcc.target/aarch64/movsf_1.c: Likewise. + * gcc.target/aarch64/movdf_1.c: Likewise. + * gcc.target/aarch64/movtf_2.c: Likewise. + * gcc.target/aarch64/movv8qi_1.c: Likewise. + * gcc.target/aarch64/movv16qi_1.c: Likewise. 
+---
+ gcc/config/aarch64/aarch64.md | 38 ++++----
+ gcc/testsuite/gcc.target/aarch64/movdf_1.c | 53 ++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movdi_2.c | 61 +++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movhf_1.c | 53 ++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movhi_1.c | 61 +++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movqi_1.c | 61 +++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movsf_1.c | 53 ++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movsi_1.c | 61 +++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movtf_2.c | 81 +++++++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movti_2.c | 86 +++++++++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 82 ++++++++++++++++++
+ gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 55 ++++++++++++
+ 12 files changed, 729 insertions(+), 16 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 7ee26284d..7267a74d6 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1201,7 +1201,7 @@
+
+ (define_insn "*mov<mode>_aarch64"
+ [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w")
+- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,r,w"))]
++ (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
+ "(register_operand (operands[0], <MODE>mode)
+ || aarch64_reg_or_zero (operands[1], <MODE>mode))"
+ {
+@@ -1225,11 +1225,11 @@
+ case 7:
+ return "str\t%<size>1, %0";
+ case 8:
+- return "umov\t%w0, %1.<v>[0]";
++ return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
+ case 9:
+- return "dup\t%0.<Vallxd>, %w1";
++ return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
+ case 10:
+- return "dup\t%<Vetype>0, %1.<v>[0]";
++ return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
+ default:
+ gcc_unreachable ();
+ }
+@@ -1237,7 +1237,7 @@
+ ;; The "mov_imm" type for CNT is just a placeholder.
+ [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
+ store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
+- (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd")]
++ (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
+ )
+
+ (define_expand "mov<mode>"
+@@ -1399,14 +1399,15 @@
+
+ (define_insn "*movti_aarch64"
+ [(set (match_operand:TI 0
+- "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m")
++ "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m")
+ (match_operand:TI 1
+- "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
++ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))]
+ "(register_operand (operands[0], TImode)
+ || aarch64_reg_or_zero (operands[1], TImode))"
+ "@
+ #
+ movi\\t%0.2d, #0
++ fmov\t%d0, xzr
+ #
+ #
+ mov\\t%0.16b, %1.16b
+@@ -1415,11 +1416,11 @@
+ stp\\txzr, xzr, %0
+ ldr\\t%q0, %1
+ str\\t%q1, %0"
+- [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
++ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \
+ load_16,store_16,store_16,\
+ load_16,store_16")
+- (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
+- (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
++ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4")
++ (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")]
+ )
+
+ ;; Split a TImode register-register or register-immediate move into
+@@ -1458,16 +1459,19 @@
+ )
+
+ (define_insn "*mov<mode>_aarch64"
+- [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r")
+- (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
++ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r")
++ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "@
+ movi\\t%0.4h, #0
+ fmov\\t%h0, %w1
+ dup\\t%w0.4h, %w1
++ fmov\\t%s0, %w1
+ umov\\t%w0, %1.h[0]
++ fmov\\t%w0, %s1
+ mov\\t%0.h[0], %1.h[0]
++ fmov\\t%s0, %s1
+ fmov\\t%h0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], HImode);
+ ldr\\t%h0, %1
+@@ -1475,9 +1479,10 @@
+ ldrh\\t%w0, %1
+ strh\\t%w1, %0
+ mov\\t%w0, %w1"
+- [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \
+- neon_move,f_loads,f_stores,load_4,store_4,mov_reg")
+- (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")]
++ [(set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc,
++ neon_move,fmov,fconsts,neon_move,f_loads,f_stores,
++ load_4,store_4,mov_reg")
++ (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
+ )
+
+ (define_insn "*movsf_aarch64"
+@@ -1530,10 +1535,11 @@
+
+ (define_split
+ [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
+- (match_operand:GPF_HF 1 "general_operand"))]
++ (match_operand:GPF_HF 1 "const_double_operand"))]
+ "can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
++ && !aarch64_float_const_zero_rtx_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ {
+diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_1.c b/gcc/testsuite/gcc.target/aarch64/movdf_1.c
+new file mode 100644
+index 000000000..a51ded1d6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movdf_1.c
+@@ -0,0 +1,53 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nothing+nosimd+fp"
++
++/*
++** fpr_to_fpr:
++** fmov d0, d1
++** ret ++*/ ++double ++fpr_to_fpr (double q0, double q1) ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++double ++gpr_to_fpr () ++{ ++ register double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++double ++zero_to_fpr () ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++void ++fpr_to_gpr (double q0) ++{ ++ register double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_2.c b/gcc/testsuite/gcc.target/aarch64/movdi_2.c +new file mode 100644 +index 000000000..dd3fc3e8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdi_2.c +@@ -0,0 +1,61 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++void ++fpr_to_fpr (void) ++{ ++ register uint64_t q0 asm ("q0"); ++ register uint64_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++void ++gpr_to_fpr (uint64_t x0) ++{ ++ register uint64_t q0 asm ("q0"); ++ q0 = x0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++void ++zero_to_fpr () ++{ ++ register uint64_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++uint64_t ++fpr_to_gpr () ++{ ++ register uint64_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_1.c b/gcc/testsuite/gcc.target/aarch64/movhf_1.c +new file mode 100644 +index 000000000..cae25d4e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movhf_1.c +@@ -0,0 +1,53 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++_Float16 ++fpr_to_fpr (_Float16 q0, _Float16 q1) ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++_Float16 ++gpr_to_fpr () ++{ ++ register _Float16 w0 asm ("w0"); ++ asm volatile ("" : "=r" (w0)); ++ return w0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++_Float16 ++zero_to_fpr () ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++void ++fpr_to_gpr (_Float16 q0) ++{ ++ register _Float16 w0 asm ("w0"); ++ w0 = q0; ++ asm volatile ("" :: "r" (w0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_1.c b/gcc/testsuite/gcc.target/aarch64/movhi_1.c +new file mode 100644 +index 000000000..8017abc5f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movhi_1.c +@@ -0,0 +1,61 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) ++{ ++ register uint16_t q0 asm ("q0"); ++ register uint16_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint16_t w0) ++{ ++ register uint16_t q0 asm ("q0"); ++ q0 = w0; ++ asm 
volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () ++{ ++ register uint16_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++uint16_t ++fpr_to_gpr () ++{ ++ register uint16_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_1.c b/gcc/testsuite/gcc.target/aarch64/movqi_1.c +new file mode 100644 +index 000000000..401a79630 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movqi_1.c +@@ -0,0 +1,61 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) ++{ ++ register uint8_t q0 asm ("q0"); ++ register uint8_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint8_t w0) ++{ ++ register uint8_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () ++{ ++ register uint8_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++uint8_t ++fpr_to_gpr () ++{ ++ register uint8_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_1.c b/gcc/testsuite/gcc.target/aarch64/movsf_1.c +new file mode 100644 +index 000000000..09715aa4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movsf_1.c +@@ -0,0 +1,53 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++float ++fpr_to_fpr (float q0, float q1) ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++float ++gpr_to_fpr () ++{ ++ register float w0 asm ("w0"); ++ asm volatile ("" : "=r" (w0)); ++ return w0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++float ++zero_to_fpr () ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++void ++fpr_to_gpr (float q0) ++{ ++ register float w0 asm ("w0"); ++ w0 = q0; ++ asm volatile ("" :: "r" (w0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_1.c b/gcc/testsuite/gcc.target/aarch64/movsi_1.c +new file mode 100644 +index 000000000..5314139aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movsi_1.c +@@ -0,0 +1,61 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) ++{ ++ register uint32_t q0 asm ("q0"); ++ register uint32_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint32_t w0) ++{ ++ register uint32_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () ++{ ++ register uint32_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: 
"w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++uint32_t ++fpr_to_gpr () ++{ ++ register uint32_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_2.c b/gcc/testsuite/gcc.target/aarch64/movtf_2.c +new file mode 100644 +index 000000000..38b16358d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movtf_2.c +@@ -0,0 +1,81 @@ ++/* { dg-do assemble } */ ++/* { dg-require-effective-target large_long_double } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \sp\ ++** ldr q0, \sp\ ++** add sp, sp, #?16 ++** ret ++*/ ++long double ++fpr_to_fpr (long double q0, long double q1) ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\1\, x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\1\, x0 ++** ret ++*/ ++long double ++gpr_to_fpr () ++{ ++ register long double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++long double ++zero_to_fpr () ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\1\ ++** | ++** fmov x1, v0.d\1\ ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\1\ ++** | ++** fmov x0, v0.d\1\ ++** fmov x1, d0 ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (long double q0) ++{ ++ register long double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movti_2.c b/gcc/testsuite/gcc.target/aarch64/movti_2.c +new file mode 100644 +index 000000000..c393b1220 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movti_2.c +@@ -0,0 +1,86 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \sp\ ++** ldr q0, \sp\ ++** add sp, sp, #?16 ++** ret ++*/ ++void ++fpr_to_fpr (void) ++{ ++ register __int128_t q0 asm ("q0"); ++ register __int128_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\1\, x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\1\, x0 ++** ret ++*/ ++void ++gpr_to_fpr (__int128_t x0) ++{ ++ register __int128_t q0 asm ("q0"); ++ q0 = x0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++void ++zero_to_fpr () ++{ ++ register __int128_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\1\ ++** | ++** fmov x1, v0.d\1\ ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\1\ ++** | ++** fmov x0, v0.d\1\ ++** fmov x1, d0 ++** ) ++** ret ++*/ ++__int128_t ++fpr_to_gpr () ++{ ++ register __int128_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git 
a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c +new file mode 100644 +index 000000000..8a6afb13b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c +@@ -0,0 +1,82 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++typedef unsigned char v16qi __attribute__((vector_size(16))); ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \sp\ ++** ldr q0, \sp\ ++** add sp, sp, #?16 ++** ret ++*/ ++v16qi ++fpr_to_fpr (v16qi q0, v16qi q1) ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\1\, x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\1\, x0 ++** ret ++*/ ++v16qi ++gpr_to_fpr () ++{ ++ register v16qi x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++v16qi ++zero_to_fpr () ++{ ++ return (v16qi) {}; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\1\ ++** | ++** fmov x1, v0.d\1\ ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\1\ ++** | ++** fmov x0, v0.d\1\ ++** fmov x1, d0 ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (v16qi q0) ++{ ++ register v16qi x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c +new file mode 100644 +index 000000000..4c97e6fbc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c +@@ -0,0 +1,55 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+nosimd+fp" ++ ++typedef unsigned char v8qi __attribute__((vector_size(8))); ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++v8qi ++fpr_to_fpr (v8qi q0, v8qi q1) ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++v8qi ++gpr_to_fpr () ++{ ++ register v8qi x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++v8qi ++zero_to_fpr () ++{ ++ return (v8qi) {}; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++void ++fpr_to_gpr (v8qi q0) ++{ ++ register v8qi x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +-- +2.33.0 +
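A condensed illustration of what the fix buys, reduced from the new tests above (not itself part of the patch):

/* Under "+nothing+nosimd+fp", moving zero into a 16-bit FP value used to
   go through a SIMD-only MOVI split (or ICE for the narrow modes); with
   the patch it is a plain GPR-to-FPR fmov.  */
#pragma GCC target "+nothing+nosimd+fp"

_Float16
zero_half (void)
{
  return 0;   /* expect: fmov s0, wzr */
}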
_service:tar_scm:0128-LoongArch-Don-t-split-the-instructions-containing-re.patch
Added
@@ -0,0 +1,514 @@ +From 45aace43891ccaef756f2f1356edbb0da676629b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Mon, 29 Jan 2024 15:20:07 +0800 +Subject: PATCH 128/188 LoongArch: Don't split the instructions containing + relocs for extreme code model. + +The ABI mandates the pcalau12i/addi.d/lu32i.d/lu52i.d instructions for +addressing a symbol to be adjacent. So model them as "one large +instruction", i.e. define_insn, with two output registers. The real +address is the sum of these two registers. + +The advantage of this approach is the RTL passes can still use ldx/stx +instructions to skip an addi.d instruction. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (unspec): Add + UNSPEC_LA_PCREL_64_PART1 and UNSPEC_LA_PCREL_64_PART2. + (la_pcrel64_two_parts): New define_insn. + * config/loongarch/loongarch.cc (loongarch_tls_symbol): Fix a + typo in the comment. + (loongarch_call_tls_get_addr): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, use la_pcrel64_two_parts for + addressing the TLS symbol and __tls_get_addr. Emit an REG_EQUAL + note to allow CSE addressing __tls_get_addr. + (loongarch_legitimize_tls_address): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address TLS IE symbols with + la_pcrel64_two_parts. + (loongarch_split_symbol): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address symbols with + la_pcrel64_two_parts. + (loongarch_output_mi_thunk): Clean up unreachable code. If + -mcmodel=extreme -mexplicit-relocs={always,auto}, address the MI + thunks with la_pcrel64_two_parts. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-extreme-1.c (dg-options): + Use -O2 instead of -O0 to ensure the pcalau12i/addi/lu32i/lu52i + instruction sequences are not reordered by the compiler. + (NOIPA): Disallow interprocedural optimizations. + * gcc.target/loongarch/func-call-extreme-2.c: Remove the content + duplicated from func-call-extreme-1.c, include it instead. + (dg-options): Likewise. + * gcc.target/loongarch/func-call-extreme-3.c (dg-options): + Likewise. + * gcc.target/loongarch/func-call-extreme-4.c (dg-options): + Likewise. + * gcc.target/loongarch/cmodel-extreme-1.c: New test. + * gcc.target/loongarch/cmodel-extreme-2.c: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-1.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-2.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-3.C: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 131 ++++++++++-------- + gcc/config/loongarch/loongarch.md | 20 +++ + .../loongarch/cmodel-extreme-mi-thunk-1.C | 11 ++ + .../loongarch/cmodel-extreme-mi-thunk-2.C | 6 + + .../loongarch/cmodel-extreme-mi-thunk-3.C | 6 + + .../gcc.target/loongarch/cmodel-extreme-1.c | 18 +++ + .../gcc.target/loongarch/cmodel-extreme-2.c | 7 + + .../loongarch/func-call-extreme-1.c | 14 +- + .../loongarch/func-call-extreme-2.c | 29 +--- + .../loongarch/func-call-extreme-3.c | 2 +- + .../loongarch/func-call-extreme-4.c | 2 +- + 11 files changed, 154 insertions(+), 92 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 0050813df..b8f0291ab 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2733,7 +2733,7 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + return plus_constant (Pmode, reg, offset); + } + +-/* The __tls_get_attr symbol. */ ++/* The __tls_get_addr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + + /* Load an entry for a TLS access. */ +@@ -2773,20 +2773,22 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (loongarch_explicit_relocs_p (type)) + { +- /* Split tls symbol to high and low. */ +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +- high = loongarch_force_temporary (tmp, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc)); +- emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1)); ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc)); ++ emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2)); + } + else +- emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ { ++ /* Split tls symbol to high and low. 
*/ ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); ++ ++ high = loongarch_force_temporary (tmp, high); ++ emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ } + } + else + emit_insn (loongarch_load_tls (a0, loc, type)); +@@ -2868,22 +2870,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + { + if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, +- loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, +- gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, + loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ loongarch_emit_move ( ++ dest, ++ gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, ++ part1, ++ part2))); ++ ++ /* Put an REG_EQUAL note here to allow CSE (storing ++ part1 + part2, i.e. the address of tls_get_addr into ++ a saved register and use it for multiple TLS ++ accesses). */ ++ rtx sum = gen_rtx_UNSPEC ( ++ Pmode, gen_rtvec (1, loongarch_tls_symbol), ++ UNSPEC_ADDRESS_FIRST ++ + loongarch_classify_symbol (loongarch_tls_symbol)); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, sum); + } + else + emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, +@@ -2946,24 +2954,30 @@ loongarch_legitimize_tls_address (rtx loc) + dest = gen_reg_rtx (Pmode); + if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { +- tmp3 = gen_reg_rtx (Pmode); +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +- high = loongarch_force_temporary (tmp3, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp3 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp3, +- gen_rtx_REG (Pmode, 0), tmp2)); +- emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2)); +- emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2)); ++ gcc_assert (la_opt_explicit_relocs ++ != EXPLICIT_RELOCS_NONE); ++ ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, ++ tmp2)); + emit_move_insn (tmp1, + gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, +- high, tmp3))); ++ part1, ++ part2))); + } + else +- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); ++ { ++ tmp3 = gen_reg_rtx (Pmode); ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); ++ ++ high = loongarch_force_temporary (tmp3, high); ++ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); ++ } + } + else + emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE)); +@@ -3142,24 +3156,23 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + || !loongarch_split_symbol_type (symbol_type)) + return false; + +- rtx high, temp1 = NULL; ++ rtx high; + + if (temp == NULL) + temp = gen_reg_rtx (Pmode); + +- /* Get the 12-31 bits of the address. 
*/ +- high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); +- high = loongarch_force_temporary (temp, high); +- + if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) + { + gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + +- temp1 = gen_reg_rtx (Pmode); +- emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), +- addr)); +- emit_insn (gen_lui_h_lo20 (temp1, temp1, addr)); +- emit_insn (gen_lui_h_hi12 (temp1, temp1, addr)); ++ high = gen_reg_rtx (Pmode); ++ emit_insn (gen_la_pcrel64_two_parts (high, temp, addr)); ++ } ++ else ++ { ++ /* Get the 12-31 bits of the address. */ ++ high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); ++ high = loongarch_force_temporary (temp, high); + } + + if (low_out) +@@ -3168,7 +3181,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + case SYMBOL_PCREL64: + if (can_create_pseudo_p ()) + { +- *low_out = gen_rtx_PLUS (Pmode, high, temp1); ++ *low_out = gen_rtx_PLUS (Pmode, high, temp); + break; + } + /* fall through */ +@@ -3180,7 +3193,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */ + { + if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ()) +- *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1)); ++ *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, ++ temp)); + else + { + rtx low = gen_rtx_LO_SUM (Pmode, high, addr); +@@ -7493,21 +7507,24 @@ loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + allowed, otherwise load the address into a register first. */ + if (use_sibcall_p) + { +- if (TARGET_CMODEL_EXTREME) +- { +- emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); +- insn = emit_call_insn (gen_sibcall_internal (temp1, const0_rtx)); +- } +- else +- insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ /* If TARGET_CMODEL_EXTREME, we cannot do a direct jump at all ++ and const_call_insn_operand should have returned false. */ ++ gcc_assert (!TARGET_CMODEL_EXTREME); ++ ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + } + else + { +- if (TARGET_CMODEL_EXTREME) ++ if (!TARGET_CMODEL_EXTREME) ++ loongarch_emit_move (temp1, fnaddr); ++ else if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); + else +- loongarch_emit_move (temp1, fnaddr); ++ { ++ emit_insn (gen_la_pcrel64_two_parts (temp1, temp2, fnaddr)); ++ emit_move_insn (temp1, gen_rtx_PLUS (Pmode, temp1, temp2)); ++ } + + emit_jump_insn (gen_indirect_jump (temp1)); + } +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index add55e0af..9356194fe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -84,6 +84,8 @@ + UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1 + + UNSPEC_LOAD_SYMBOL_OFFSET64 ++ UNSPEC_LA_PCREL_64_PART1 ++ UNSPEC_LA_PCREL_64_PART2 + ) + + (define_c_enum "unspecv" +@@ -2224,6 +2226,24 @@ + (set_attr "mode" "DI") + (set_attr "insn_count" "5")) + ++;; The 64-bit PC-relative part of address loading. ++;; Note that the psABI does not allow splitting it. 
++(define_insn "la_pcrel64_two_parts" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI (match_operand:DI 2 "") (pc) UNSPEC_LA_PCREL_64_PART1)) ++ (set (match_operand:DI 1 "register_operand" "=r") ++ (unspec:DI (match_dup 2) (pc) UNSPEC_LA_PCREL_64_PART2)) ++ "TARGET_ABI_LP64 && la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE" ++ { ++ return "pcalau12i\t%0,%r2\n\t" ++ "addi.d\t%1,$r0,%L2\n\t" ++ "lu32i.d\t%1,%R2\n\t" ++ "lu52i.d\t%1,%1,%H2"; ++ } ++ (set_attr "move_type" "move") ++ (set_attr "mode" "DI") ++ (set_attr "length" "16")) ++ + ;; 32-bit Integer moves + + (define_expand "movsi" +diff --git a/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C +new file mode 100644 +index 000000000..ff1f7c165 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-inline -march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -mdirect-extern-access" } */ ++ ++struct A { ++ virtual ~A(); ++}; ++ ++struct B : virtual A {}; ++void var() { B(); } ++ ++/* { dg-final { scan-assembler "pcalau12i\t\^\n\*%pc_hi20\\(\\.LTHUNK0\\)\n\taddi\\.d\t\^\n\*%pc_lo12\\(\\\.LTHUNK0\\)\n\tlu32i\\.d\t\^\n\*%pc64_lo20\\(\\.LTHUNK0\\)\n\tlu52i\\.d\t\^\n\*%pc64_hi12\\(\\.LTHUNK0\\)" } } */ +diff --git a/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C +new file mode 100644 +index 000000000..c9aa16b41 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-inline -march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -mdirect-extern-access" } */ ++ ++#include "cmodel-extreme-mi-thunk-1.C" ++ ++/* { dg-final { scan-assembler "pcalau12i\t\^\n\*%pc_hi20\\(\\.LTHUNK0\\)\n\taddi\\.d\t\^\n\*%pc_lo12\\(\\\.LTHUNK0\\)\n\tlu32i\\.d\t\^\n\*%pc64_lo20\\(\\.LTHUNK0\\)\n\tlu52i\\.d\t\^\n\*%pc64_hi12\\(\\.LTHUNK0\\)" } } */ +diff --git a/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C +new file mode 100644 +index 000000000..afb86c8bd +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-inline -march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=none -mdirect-extern-access" } */ ++ ++#include "cmodel-extreme-mi-thunk-1.C" ++ ++/* { dg-final { scan-assembler "la.local\t\^\n\*\\.LTHUNK0" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +new file mode 100644 +index 000000000..564ee4017 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ ++ ++int a; ++extern int b; ++__thread int c __attribute__ ((tls_model ("local-exec"))); ++__thread int d __attribute__ ((tls_model ("initial-exec"))); ++__thread int e __attribute__ ((tls_model ("local-dynamic"))); ++__thread int f __attribute__ ((tls_model ("global-dynamic"))); ++ ++void ++test (void) ++{ ++ a = b + c + d + e + f; ++} ++ ++/* a, b, d, e, f, and __tls_get_addr. 
*/ ++/* { dg-final { scan-rtl-dump-times "la_pcrel64_two_parts" 6 "final" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +new file mode 100644 +index 000000000..ce834805f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ ++ ++#include "cmodel-extreme-1.c" ++ ++/* a, b, d, e, f, and __tls_get_addr. */ ++/* { dg-final { scan-rtl-dump-times "la_pcrel64_two_parts" 6 "final" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c +index db1e0f853..fdb4cf1ff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c +@@ -1,31 +1,33 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + ++#define NOIPA __attribute__ ((noipa)) ++ + extern void g (void); +-void ++NOIPA void + f (void) + {} + +-static void ++NOIPA static void + l (void) + {} + +-void ++NOIPA void + test (void) + { + g (); + } + +-void ++NOIPA void + test1 (void) + { + f (); + } + +-void ++NOIPA void + test2 (void) + { + l (); +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c +index 21bf81ae8..dfba3882b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c +@@ -1,32 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + +-extern void g (void); +-void +-f (void) +-{} +- +-static void +-l (void) +-{} +- +-void +-test (void) +-{ +- g (); +-} +- +-void +-test1 (void) +-{ +- f (); +-} +- +-void +-test2 (void) +-{ +- l (); +-} ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +index a4da44b4a..1f5234f83 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +@@ -1,5 +1,5 @@ + 
/* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +index 16b00f4c5..c42285006 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ +-- +2.43.0 +
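The scan patterns above check four relocations that split a 64-bit address into bit fields; the pcalau12i / addi.d / lu32i.d / lu52i.d sequence materialises one field per instruction. A minimal sketch of the reassembly, assuming the field layout implied by the relocation names (sign-extension details of the real relocations are omitted):

#include <cassert>
#include <cstdint>

int main ()
{
  uint64_t addr = 0x123456789abcdef0ULL;

  uint64_t lo12 = addr & 0xfffULL;            /* addi.d    / %pc_lo12   */
  uint64_t hi20 = (addr >> 12) & 0xfffffULL;  /* pcalau12i / %pc_hi20   */
  uint64_t lo20 = (addr >> 32) & 0xfffffULL;  /* lu32i.d   / %pc64_lo20 */
  uint64_t hi12 = addr >> 52;                 /* lu52i.d   / %pc64_hi12 */

  /* The four fields tile the 64-bit value exactly once each.  */
  uint64_t rebuilt = lo12 | (hi20 << 12) | (lo20 << 32) | (hi12 << 52);
  assert (rebuilt == addr);
  return 0;
}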
View file
_service:tar_scm:0129-Backport-SME-aarch64-Commonise-some-folding-code.patch
Added
@@ -0,0 +1,83 @@ +From 805a7aec3ddab49b92bf2d5c1a3e288860cc14bf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 20 Oct 2022 10:37:35 +0100 +Subject: PATCH 030/157 BackportSME aarch64: Commonise some folding code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df99e9e42094dee0833ac38f53e7fae09b4d133c + +Add an aarch64_sve::gimple_folder helper for folding calls +to integer constants. SME will make more use of this. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (gimple_folder::fold_to_cstu): New member function. + * config/aarch64/aarch64-sve-builtins.cc + (gimple_folder::fold_to_cstu): Define. + * config/aarch64/aarch64-sve-builtins-base.cc + (svcnt_bhwd_impl::fold): Use it. +--- + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++------- + gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +++++++ + gcc/config/aarch64/aarch64-sve-builtins.h | 1 + + 3 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c24c05487..56c9d75e7 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -516,9 +516,7 @@ public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { +- tree count = build_int_cstu (TREE_TYPE (f.lhs), +- GET_MODE_NUNITS (m_ref_mode)); +- return gimple_build_assign (f.lhs, count); ++ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); + } + + rtx +@@ -553,10 +551,7 @@ public: + unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); + HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); + if (value >= 0) +- { +- tree count = build_int_cstu (TREE_TYPE (f.lhs), value); +- return gimple_build_assign (f.lhs, count); +- } ++ return f.fold_to_cstu (value); + + return NULL; + } +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index a70e3a6b4..e168c8334 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -2615,6 +2615,13 @@ gimple_folder::redirect_call (const function_instance &instance) + return call; + } + ++/* Fold the call to constant VAL. */ ++gimple * ++gimple_folder::fold_to_cstu (poly_uint64 val) ++{ ++ return gimple_build_assign (lhs, build_int_cstu (TREE_TYPE (lhs), val)); ++} ++ + /* Fold the call to a PTRUE, taking the element size from type suffix 0. */ + gimple * + gimple_folder::fold_to_ptrue () +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 63d1db776..0d130b871 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -500,6 +500,7 @@ public: + tree load_store_cookie (tree); + + gimple *redirect_call (const function_instance &); ++ gimple *fold_to_cstu (poly_uint64); + gimple *fold_to_pfalse (); + gimple *fold_to_ptrue (); + gimple *fold_to_vl_pred (unsigned int); +-- +2.33.0 +
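The helper above replaces repeated build-the-constant-and-assign sequences in the SVE builtin folders. A sketch of the user-visible effect, assuming a fixed vector length is selected on the command line (without one the call stays a runtime cntb instruction):

#include <arm_sve.h>

uint64_t bytes_per_vector (void)
{
  /* With -msve-vector-bits=256 the folder turns this call into the
     gimple constant 32; the poly_uint64 element count collapses to a
     plain integer once the vector length is fixed.  */
  return svcntb ();
}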
View file
_service:tar_scm:0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch
Added
@@ -0,0 +1,173 @@ +From 825847768a29ec9d50e01015167002998150cb27 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Fri, 26 Jan 2024 16:41:11 +0800 +Subject: [PATCH 129/188] LoongArch: Adjust cost of vector_stmt that match + multiply-add pattern. + +We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r +failed to vectorize effectively. For this reason, we adjust the cost of +128-bit vector_stmt that match the multiply-add pattern to facilitate 128-bit +vectorization. +The experimental results show that after the modification, 549.fotonik3d_r +performance can be improved by 9.77% under the 128-bit vectorization option. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_multiply_add_p): New. + (loongarch_vector_costs::add_stmt_cost): Adjust. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/vect-10.f90: New test. +--- + gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++ + gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++ + 2 files changed, 119 insertions(+) + create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90 + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b8f0291ab..526ea0bcb 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4153,6 +4153,37 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + return 1 << ceil_log2 (uf); + } + ++/* Check if assign stmt rhs op comes from a multiply-add operation. */ ++static bool ++loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) ++{ ++ gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); ++ if (!assign) ++ return false; ++ tree_code code = gimple_assign_rhs_code (assign); ++ if (code != PLUS_EXPR && code != MINUS_EXPR) ++ return false; ++ ++ auto is_mul_result = [&](int i) ++ { ++ tree rhs = gimple_op (assign, i); ++ if (TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); ++ if (!def_stmt_info ++ || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) ++ return false; ++ gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt); ++ if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) ++ return false; ++ ++ return true; ++ }; ++ ++ return is_mul_result (1) || is_mul_result (2); ++} ++ + unsigned + loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, slp_tree, +@@ -4165,6 +4196,23 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + { + int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, + misalign); ++ if (vectype && stmt_info) ++ { ++ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info)); ++ machine_mode mode = TYPE_MODE (vectype); ++ ++ /* We found through testing that this strategy (the stmt that ++ matches the multiply-add pattern) has positive returns only ++ when applied to the 128-bit vector stmt, so this restriction ++ is currently made.
*/ ++ if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign) ++ { ++ if (!vect_is_reduction (stmt_info) ++ && loongarch_multiply_add_p (m_vinfo, stmt_info)) ++ stmt_cost = 0; ++ } ++ } ++ + retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); + m_costs[where] += retval; + +diff --git a/gcc/testsuite/gfortran.dg/vect/vect-10.f90 b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +new file mode 100644 +index 000000000..b85bc2702 +--- /dev/null ++++ b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +@@ -0,0 +1,71 @@ ++! { dg-do compile } ++! { dg-additional-options "-Ofast -mlsx -fvect-cost-model=dynamic" { target loongarch64*-*-* } } ++ ++MODULE material_mod ++ ++IMPLICIT NONE ++ ++integer, parameter :: dfp = selected_real_kind (13, 99) ++integer, parameter :: rfp = dfp ++ ++PUBLIC Mat_updateE, iepx, iepy, iepz ++ ++PRIVATE ++ ++integer, dimension (:, :, :), allocatable :: iepx, iepy, iepz ++real (kind = rfp), dimension (:), allocatable :: Dbdx, Dbdy, Dbdz ++integer :: imin, jmin, kmin ++integer, dimension (6) :: Exsize ++integer, dimension (6) :: Eysize ++integer, dimension (6) :: Ezsize ++integer, dimension (6) :: Hxsize ++integer, dimension (6) :: Hysize ++integer, dimension (6) :: Hzsize ++ ++CONTAINS ++ ++SUBROUTINE mat_updateE (nx, ny, nz, Hx, Hy, Hz, Ex, Ey, Ez) ++ ++integer, intent (in) :: nx, ny, nz ++ ++real (kind = rfp), intent (inout), & ++ dimension (Exsize (1) : Exsize (2), Exsize (3) : Exsize (4), Exsize (5) : Exsize (6)) :: Ex ++real (kind = rfp), intent (inout), & ++ dimension (Eysize (1) : Eysize (2), Eysize (3) : Eysize (4), Eysize (5) : Eysize (6)) :: Ey ++real (kind = rfp), intent (inout), & ++ dimension (Ezsize (1) : Ezsize (2), Ezsize (3) : Ezsize (4), Ezsize (5) : Ezsize (6)) :: Ez ++real (kind = rfp), intent (in), & ++ dimension (Hxsize (1) : Hxsize (2), Hxsize (3) : Hxsize (4), Hxsize (5) : Hxsize (6)) :: Hx ++real (kind = rfp), intent (in), & ++ dimension (Hysize (1) : Hysize (2), Hysize (3) : Hysize (4), Hysize (5) : Hysize (6)) :: Hy ++real (kind = rfp), intent (in), & ++ dimension (Hzsize (1) : Hzsize (2), Hzsize (3) : Hzsize (4), Hzsize (5) : Hzsize (6)) :: Hz ++ ++integer :: i, j, k, mp ++ ++do k = kmin, nz ++ do j = jmin, ny ++ do i = imin, nx ++ mp = iepx (i, j, k) ++ Ex (i, j, k) = Ex (i, j, k) + & ++ Dbdy (mp) * (Hz (i, j, k ) - Hz (i, j-1, k)) + & ++ Dbdz (mp) * (Hy (i, j, k-1) - Hy (i, j , k)) ++ ++ mp = iepy (i, j, k) ++ Ey (i, j, k) = Ey (i, j, k) + & ++ Dbdz (mp) * (Hx (i , j, k) - Hx (i, j, k-1)) + & ++ Dbdx (mp) * (Hz (i-1, j, k) - Hz (i, j, k )) ++ ++ mp = iepz (i, j, k) ++ Ez (i, j, k) = Ez (i, j, k) + & ++ Dbdx (mp) * (Hy (i, j , k) - Hy (i-1, j, k)) + & ++ Dbdy (mp) * (Hx (i, j-1, k) - Hx (i , j, k)) ++ end do ++ end do ++end do ++ ++END SUBROUTINE mat_updateE ++ ++END MODULE material_mod ++ ++! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target loongarch64*-*-* } } } +-- +2.43.0 +
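A loop of the shape the new hook recognises (a sketch; the function name is illustrative): the addition's right-hand side is fed by a multiplication, so under 128-bit LSX vectorization the add's vector_stmt cost is dropped to zero and the pair can become a fused multiply-add.

void
madd_loop (float *a, const float *b, const float *c, int n)
{
  for (int i = 0; i < n; i++)
    a[i] += b[i] * c[i];   /* PLUS_EXPR whose operand is a MULT_EXPR result */
}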
View file
_service:tar_scm:0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch
Added
@@ -0,0 +1,49 @@ +From 8dc1eee26c61bea8aab62080bd961825142685f9 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 7 Nov 2023 15:22:57 +0000 +Subject: PATCH 031/157 BackportSME aarch64: Add a %Z operand modifier + for SVE registers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f40eac535bd55192cf93daca16235efbcd91157a + +This patch adds a %Z operand modifier that prints registers as SVE z +registers. The SME patches need this, but so do Tamar's patches. +I'm separating this out to unblock those. + +We should probably document the wxbhsdqZ modifiers as +user-facing, but doing that for all of them is a separate patch. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_print_operand): Add a %Z + modifier for SVE registers. +--- + gcc/config/aarch64/aarch64.cc | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 3e83e48ec..fd1114b52 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -11901,6 +11901,10 @@ sizetochar (int size) + 'N': Take the duplicated element in a vector constant + and print the negative of it in decimal. + 'b/h/s/d/q': Print a scalar FP/SIMD register name. ++ 'Z': Same for SVE registers. ('z' was already taken.) ++ Note that it is not necessary to use %Z for operands ++ that have SVE modes. The convention is to use %Z ++ only for non-SVE (or potentially non-SVE) modes. + 'S/T/U/V': Print a FP/SIMD register name for a register list. + The register printed is the FP/SIMD register name + of X + 0/1/2/3 for S/T/U/V. +@@ -12073,6 +12077,8 @@ aarch64_print_operand (FILE *f, rtx x, int code) + case 's': + case 'd': + case 'q': ++ case 'Z': ++ code = TOLOWER (code); + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); +-- +2.33.0 +
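A stand-in sketch, not the GCC implementation, of what the modifier changes: 'Z' is lowercased and used as the register-name prefix, so FP/SIMD register 0 prints as z0 where the q modifier would print q0.

#include <cstdio>

static void
print_simd_reg (char code, unsigned regno)
{
  /* aarch64_print_operand maps 'Z' through TOLOWER and then emits the
     prefix letter followed by the register number.  */
  char prefix = (code == 'Z') ? 'z' : code;
  std::printf ("%c%u\n", prefix, regno);
}

int main ()
{
  print_simd_reg ('q', 0);   /* q0 */
  print_simd_reg ('Z', 0);   /* z0 */
  return 0;
}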
View file
_service:tar_scm:0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch
Added
@@ -0,0 +1,113 @@ +From 99a48268961f05e87f4f9d6f3f22903869f50af7 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 24 Jan 2024 17:19:32 +0800 +Subject: PATCH 130/188 LoongArch: Fix incorrect return type for + frecipe/frsqrte intrinsic functions + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h + (__frecipe_s): Update function return type. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-intrinsic.c: New test. +--- + gcc/config/loongarch/larchintrin.h | 16 +++++----- + .../loongarch/larch-frecipe-intrinsic.c | 30 +++++++++++++++++++ + 2 files changed, 38 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 22035e767..6582dfe49 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -336,38 +336,38 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + #ifdef __loongarch_frecipe + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s (float _1) + { +- __builtin_loongarch_frecipe_s ((float) _1); ++ return (float) __builtin_loongarch_frecipe_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. */ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d (double _1) + { +- __builtin_loongarch_frecipe_d ((double) _1); ++ return (double) __builtin_loongarch_frecipe_d ((double) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s (float _1) + { +- __builtin_loongarch_frsqrte_s ((float) _1); ++ return (float) __builtin_loongarch_frsqrte_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. 
*/ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d (double _1) + { +- __builtin_loongarch_frsqrte_d ((double) _1); ++ return (double) __builtin_loongarch_frsqrte_d ((double) _1); + } + #endif + +diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +new file mode 100644 +index 000000000..6ce2bde0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +@@ -0,0 +1,30 @@ ++/* Test intrinsics for frecipe.{s/d} and frsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mfrecipe -O2" } */ ++/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ ++ ++#include <larchintrin.h> ++ ++float ++test_frecipe_s (float _1) ++{ ++ return __frecipe_s (_1); ++} ++double ++test_frecipe_d (double _1) ++{ ++ return __frecipe_d (_1); ++} ++float ++test_frsqrte_s (float _1) ++{ ++ return __frsqrte_s (_1); ++} ++double ++test_frsqrte_d (double _1) ++{ ++ return __frsqrte_d (_1); ++} +-- +2.43.0 +
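With the corrected signatures the estimates can actually be consumed. One typical use, sketched under the assumption that the file is compiled with -mfrecipe so larchintrin.h exposes these intrinsics, is seeding a Newton-Raphson refinement:

#include <larchintrin.h>

float
fast_div (float a, float b)
{
  float r = __frecipe_s (b);   /* ~1/b: now returned instead of discarded */
  r = r * (2.0f - b * r);      /* one Newton-Raphson step sharpens it */
  return a * r;
}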
View file
_service:tar_scm:0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
Added
@@ -0,0 +1,104 @@ +From 8a43bd7885ce479cadb0643fbb0fc22d2b0ffced Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 5 Nov 2023 18:28:46 +0000 +Subject: [PATCH 032/157] [Backport][SME] mode-switching: Remove unused bbnum + field + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2d55ed2b8a754d7279cd002941f7cb481f0fd133 + +seginfo had an unused bbnum field, presumably dating from before +BB information was attached directly to insns. + +gcc/ + * mode-switching.cc: Remove unused forward references. + (seginfo): Remove bbnum. + (new_seginfo): Remove associated argument. + (optimize_mode_switching): Update calls accordingly. +--- + gcc/mode-switching.cc | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 6e3f1dc65..4cf8f03a0 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -65,13 +65,11 @@ along with GCC; see the file COPYING3. If not see + MODE is the mode this insn must be executed in. + INSN_PTR is the insn to be executed (may be the note that marks the + beginning of a basic block). +- BBNUM is the flow graph basic block this insn occurs in. + NEXT is the next insn in the same basic block. */ + struct seginfo + { + int mode; + rtx_insn *insn_ptr; +- int bbnum; + struct seginfo *next; + HARD_REG_SET regs_live; + }; +@@ -84,11 +82,6 @@ struct bb_info + int mode_in; + }; + +-static struct seginfo * new_seginfo (int, rtx_insn *, int, HARD_REG_SET); +-static void add_seginfo (struct bb_info *, struct seginfo *); +-static void reg_dies (rtx, HARD_REG_SET *); +-static void reg_becomes_live (rtx, const_rtx, void *); +- + /* Clear mode I from entity J in bitmap B. */ + #define clear_mode_bit(b, j, i) \ + bitmap_clear_bit (b, (j * max_num_modes) + i) +@@ -148,13 +141,13 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) + } + + /* Allocate a new BBINFO structure, initialized with the MODE, INSN, +- and basic block BB parameters. ++ and REGS_LIVE parameters. + INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty + basic block; that allows us later to insert instructions in a FIFO-like + manner. */ + + static struct seginfo * +-new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) ++new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live) + { + struct seginfo *ptr; + +@@ -163,7 +156,6 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) + ptr = XNEW (struct seginfo); + ptr->mode = mode; + ptr->insn_ptr = insn; +- ptr->bbnum = bb; + ptr->next = NULL; + ptr->regs_live = regs_live; + return ptr; +@@ -604,7 +596,7 @@ optimize_mode_switching (void) + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); + if (ins_pos != BB_END (bb)) + ins_pos = NEXT_INSN (ins_pos); +- ptr = new_seginfo (no_mode, ins_pos, bb->index, live_now); ++ ptr = new_seginfo (no_mode, ins_pos, live_now); + add_seginfo (info + bb->index, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transp[bb->index], j, i); +@@ -622,7 +614,7 @@ optimize_mode_switching (void) + { + any_set_required = true; + last_mode = mode; +- ptr = new_seginfo (mode, insn, bb->index, live_now); ++ ptr = new_seginfo (mode, insn, live_now); + add_seginfo (info + bb->index, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transp[bb->index], j, i); + } +@@ -651,7 +643,7 @@ optimize_mode_switching (void) + mark the block as nontransparent.
*/ + if (!any_set_required) + { +- ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now); ++ ptr = new_seginfo (no_mode, BB_END (bb), live_now); + add_seginfo (info + bb->index, ptr); + if (last_mode != no_mode) + for (i = 0; i < no_mode; i++) + clear_mode_bit (transp[bb->index], j, i); +-- +2.33.0 +
View file
_service:tar_scm:0131-LoongArch-Fix-an-ODR-violation.patch
Added
@@ -0,0 +1,60 @@ +From 89ebd7012ecf49c60bad8dd018e0aa573b58844b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 2 Feb 2024 05:37:38 +0800 +Subject: [PATCH 131/188] LoongArch: Fix an ODR violation + +When bootstrapping GCC 14 with --with-build-config=bootstrap-lto, an ODR +violation is detected: + + ../../gcc/config/loongarch/loongarch-opts.cc:57: warning: + 'abi_minimal_isa' violates the C++ One Definition Rule [-Wodr] + 57 | abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES]; + ../../gcc/config/loongarch/loongarch-def.cc:186: note: + 'abi_minimal_isa' was previously declared here + 186 | abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>, + ../../gcc/config/loongarch/loongarch-def.cc:186: note: + code may be misoptimized unless '-fno-strict-aliasing' is used + +Fix it by adding a proper declaration of abi_minimal_isa into +loongarch-def.h and remove the ODR-violating local declaration in +loongarch-opts.cc. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h (abi_minimal_isa): Declare. + * config/loongarch/loongarch-opts.cc (abi_minimal_isa): Remove + the ODR-violating local declaration. +--- + gcc/config/loongarch/loongarch-def.h | 3 +++ + gcc/config/loongarch/loongarch-opts.cc | 2 -- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 28da3ae5f..fdcf43fc7 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -203,5 +203,8 @@ extern loongarch_def_array<loongarch_align, N_TUNE_TYPES> + loongarch_cpu_align; + extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES> + loongarch_cpu_rtx_cost_data; ++extern loongarch_def_array< ++ loongarch_def_array<loongarch_isa, N_ABI_EXT_TYPES>, ++ N_ABI_BASE_TYPES> abi_minimal_isa; + + #endif /* LOONGARCH_DEF_H */ +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index a2b069d83..2ea3972d1 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -53,8 +53,6 @@ static const int tm_multilib_list[] = { TM_MULTILIB_LIST }; + static int enabled_abi_types[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { 0 }; + + #define isa_required(ABI) (abi_minimal_isa[(ABI).base][(ABI).ext]) +-extern "C" const struct loongarch_isa +-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES]; + + static inline int + is_multilib_enabled (struct loongarch_abi abi) +-- +2.43.0 +
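The fix follows the usual cure for this class of bug. A sketch with invented names: keep the one declaration in a header that both the defining and the using translation units include, so the definition is checked against it and the types cannot drift apart.

/* demo.h (hypothetical): the single shared declaration.  */
extern int demo_minimal_isa[2][3];

/* demo-def.cc: including demo.h makes the compiler verify this
   definition against the shared declaration.  */
int demo_minimal_isa[2][3];

/* demo-use.cc would also include demo.h rather than writing its own
   'extern' line, so the two translation units can never disagree
   about the variable's type.  */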
View file
_service:tar_scm:0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
Added
@@ -0,0 +1,311 @@ +From c980e40d2c27ac3ee33c9b6aea6d2b0d4080852e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:54 +0000 +Subject: PATCH 033/157 BackportSME mode-switching: Tweak the macro/hook + documentation + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8479a3759025961f80cf0cd6bb3f127e09d0510d + +I found the documentation for the mode-switching macros/hooks +a bit hard to follow at first. This patch tries to add the +information that I think would have made it easier to understand. + +Of course, documentation preferences are personal, and so I could +be changing something that others understood to something that +seems impenetrable. + +Some notes on specific changes: + +- "in an optimizing compilation" didn't seem accurate; the pass + is run even at -O0, and often needs to be for correctness. + +- "at run time" meant when the compiler was run, rather than when + the compiled code was run. + +- Removing the list of optional macros isn't a clarification, + but it means that upcoming patches don't create an absurdly + long list. + +- I don't really understand the purpose of TARGET_MODE_PRIORITY, + so I mostly left that alone. + +gcc/ + * target.def: Tweak documentation of mode-switching hooks. + * doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation. + (NUM_MODES_FOR_MODE_SWITCHING): Likewise. + * doc/tm.texi: Regenerate. +--- + gcc/doc/tm.texi | 69 ++++++++++++++++++++++++++++------------------ + gcc/doc/tm.texi.in | 26 +++++++++-------- + gcc/target.def | 43 ++++++++++++++++++----------- + 3 files changed, 84 insertions(+), 54 deletions(-) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 851d31c18..553aa4cf2 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10234,7 +10234,7 @@ The following macros control mode switching optimizations: + + @defmac OPTIMIZE_MODE_SWITCHING (@var{entity}) + Define this macro if the port needs extra instructions inserted for mode +-switching in an optimizing compilation. ++switching. + + For an example, the SH4 can perform both single and double precision + floating point operations, but to perform a single precision operation, +@@ -10244,73 +10244,88 @@ purpose register as a scratch register, hence these FPSCR sets have to + be inserted before reload, i.e.@: you cannot put this into instruction emitting + or @code{TARGET_MACHINE_DEPENDENT_REORG}. + +-You can have multiple entities that are mode-switched, and select at run time +-which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should +-return nonzero for any @var{entity} that needs mode-switching. ++You can have multiple entities that are mode-switched, some of which might ++only be needed conditionally. The entities are identified by their index ++into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length ++of the initializer determining the number of entities. ++ ++@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity} ++that needs mode-switching. ++ + If you define this macro, you also have to define + @code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED}, + @code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}. +-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT} +-are optional. ++The other macros in this section are optional. + @end defmac + + @defmac NUM_MODES_FOR_MODE_SWITCHING + If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as + initializer for an array of integers. 
Each initializer element + N refers to an entity that needs mode switching, and specifies the number +-of different modes that might need to be set for this entity. +-The position of the initializer in the initializer---starting counting at ++of different modes that are defined for that entity. ++The position of the element in the initializer---starting counting at + zero---determines the integer that is used to refer to the mode-switched + entity in question. +-In macros that take mode arguments / yield a mode result, modes are +-represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode +-switch is needed / supplied. ++Modes are represented as numbers 0 @dots{} N @minus{} 1. ++In mode arguments and return values, N either represents an unknown ++mode or ``no mode'', depending on context. + @end defmac + + @deftypefn {Target Hook} void TARGET_MODE_EMIT (int @var{entity}, int @var{mode}, int @var{prev_mode}, HARD_REG_SET @var{regs_live}) + Generate one or more insns to set @var{entity} to @var{mode}. + @var{hard_reg_live} is the set of hard registers live at the point where + the insn(s) are to be inserted. @var{prev_moxde} indicates the mode +-to switch from. Sets of a lower numbered entity will be emitted before ++to switch from, or is the number of modes if the previous mode is not ++known. Sets of a lower numbered entity will be emitted before + sets of a higher numbered entity to a mode of the same or lower priority. + @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}) + @var{entity} is an integer specifying a mode-switched entity. +-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro +-to return an integer value not larger than the corresponding element +-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity} +-must be switched into prior to the execution of @var{insn}. ++If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook ++to return the mode that @var{entity} must be switched into prior to the ++execution of @var{insn}, or the number of modes if @var{insn} has no ++such requirement. + @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) + @var{entity} is an integer specifying a mode-switched entity. +-If this macro is defined, it is evaluated for every @var{insn} during mode +-switching. It determines the mode that an insn results +-in (if different from the incoming mode). ++If this hook is defined, it is evaluated for every @var{insn} during mode ++switching. It returns the mode that @var{entity} is in after @var{insn} ++has been executed. @var{mode} is the mode that @var{entity} was in ++before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}. ++ ++@var{mode} is equal to the number of modes defined for @var{entity} ++if the mode before @var{insn} is unknown. The hook should likewise return ++the number of modes if it does not know what mode @var{entity} has after ++@var{insn}. ++ ++Not defining the hook is equivalent to returning @var{mode}. + @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) +-If this macro is defined, it is evaluated for every @var{entity} that +-needs mode switching. It should evaluate to an integer, which is a mode +-that @var{entity} is assumed to be switched to at function entry. ++If this hook is defined, it is evaluated for every @var{entity} that ++needs mode switching. 
It should return the mode that @var{entity} is ++guaranteed to be in on entry to the function, or the number of modes ++if there is no such guarantee. + If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT} + must be defined. + @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_EXIT (int @var{entity}) +-If this macro is defined, it is evaluated for every @var{entity} that +-needs mode switching. It should evaluate to an integer, which is a mode +-that @var{entity} is assumed to be switched to at function exit. ++If this hook is defined, it is evaluated for every @var{entity} that ++needs mode switching. It should return the mode that @var{entity} must ++be in on return from the function, or the number of modes if there is no ++such requirement. + If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY} + must be defined. + @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n}) +-This macro specifies the order in which modes for @var{entity} ++This hook specifies the order in which modes for @var{entity} + are processed. 0 is the highest priority, + @code{NUM_MODES_FOR_MODE_SWITCHING@var{entity} - 1} the lowest. +-The value of the macro should be an integer designating a mode ++The hook returns an integer designating a mode + for @var{entity}. For any fixed @var{entity}, @code{mode_priority} + (@var{entity}, @var{n}) shall be a bijection in 0 @dots{} + @code{num_modes_for_mode_switching@var{entity} - 1}. +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index ac95cdf7a..9ec11b15c 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6879,7 +6879,7 @@ The following macros control mode switching optimizations: + + @defmac OPTIMIZE_MODE_SWITCHING (@var{entity}) + Define this macro if the port needs extra instructions inserted for mode +-switching in an optimizing compilation. ++switching. + + For an example, the SH4 can perform both single and double precision + floating point operations, but to perform a single precision operation, +@@ -6889,27 +6889,31 @@ purpose register as a scratch register, hence these FPSCR sets have to + be inserted before reload, i.e.@: you cannot put this into instruction emitting + or @code{TARGET_MACHINE_DEPENDENT_REORG}. + +-You can have multiple entities that are mode-switched, and select at run time +-which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should +-return nonzero for any @var{entity} that needs mode-switching. ++You can have multiple entities that are mode-switched, some of which might ++only be needed conditionally. The entities are identified by their index ++into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length ++of the initializer determining the number of entities. ++ ++@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity} ++that needs mode-switching. ++ + If you define this macro, you also have to define + @code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED}, + @code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}. +-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT} +-are optional. ++The other macros in this section are optional. + @end defmac + + @defmac NUM_MODES_FOR_MODE_SWITCHING + If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as + initializer for an array of integers. Each initializer element + N refers to an entity that needs mode switching, and specifies the number +-of different modes that might need to be set for this entity. 
+-The position of the initializer in the initializer---starting counting at ++of different modes that are defined for that entity. ++The position of the element in the initializer---starting counting at + zero---determines the integer that is used to refer to the mode-switched + entity in question. +-In macros that take mode arguments / yield a mode result, modes are +-represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode +-switch is needed / supplied. ++Modes are represented as numbers 0 @dots{} N @minus{} 1. ++In mode arguments and return values, N either represents an unknown ++mode or ``no mode'', depending on context. + @end defmac + + @hook TARGET_MODE_EMIT +diff --git a/gcc/target.def b/gcc/target.def +index c9bb2b4c2..b87b0f927 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -6992,51 +6992,62 @@ DEFHOOK + "Generate one or more insns to set @var{entity} to @var{mode}.\n\ + @var{hard_reg_live} is the set of hard registers live at the point where\n\ + the insn(s) are to be inserted. @var{prev_moxde} indicates the mode\n\ +-to switch from. Sets of a lower numbered entity will be emitted before\n\ ++to switch from, or is the number of modes if the previous mode is not\n\ ++known. Sets of a lower numbered entity will be emitted before\n\ + sets of a higher numbered entity to a mode of the same or lower priority.", + void, (int entity, int mode, int prev_mode, HARD_REG_SET regs_live), NULL) + + DEFHOOK + (needed, + "@var{entity} is an integer specifying a mode-switched entity.\n\ +-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro\n\ +-to return an integer value not larger than the corresponding element\n\ +-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}\n\ +-must be switched into prior to the execution of @var{insn}.", ++If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\ ++to return the mode that @var{entity} must be switched into prior to the\n\ ++execution of @var{insn}, or the number of modes if @var{insn} has no\n\ ++such requirement.", + int, (int entity, rtx_insn *insn), NULL) + + DEFHOOK + (after, + "@var{entity} is an integer specifying a mode-switched entity.\n\ +-If this macro is defined, it is evaluated for every @var{insn} during mode\n\ +-switching. It determines the mode that an insn results\n\ +-in (if different from the incoming mode).", ++If this hook is defined, it is evaluated for every @var{insn} during mode\n\ ++switching. It returns the mode that @var{entity} is in after @var{insn}\n\ ++has been executed. @var{mode} is the mode that @var{entity} was in\n\ ++before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\ ++\n\ ++@var{mode} is equal to the number of modes defined for @var{entity}\n\ ++if the mode before @var{insn} is unknown. The hook should likewise return\n\ ++the number of modes if it does not know what mode @var{entity} has after\n\ ++@var{insn}.\n\ ++\n\ ++Not defining the hook is equivalent to returning @var{mode}.", + int, (int entity, int mode, rtx_insn *insn), NULL) + + DEFHOOK + (entry, +- "If this macro is defined, it is evaluated for every @var{entity} that\n\ +-needs mode switching. It should evaluate to an integer, which is a mode\n\ +-that @var{entity} is assumed to be switched to at function entry.\n\ ++ "If this hook is defined, it is evaluated for every @var{entity} that\n\ ++needs mode switching. 
It should return the mode that @var{entity} is\n\ ++guaranteed to be in on entry to the function, or the number of modes\n\ ++if there is no such guarantee.\n\ + If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}\n\ + must be defined.", + int, (int entity), NULL) + + DEFHOOK + (exit, +- "If this macro is defined, it is evaluated for every @var{entity} that\n\ +-needs mode switching. It should evaluate to an integer, which is a mode\n\ +-that @var{entity} is assumed to be switched to at function exit.\n\ ++ "If this hook is defined, it is evaluated for every @var{entity} that\n\ ++needs mode switching. It should return the mode that @var{entity} must\n\ ++be in on return from the function, or the number of modes if there is no\n\ ++such requirement.\n\ + If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\ + must be defined.", + int, (int entity), NULL) + + DEFHOOK + (priority, +- "This macro specifies the order in which modes for @var{entity}\n\ ++ "This hook specifies the order in which modes for @var{entity}\n\ + are processed. 0 is the highest priority,\n\ + @code{NUM_MODES_FOR_MODE_SWITCHING@var{entity} - 1} the lowest.\n\ +-The value of the macro should be an integer designating a mode\n\ ++The hook returns an integer designating a mode\n\ + for @var{entity}. For any fixed @var{entity}, @code{mode_priority}\n\ + (@var{entity}, @var{n}) shall be a bijection in 0 @dots{}\n\ + @code{num_modes_for_mode_switching@var{entity} - 1}.", +-- +2.33.0 +
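A compressed sketch of the interface those docs describe, with invented names and a stand-in insn type: one entity with two modes, where the value 2 (the number of modes) stands for "unknown / no requirement" in both arguments and return values.

struct rtx_insn;                              /* stand-in for GCC's insn type */

#define OPTIMIZE_MODE_SWITCHING(ENTITY) ((ENTITY) == 0)
#define NUM_MODES_FOR_MODE_SWITCHING { 2 }

enum { FP_SINGLE, FP_DOUBLE, FP_NUM_MODES };  /* modes 0..N-1; N = "none" */

static int
demo_mode_needed (int entity, rtx_insn *)
{
  /* A real hook inspects the insn; this sketch says every insn of
     entity 0 needs single precision and no other entity has needs.  */
  return entity == 0 ? FP_SINGLE : FP_NUM_MODES;
}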
View file
_service:tar_scm:0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch
Added
@@ -0,0 +1,359 @@ +From f4a447bff86c7f5598a7461e353a3c6f4a101ed4 Mon Sep 17 00:00:00 2001 +From: Li Wei <liwei@loongson.cn> +Date: Fri, 2 Feb 2024 09:42:28 +0800 +Subject: [PATCH 132/188] LoongArch: testsuite: Fix + gcc.dg/vect/vect-reduc-mul_{1, 2}.c FAIL. + +This FAIL was introduced from r14-6908. The reason is that when merging +constant vector permutation implementations, the 128-bit matching situation +was not fully considered. In fact, the expansion of 128-bit vectors after +merging only supports value-based 4 elements set shuffle, so this time is a +complete implementation of the entire 128-bit vector constant permutation, +and some structural adjustments have also been made to the code. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_expand_vselect): Adjust. + (loongarch_expand_vselect_vconcat): Ditto. + (loongarch_try_expand_lsx_vshuf_const): New, use vshuf to implement + all 128-bit constant permutation situations. + (loongarch_expand_lsx_shuffle): Adjust and rename function name. + (loongarch_is_imm_set_shuffle): Renamed function name. + (loongarch_expand_vec_perm_even_odd): Function forward declaration. + (loongarch_expand_vec_perm_even_odd_1): Add implement for 128-bit + extract-even and extract-odd permutations. + (loongarch_is_odd_extraction): Delete. + (loongarch_is_even_extraction): Ditto. + (loongarch_expand_vec_perm_const): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 218 ++++++++++++++++++++++-------- + 1 file changed, 163 insertions(+), 55 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 526ea0bcb..a0e0906af 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8025,7 +8025,8 @@ struct expand_vec_perm_d + + static bool + loongarch_expand_vselect (rtx target, rtx op0, +- const unsigned char *perm, unsigned nelt) ++ const unsigned char *perm, unsigned nelt, ++ bool testing_p) + { + rtx rperm[MAX_VECT_LEN], x; + rtx_insn *insn; +@@ -8044,6 +8045,9 @@ + remove_insn (insn); + return false; + } ++ ++ if (testing_p) ++ remove_insn (insn); + return true; + } + +@@ -8051,7 +8055,8 @@ + + static bool + loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, +- const unsigned char *perm, unsigned nelt) ++ const unsigned char *perm, unsigned nelt, ++ bool testing_p) + { + machine_mode v2mode; + rtx x; +@@ -8059,7 +8064,7 @@ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) + return false; + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); +- return loongarch_expand_vselect (target, x, perm, nelt); ++ return loongarch_expand_vselect (target, x, perm, nelt, testing_p); + } + + static tree +@@ -8315,11 +8320,87 @@ loongarch_set_handled_components (sbitmap components) + #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + #undef TARGET_ASM_ALIGNED_DI_OP + #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" ++ ++/* Use the vshuf instruction to implement all 128-bit constant vector ++ permutation. */ ++ ++static bool ++loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) ++{ ++ int i; ++ rtx target, op0, op1, sel, tmp; ++ rtx rperm[MAX_VECT_LEN]; ++ ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ { ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->one_vector_p ?
d->op0 : d->op1; ++ ++ if (GET_MODE (op0) != GET_MODE (op1) ++ || GET_MODE (op0) != GET_MODE (target)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ for (i = 0; i < d->nelt; i += 1) ++ rperm[i] = GEN_INT (d->perm[i]); ++ ++ if (d->vmode == E_V2DFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else if (d->vmode == E_V4SFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else ++ { ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); ++ emit_move_insn (d->target, sel); ++ } ++ ++ switch (d->vmode) ++ { ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V2DImode: ++ emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); ++ break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V4SImode: ++ emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); ++ break; ++ case E_V8HImode: ++ emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); ++ break; ++ case E_V16QImode: ++ emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ break; ++ } ++ ++ return true; ++ } ++ return false; ++} ++ + /* Construct (set target (vec_select op0 (parallel selector))) and +- return true if that's a valid instruction in the active ISA. */ ++ return true if that's a valid instruction in the active ISA. ++ In fact, it matches the special constant vector with repeated ++ 4-element sets. */ + + static bool +-loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d) + { + rtx x, elts[MAX_VECT_LEN]; + rtvec v; +@@ -8338,6 +8419,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) + if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) + return false; + ++ if (d->testing_p) ++ return true; ++ + x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); + x = gen_rtx_SET (d->target, x); + +@@ -8350,6 +8434,27 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) + return true; + } + ++static bool ++loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *); ++ ++/* Try to match and expand all kinds of 128-bit const vector permutation ++ cases. */ ++ ++static bool ++loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++{ ++ if (!ISA_HAS_LSX && GET_MODE_SIZE (d->vmode) != 16) ++ return false; ++ ++ if (loongarch_is_imm_set_shuffle (d)) ++ return true; ++ ++ if (loongarch_expand_vec_perm_even_odd (d)) ++ return true; ++ ++ return loongarch_try_expand_lsx_vshuf_const (d); ++} ++ + /* Try to simplify a two vector permutation using 2 intra-lane interleave + insns and cross-lane shuffle for 32-byte vectors. */ + +@@ -8442,7 +8547,7 @@ loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d) + return true; + } + +-/* Implement extract-even and extract-odd permutations. */ ++/* Implement 128-bit and 256-bit extract-even and extract-odd permutations. */ + + static bool + loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) +@@ -8457,6 +8562,50 @@ loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) + + switch (d->vmode) + { ++ /* 128 bit.
*/ ++ case E_V2DFmode: ++ if (odd) ++ emit_insn (gen_lsx_vilvh_d_f (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vilvl_d_f (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V2DImode: ++ if (odd) ++ emit_insn (gen_lsx_vilvh_d (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vilvl_d (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V4SFmode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_w_f (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_w_f (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V4SImode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_w (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_w (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V8HImode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_h (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_h (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V16QImode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_b (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_b (d->target, d->op0, d->op1)); ++ break; ++ ++ /* 256 bit. */ + case E_V4DFmode: + /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */ + if (odd) +@@ -8531,7 +8680,7 @@ static bool + loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) + { + unsigned i, odd, nelt = d->nelt; +- if (!ISA_HAS_LASX) ++ if (!ISA_HAS_LASX && !ISA_HAS_LSX) + return false; + + odd = d->perm[0]; +@@ -8994,44 +9143,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_odd_extraction (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned char buf = 1; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 2; +- } +- +- return result; +-} +- +-static bool +-loongarch_is_even_extraction (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned char buf = 0; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 2; +- } +- +- return result; +-} +- + static bool + loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + { +@@ -9288,32 +9399,29 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) + for (i = 1; i < d->nelt; i += 2) + perm2[i] += d->nelt; + if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- perm2, d->nelt)) ++ perm2, d->nelt, d->testing_p)) + return true; + } + else + { + if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, d->nelt)) ++ d->perm, d->nelt, ++ d->testing_p)) + return true; + + /* Try again with swapped operands. */ + for (i = 0; i < d->nelt; ++i) + perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); + if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, +- perm2, d->nelt)) ++ perm2, d->nelt, d->testing_p)) + return true; + } + +- if (loongarch_expand_lsx_shuffle (d)) ++ if (loongarch_is_imm_set_shuffle (d)) + return true; + +- if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- if (loongarch_expand_vec_perm_even_odd (d)) +- return true; +- } ++ if (loongarch_expand_vec_perm_even_odd (d)) ++ return true; + + if (loongarch_is_lasx_lowpart_interleave (d) + || loongarch_is_lasx_lowpart_interleave_2 (d) +-- +2.43.0 +
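The two shapes the unified 128-bit path now expands directly, written with the GNU vector extensions; with -mlsx each function can become a single vpickev.w or vpickod.w instruction.

typedef int v4si __attribute__ ((vector_size (16)));

v4si
extract_even (v4si a, v4si b)
{
  /* Indices 0..3 select from a, 4..7 from b.  */
  return __builtin_shuffle (a, b, (v4si) { 0, 2, 4, 6 });
}

v4si
extract_odd (v4si a, v4si b)
{
  return __builtin_shuffle (a, b, (v4si) { 1, 3, 5, 7 });
}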
View file
_service:tar_scm:0133-Backport-SME-mode-switching-Add-note-problem.patch
Added
@@ -0,0 +1,35 @@ +From 7ab54a765239bdd2ce548cffdd5b83f9c20f69da Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:55 +0000 +Subject: [PATCH 034/157] [Backport][SME] mode-switching: Add note problem + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3cd3a09b3f91a1d023cb180763d40598d6bb274b + +optimize_mode_switching uses REG_DEAD notes to track register +liveness, but it failed to tell DF to calculate up-to-date notes. + +Noticed by inspection. I don't have a testcase that fails +because of this. + +gcc/ + * mode-switching.cc (optimize_mode_switching): Call + df_note_add_problem. +--- + gcc/mode-switching.cc | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 4cf8f03a0..2a9f98793 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -540,6 +540,7 @@ optimize_mode_switching (void) + pre_exit = create_pre_exit (n_entities, entity_map, num_modes); + } + ++ df_note_add_problem (); + df_analyze (); + + /* Create the bitmap vectors. */ +-- +2.33.0 +
View file
_service:tar_scm:0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch
Added
@@ -0,0 +1,72 @@ +From 6364467c68ac1ee2b54b866f462fb670a43029fa Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 2 Feb 2024 08:51:08 +0800 +Subject: [PATCH 133/188] LoongArch: Avoid out-of-bounds access in + loongarch_symbol_insns + +We call loongarch_symbol_insns with mode = MAX_MACHINE_MODE sometimes. +But in loongarch_symbol_insns: + + if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) + return 0; + +And LSX_SUPPORTED_MODE_P is defined as: + + #define LSX_SUPPORTED_MODE_P(MODE) \ + (ISA_HAS_LSX \ + && GET_MODE_SIZE (MODE) == UNITS_PER_LSX_REG ... ... + +GET_MODE_SIZE is expanded to a call to mode_to_bytes, which is defined: + + ALWAYS_INLINE poly_uint16 + mode_to_bytes (machine_mode mode) + { + #if GCC_VERSION >= 4001 + return (__builtin_constant_p (mode) + ? mode_size_inline (mode) : mode_size[mode]); + #else + return mode_size[mode]; + #endif + } + +There is an assertion in mode_size_inline: + + gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES); + +Note that NUM_MACHINE_MODES = MAX_MACHINE_MODE (emitted by genmodes.cc), +thus if __builtin_constant_p (mode) is evaluated true (it happens when +GCC is bootstrapped with LTO+PGO), the assertion will be triggered and +cause an ICE. OTOH if __builtin_constant_p (mode) is evaluated false, +mode_size[mode] is still an out-of-bound array access (the length of the +mode_size array is NUM_MACHINE_MODES). + +So we shouldn't call LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P with +MAX_MACHINE_MODE in loongarch_symbol_insns. This is very similar to a +MIPS bug PR98491 fixed by me about 3 years ago. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_symbol_insns): Do not + use LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P if mode is + MAX_MACHINE_MODE. +--- + gcc/config/loongarch/loongarch.cc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index a0e0906af..d23b09cc5 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2004,7 +2004,8 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + { + /* LSX LD.* and ST.* cannot support loading symbols via an immediate + operand. */ +- if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) ++ if (mode != MAX_MACHINE_MODE ++ && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))) + return 0; + + switch (type) +-- +2.43.0 +
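The same guard in miniature: a sentinel equal to the table length is a legitimate "no mode" value, but it must be screened out before it is ever used as an index.

#include <cassert>

enum { MODE_SI, MODE_DI, MODE_MAX };            /* MODE_MAX = sentinel */
static const unsigned mode_bytes[MODE_MAX] = { 4, 8 };

unsigned
bytes_or_zero (int mode)
{
  if (mode == MODE_MAX)   /* check the sentinel first, as the patch does */
    return 0;
  assert (mode >= 0 && mode < MODE_MAX);
  return mode_bytes[mode];
}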
View file
_service:tar_scm:0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch
Added
@@ -0,0 +1,90 @@ +From a2a8b560c1749293d3b6d027e20753a7ea042c80 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:55 +0000 +Subject: [PATCH 035/157] [Backport][SME] mode-switching: Avoid quadractic list + operation + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=174ee5115a3004d3664165e9d619535b579111d4 + +add_seginfo chained insn information to the end of a list +by starting at the head of the list. This patch avoids the +quadraticness by keeping track of the tail pointer. + +gcc/ + * mode-switching.cc (add_seginfo): Replace head pointer with + a pointer to the tail pointer. + (optimize_mode_switching): Update calls accordingly. +--- + gcc/mode-switching.cc | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 2a9f98793..6a13951c9 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -162,23 +162,14 @@ new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live) + } + + /* Add a seginfo element to the end of a list. +- HEAD is a pointer to the list beginning. ++ TAIL is a pointer to the list's null terminator. + INFO is the structure to be linked in. */ + + static void +-add_seginfo (struct bb_info *head, struct seginfo *info) ++add_seginfo (struct seginfo ***tail_ptr, struct seginfo *info) + { +- struct seginfo *ptr; +- +- if (head->seginfo == NULL) +- head->seginfo = info; +- else +- { +- ptr = head->seginfo; +- while (ptr->next != NULL) +- ptr = ptr->next; +- ptr->next = info; +- } ++ **tail_ptr = info; ++ *tail_ptr = &info->next; + } + + /* Record in LIVE that register REG died. */ +@@ -573,6 +564,7 @@ optimize_mode_switching (void) + Also compute the initial transparency settings. */ + FOR_EACH_BB_FN (bb, cfun) + { ++ struct seginfo **tail_ptr = &info[bb->index].seginfo; + struct seginfo *ptr; + int last_mode = no_mode; + bool any_set_required = false; +@@ -598,7 +590,7 @@ optimize_mode_switching (void) + if (ins_pos != BB_END (bb)) + ins_pos = NEXT_INSN (ins_pos); + ptr = new_seginfo (no_mode, ins_pos, live_now); +- add_seginfo (info + bb->index, ptr); ++ add_seginfo (&tail_ptr, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transp[bb->index], j, i); + } +@@ -616,7 +608,7 @@ optimize_mode_switching (void) + { + any_set_required = true; + last_mode = mode; + ptr = new_seginfo (mode, insn, live_now); +- add_seginfo (info + bb->index, ptr); ++ add_seginfo (&tail_ptr, ptr); + for (i = 0; i < no_mode; i++) + clear_mode_bit (transp[bb->index], j, i); + } +@@ -645,7 +637,7 @@ optimize_mode_switching (void) + if (!any_set_required) + { + ptr = new_seginfo (no_mode, BB_END (bb), live_now); +- add_seginfo (info + bb->index, ptr); ++ add_seginfo (&tail_ptr, ptr); + if (last_mode != no_mode) + for (i = 0; i < no_mode; i++) + clear_mode_bit (transp[bb->index], j, i); +-- +2.33.0 +
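The list-append idiom the patch adopts, in isolation: instead of walking from the head on every insertion, which is quadratic over a basic block, keep a pointer to the terminating null pointer and splice each node in constant time.

struct seg { int mode; seg *next; };

static void
append (seg ***tail_ptr, seg *node)
{
  node->next = nullptr;
  **tail_ptr = node;         /* drop the node where the terminator was */
  *tail_ptr = &node->next;   /* its next field is the new terminator */
}

/* Usage:
     seg *head = nullptr;
     seg **tail = &head;
     append (&tail, new seg { 1, nullptr });
     append (&tail, new seg { 2, nullptr });   // head -> 1 -> 2  */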
_service:tar_scm:0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch
Added
@@ -0,0 +1,122 @@
+From 659b51a6aed60f389009eff1e04645a47e55a45c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 3 Feb 2024 03:16:14 +0800
+Subject: [PATCH 134/188] LoongArch: Fix wrong LSX FP vector negation
+
+We expanded (neg x) to (minus const0 x) for LSX FP vectors, this is
+wrong because -0.0 is not 0 - 0.0.  This causes some Python tests to
+fail when Python is built with LSX enabled.
+
+Use the vbitrevi.{d/w} instructions to simply reverse the sign bit
+instead.  We are already doing this for LASX and now we can unify them
+into simd.md.
+
+gcc/ChangeLog:
+
+	* config/loongarch/lsx.md (neg<mode:FLSX>2): Remove the
+	incorrect expand.
+	* config/loongarch/simd.md (simdfmt_as_i): New define_mode_attr.
+	(elmsgnbit): Likewise.
+	(neg<mode:FVEC>2): New define_insn.
+	* config/loongarch/lasx.md (negv4df2, negv8sf2): Remove as they
+	are now instantiated in simd.md.
+---
+ gcc/config/loongarch/lasx.md | 16 ----------------
+ gcc/config/loongarch/lsx.md  | 11 -----------
+ gcc/config/loongarch/simd.md | 18 ++++++++++++++++++
+ 3 files changed, 18 insertions(+), 27 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 946811e1a..38f35bad6 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -3028,22 +3028,6 @@
+   [(set_attr "type" "simd_logic")
+    (set_attr "mode" "V8SF")])
+ 
+-(define_insn "negv4df2"
+-  [(set (match_operand:V4DF 0 "register_operand" "=f")
+-	(neg:V4DF (match_operand:V4DF 1 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvbitrevi.d\t%u0,%u1,63"
+-  [(set_attr "type" "simd_logic")
+-   (set_attr "mode" "V4DF")])
+-
+-(define_insn "negv8sf2"
+-  [(set (match_operand:V8SF 0 "register_operand" "=f")
+-	(neg:V8SF (match_operand:V8SF 1 "register_operand" "f")))]
+-  "ISA_HAS_LASX"
+-  "xvbitrevi.w\t%u0,%u1,31"
+-  [(set_attr "type" "simd_logic")
+-   (set_attr "mode" "V8SF")])
+-
+ (define_insn "xvfmadd<mode>4"
+   [(set (match_operand:FLASX 0 "register_operand" "=f")
+ 	(fma:FLASX (match_operand:FLASX 1 "register_operand" "f")
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 612377436..d5aa3f46f 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -728,17 +728,6 @@
+   DONE;
+ })
+ 
+-(define_expand "neg<mode>2"
+-  [(set (match_operand:FLSX 0 "register_operand")
+-	(neg:FLSX (match_operand:FLSX 1 "register_operand")))]
+-  "ISA_HAS_LSX"
+-{
+-  rtx reg = gen_reg_rtx (<MODE>mode);
+-  emit_move_insn (reg, CONST0_RTX (<MODE>mode));
+-  emit_insn (gen_sub<mode>3 (operands[0], reg, operands[1]));
+-  DONE;
+-})
+-
+ (define_expand "lsx_vrepli<mode>"
+   [(match_operand:ILSX 0 "register_operand")
+    (match_operand 1 "const_imm10_operand")
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 8ac1d75a8..00d4c7831 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -85,12 +85,21 @@
+ (define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l")
+ 				  (V4SF "w") (V8SF "w")])
+ 
++;; Suffix for integer mode in LSX or LASX instructions to operating FP
++;; vectors using integer vector operations.
++(define_mode_attr simdfmt_as_i [(V2DF "d") (V4DF "d")
++				(V4SF "w") (V8SF "w")])
++
+ ;; Size of vector elements in bits.
+ (define_mode_attr elmbits [(V2DI "64") (V4DI "64")
+ 			   (V4SI "32") (V8SI "32")
+ 			   (V8HI "16") (V16HI "16")
+ 			   (V16QI "8") (V32QI "8")])
+ 
++;; The index of sign bit in FP vector elements.
++(define_mode_attr elmsgnbit [(V2DF "63") (V4DF "63")
++			     (V4SF "31") (V8SF "31")])
++
+ ;; This attribute is used to form an immediate operand constraint using
+ ;; "const_<bitimm>_operand".
+ (define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3")
+@@ -457,6 +466,15 @@
+   DONE;
+ })
+ 
++;; FP negation.
++(define_insn "neg<mode>2"
++  [(set (match_operand:FVEC 0 "register_operand" "=f")
++	(neg:FVEC (match_operand:FVEC 1 "register_operand" "f")))]
++  ""
++  "<x>vbitrevi.<simdfmt_as_i>\t%<wu>0,%<wu>1,<elmsgnbit>"
++  [(set_attr "type" "simd_logic")
++   (set_attr "mode" "<MODE>")])
++
+ ; The LoongArch SX Instructions.
+ (include "lsx.md")
+ 
+--
+2.43.0
+
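The IEEE 754 point behind the fix: 0.0 - 0.0 rounds to +0.0, so expanding negation as a subtraction from zero produces the wrong sign for zero, while flipping the sign bit (what vbitrevi.d and vbitrevi.w do per vector lane) is exact for every input, zeros and NaNs included. A scalar C demonstration, assuming IEEE 754 doubles:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Subtraction-based negation: wrong for zero, since 0.0 - 0.0 is +0.0.  */
static double neg_by_sub (double x) { return 0.0 - x; }

/* Sign-bit flip, the scalar analogue of vbitrevi.d on bit 63.  */
static double
neg_by_bitrev (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits ^= UINT64_C (1) << 63;
  memcpy (&x, &bits, sizeof x);
  return x;
}

int
main (void)
{
  assert (neg_by_bitrev (1.5) == -1.5);
  /* Distinguish the zeros by the sign of the infinity they divide into.  */
  assert (1.0 / neg_by_bitrev (0.0) < 0.0);  /* -0.0: correct */
  assert (1.0 / neg_by_sub (0.0) > 0.0);     /* +0.0: the bug */
  return 0;
}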
_service:tar_scm:0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
Added
@@ -0,0 +1,136 @@
+From 194700063ed04b56d84912f7ace1b8370af6c696 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:56 +0000
+Subject: [PATCH 036/157] [Backport][SME] mode-switching: Fix the mode passed
+ to the emit hook
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5afd208beaef50bcc43b556d4c41d41656b06436
+
+optimize_mode_switching passes an entity's current mode (if known)
+to the emit hook.  However, the mode that it passed ignored the
+effect of the after hook.  Instead, the mode for the first emit
+call in a block was taken from the incoming mode, whereas the
+mode for each subsequent emit call was taken from the result
+of the previous call.
+
+The previous pass through the insns already calculated the
+correct mode, so this patch records it in the seginfo structure.
+(There was a 32-bit hole on 64-bit hosts, so this doesn't increase
+the size of the structure for them.)
+
+gcc/
+	* mode-switching.cc (seginfo): Add a prev_mode field.
+	(new_seginfo): Take and initialize the prev_mode.
+	(optimize_mode_switching): Update calls accordingly.
+	Use the recorded modes during the emit phase, rather than
+	computing one on the fly.
+---
+ gcc/mode-switching.cc | 30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 6a13951c9..584cd4f67 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
+    NEXT is the next insn in the same basic block.  */
+ struct seginfo
+ {
++  int prev_mode;
+   int mode;
+   rtx_insn *insn_ptr;
+   struct seginfo *next;
+@@ -140,20 +141,22 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
+   return need_commit;
+ }
+ 
+-/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
+-   and REGS_LIVE parameters.
++/* Allocate a new BBINFO structure, initialized with the PREV_MODE, MODE,
++   INSN, and REGS_LIVE parameters.
+    INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
+    basic block; that allows us later to insert instructions in a FIFO-like
+    manner.  */
+ 
+ static struct seginfo *
+-new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
++new_seginfo (int prev_mode, int mode, rtx_insn *insn,
++	     const HARD_REG_SET &regs_live)
+ {
+   struct seginfo *ptr;
+ 
+   gcc_assert (!NOTE_INSN_BASIC_BLOCK_P (insn)
+ 	      || insn == BB_END (NOTE_BASIC_BLOCK (insn)));
+   ptr = XNEW (struct seginfo);
++  ptr->prev_mode = prev_mode;
+   ptr->mode = mode;
+   ptr->insn_ptr = insn;
+   ptr->next = NULL;
+@@ -589,7 +592,7 @@ optimize_mode_switching (void)
+ 	  gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
+ 	  if (ins_pos != BB_END (bb))
+ 	    ins_pos = NEXT_INSN (ins_pos);
+-	  ptr = new_seginfo (no_mode, ins_pos, live_now);
++	  ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+ 	  add_seginfo (&tail_ptr, ptr);
+ 	  for (i = 0; i < no_mode; i++)
+ 	    clear_mode_bit (transp[bb->index], j, i);
+@@ -605,12 +608,12 @@ optimize_mode_switching (void)
+ 
+ 	      if (mode != no_mode && mode != last_mode)
+ 		{
+-		  any_set_required = true;
+-		  last_mode = mode;
+-		  ptr = new_seginfo (mode, insn, live_now);
++		  ptr = new_seginfo (last_mode, mode, insn, live_now);
+ 		  add_seginfo (&tail_ptr, ptr);
+ 		  for (i = 0; i < no_mode; i++)
+ 		    clear_mode_bit (transp[bb->index], j, i);
++		  any_set_required = true;
++		  last_mode = mode;
+ 		}
+ 
+ 	      if (targetm.mode_switching.after)
+@@ -636,7 +639,7 @@ optimize_mode_switching (void)
+ 	     mark the block as nontransparent.  */
+ 	  if (!any_set_required)
+ 	    {
+-	      ptr = new_seginfo (no_mode, BB_END (bb), live_now);
++	      ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
+ 	      add_seginfo (&tail_ptr, ptr);
+ 	      if (last_mode != no_mode)
+ 		for (i = 0; i < no_mode; i++)
+@@ -777,9 +780,9 @@ optimize_mode_switching (void)
+       FOR_EACH_BB_FN (bb, cfun)
+ 	{
+ 	  struct seginfo *ptr, *next;
+-	  int cur_mode = bb_info[j][bb->index].mode_in;
++	  struct seginfo *first = bb_info[j][bb->index].seginfo;
+ 
+-	  for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
++	  for (ptr = first; ptr; ptr = next)
+ 	    {
+ 	      next = ptr->next;
+ 	      if (ptr->mode != no_mode)
+@@ -789,14 +792,15 @@ optimize_mode_switching (void)
+ 		  rtl_profile_for_bb (bb);
+ 		  start_sequence ();
+ 
++		  int cur_mode = (ptr == first && ptr->prev_mode == no_mode
++				  ? bb_info[j][bb->index].mode_in
++				  : ptr->prev_mode);
++
+ 		  targetm.mode_switching.emit (entity_map[j], ptr->mode,
+ 					       cur_mode, ptr->regs_live);
+ 		  mode_set = get_insns ();
+ 		  end_sequence ();
+ 
+-		  /* modes kill each other inside a basic block.  */
+-		  cur_mode = ptr->mode;
+-
+ 		  /* Insert MODE_SET only if it is nonempty.  */
+ 		  if (mode_set != NULL_RTX)
+ 		    {
+--
+2.33.0
+
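A toy C model of the idea (not GCC's data structures): record the mode in force before each switch point while scanning forward, including the effect of an after-style adjustment, and let the emit phase read the recorded value instead of recomputing it.

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

enum { NO_MODE = -1 };

/* One mode-requiring point, with the mode in force before it recorded
   at scan time (the analogue of the new seginfo::prev_mode field).  */
struct segment
{
  int prev_mode;
  int mode;
  struct segment *next;
};

/* Forward scan: LAST_MODE tracks the mode after each point, including
   any adjustment made behind our back (the "after hook").  */
static struct segment *
scan (const int *needed, const int *after, size_t n)
{
  struct segment *head = NULL, **tail = &head;
  int last_mode = NO_MODE;
  for (size_t i = 0; i < n; i++)
    {
      if (needed[i] != NO_MODE && needed[i] != last_mode)
	{
	  struct segment *s = calloc (1, sizeof *s);
	  s->prev_mode = last_mode;
	  s->mode = needed[i];
	  *tail = s;
	  tail = &s->next;
	  last_mode = needed[i];
	}
      if (after[i] != NO_MODE)	/* adjustment by the after hook */
	last_mode = after[i];
    }
  return head;
}

int
main (void)
{
  int needed[] = { 2, NO_MODE, 2, 3 };
  int after[]  = { NO_MODE, 1, NO_MODE, NO_MODE };
  struct segment *s = scan (needed, after, 4);

  assert (s->prev_mode == NO_MODE && s->mode == 2);
  /* The after hook switched to mode 1 between the two points; the
     recorded prev_mode sees that, while the old "previous emit result"
     logic would have claimed the mode was still 2.  */
  assert (s->next->prev_mode == 1 && s->next->mode == 2);
  assert (s->next->next->prev_mode == 2 && s->next->next->mode == 3);
  return 0;
}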
_service:tar_scm:0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch
Added
@@ -0,0 +1,30 @@
+From 539eb7639eeda8ea43149032f6aa724e5d46017c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 5 Feb 2024 16:23:20 +0800
+Subject: [PATCH 135/188] LoongArch: Fix wrong return value type of
+ __iocsrrd_h.
+
+gcc/ChangeLog:
+
+	* config/loongarch/larchintrin.h (__iocsrrd_h): Modify the
+	function return value type to unsigned short.
+---
+ gcc/config/loongarch/larchintrin.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
+index 6582dfe49..046e042fd 100644
+--- a/gcc/config/loongarch/larchintrin.h
++++ b/gcc/config/loongarch/larchintrin.h
+@@ -268,7 +268,7 @@ __iocsrrd_b (unsigned int _1)
+ 
+ /* Assembly instruction format:	rd, rj.  */
+ /* Data types in instruction templates:  UHI, USI.  */
+-extern __inline unsigned char
++extern __inline unsigned short
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+ __iocsrrd_h (unsigned int _1)
+ {
+--
+2.43.0
+
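The bug class is silent truncation through a too-narrow return type. A minimal C illustration, where the mmio parameter merely stands in for whatever a 16-bit iocsrrd.h access would read:

#include <assert.h>

/* Wrong declared return type: the 16-bit value is narrowed to 8 bits
   on return, losing the high byte.  */
static unsigned char
read16_wrong (unsigned int mmio)
{
  return (unsigned short) (mmio & 0xffffu);
}

/* Correct declared return type, matching the instruction's width.  */
static unsigned short
read16_right (unsigned int mmio)
{
  return (unsigned short) (mmio & 0xffffu);
}

int
main (void)
{
  assert (read16_wrong (0xabcd) == 0xcd);   /* high byte silently lost */
  assert (read16_right (0xabcd) == 0xabcd);
  return 0;
}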
_service:tar_scm:0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
Added
@@ -0,0 +1,103 @@
+From ac51d446ee605e942b0831d3ff617980d94bf502 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:56 +0000
+Subject: [PATCH 037/157] [Backport][SME] mode-switching: Simplify recording of
+ transparency
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=335b55f4146c5ef9e3bf4bcb7e58e887c3150b02
+
+For a given block, an entity is either transparent for
+all modes or for none.  Each update to the transparency set
+therefore used a loop like:
+
+      for (i = 0; i < no_mode; i++)
+	clear_mode_bit (transp[bb->index], j, i);
+
+This patch instead starts out with a bit-per-block bitmap
+and updates the main bitmap at the end.
+
+This isn't much of a simplification on its own.  The main
+purpose is to simplify later patches.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Initially
+	compute transparency in a bit-per-block bitmap.
+---
+ gcc/mode-switching.cc | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 584cd4f67..4d2b9e284 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -555,6 +555,8 @@ optimize_mode_switching (void)
+   bitmap_vector_clear (antic, last_basic_block_for_fn (cfun));
+   bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
+ 
++  auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
++
+   for (j = n_entities - 1; j >= 0; j--)
+     {
+       int e = entity_map[j];
+@@ -562,6 +564,8 @@ optimize_mode_switching (void)
+       struct bb_info *info = bb_info[j];
+       rtx_insn *insn;
+ 
++      bitmap_ones (transp_all);
++
+       /* Determine what the first use (if any) need for a mode of entity E is.
+	 This will be the mode that is anticipatable for this block.
+	 Also compute the initial transparency settings.  */
+@@ -594,8 +598,7 @@ optimize_mode_switching (void)
+	    ins_pos = NEXT_INSN (ins_pos);
+	  ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+	  add_seginfo (&tail_ptr, ptr);
+-	  for (i = 0; i < no_mode; i++)
+-	    clear_mode_bit (transp[bb->index], j, i);
++	  bitmap_clear_bit (transp_all, bb->index);
+	}
+     }
+ 
+@@ -610,8 +613,7 @@ optimize_mode_switching (void)
+	    {
+	      ptr = new_seginfo (last_mode, mode, insn, live_now);
+	      add_seginfo (&tail_ptr, ptr);
+-	      for (i = 0; i < no_mode; i++)
+-		clear_mode_bit (transp[bb->index], j, i);
++	      bitmap_clear_bit (transp_all, bb->index);
+	      any_set_required = true;
+	      last_mode = mode;
+	    }
+@@ -642,8 +644,7 @@ optimize_mode_switching (void)
+	      ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
+	      add_seginfo (&tail_ptr, ptr);
+	      if (last_mode != no_mode)
+-		for (i = 0; i < no_mode; i++)
+-		  clear_mode_bit (transp[bb->index], j, i);
++		bitmap_clear_bit (transp_all, bb->index);
+	    }
+	}
+       if (targetm.mode_switching.entry && targetm.mode_switching.exit)
+@@ -666,8 +667,7 @@ optimize_mode_switching (void)
+	     an extra check in make_preds_opaque.  We also
+	     need this to avoid confusing pre_edge_lcm when
+	     antic is cleared but transp and comp are set.  */
+-	  for (i = 0; i < no_mode; i++)
+-	    clear_mode_bit (transp[bb->index], j, i);
++	  bitmap_clear_bit (transp_all, bb->index);
+ 
+	  /* Insert a fake computing definition of MODE into entry
+	     blocks which compute no mode.  This represents the mode on
+@@ -687,6 +687,9 @@ optimize_mode_switching (void)
+ 
+       FOR_EACH_BB_FN (bb, cfun)
+	{
++	  if (!bitmap_bit_p (transp_all, bb->index))
++	    clear_mode_bit (transp[bb->index], j, m);
++
+	  if (info[bb->index].seginfo->mode == m)
+	    set_mode_bit (antic[bb->index], j, m);
+ 
+--
+2.33.0
+
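A small C sketch of the same bookkeeping with plain arrays instead of GCC's sbitmaps (illustrative names only): one flag per block during the scan, folded into the per-mode sets once at the end.

#include <assert.h>
#include <stdbool.h>
#include <string.h>

#define NUM_BLOCKS 4
#define NUM_MODES  3

/* Per-block, per-mode transparency, as the LCM problem ultimately needs.  */
static bool transp[NUM_BLOCKS][NUM_MODES];

int
main (void)
{
  /* Scan phase: a block is either transparent for every mode of the
     entity or for none, so one flag per block suffices (transp_all).  */
  bool transp_all[NUM_BLOCKS];
  memset (transp_all, true, sizeof transp_all);
  transp_all[1] = false;	/* block 1 sets the entity's mode */

  /* Fold into the per-mode sets once, at the end, instead of looping
     over all modes at every update site.  */
  memset (transp, true, sizeof transp);
  for (int bb = 0; bb < NUM_BLOCKS; bb++)
    if (!transp_all[bb])
      for (int m = 0; m < NUM_MODES; m++)
	transp[bb][m] = false;

  assert (transp[0][2] && !transp[1][2]);
  return 0;
}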
_service:tar_scm:0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch
Added
@@ -0,0 +1,337 @@ +From 868f56db1101bf679f1b2510b9934a978f503a1e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Mon, 5 Feb 2024 16:53:01 +0800 +Subject: PATCH 136/188 LoongArch: Remove redundant symbol type conversions + in larchintrin.h. + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h (__movgr2fcsr): Remove redundant + symbol type conversions. + (__cacop_d): Likewise. + (__cpucfg): Likewise. + (__asrtle_d): Likewise. + (__asrtgt_d): Likewise. + (__lddir_d): Likewise. + (__ldpte_d): Likewise. + (__crc_w_b_w): Likewise. + (__crc_w_h_w): Likewise. + (__crc_w_w_w): Likewise. + (__crc_w_d_w): Likewise. + (__crcc_w_b_w): Likewise. + (__crcc_w_h_w): Likewise. + (__crcc_w_w_w): Likewise. + (__crcc_w_d_w): Likewise. + (__csrrd_w): Likewise. + (__csrwr_w): Likewise. + (__csrxchg_w): Likewise. + (__csrrd_d): Likewise. + (__csrwr_d): Likewise. + (__csrxchg_d): Likewise. + (__iocsrrd_b): Likewise. + (__iocsrrd_h): Likewise. + (__iocsrrd_w): Likewise. + (__iocsrrd_d): Likewise. + (__iocsrwr_b): Likewise. + (__iocsrwr_h): Likewise. + (__iocsrwr_w): Likewise. + (__iocsrwr_d): Likewise. + (__frecipe_s): Likewise. + (__frecipe_d): Likewise. + (__frsqrte_s): Likewise. + (__frsqrte_d): Likewise. +--- + gcc/config/loongarch/larchintrin.h | 69 ++++++++++++++---------------- + 1 file changed, 33 insertions(+), 36 deletions(-) + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 046e042fd..2e94e5612 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -87,13 +87,13 @@ __rdtimel_w (void) + /* Assembly instruction format: fcsr, rj. */ + /* Data types in instruction templates: VOID, UQI, USI. */ + #define __movgr2fcsr(/*ui5*/ _1, _2) \ +- __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2); ++ __builtin_loongarch_movgr2fcsr ((_1), _2); + + #if defined __loongarch64 + /* Assembly instruction format: ui5, rj, si12. */ + /* Data types in instruction templates: VOID, USI, UDI, SI. */ + #define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \ +- ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3))) ++ __builtin_loongarch_cacop_d ((_1), (_2), (_3)) + #else + #error "Unsupported ABI." + #endif +@@ -104,7 +104,7 @@ extern __inline unsigned int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __cpucfg (unsigned int _1) + { +- return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1); ++ return __builtin_loongarch_cpucfg (_1); + } + + #ifdef __loongarch64 +@@ -114,7 +114,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __asrtle_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2); ++ __builtin_loongarch_asrtle_d (_1, _2); + } + + /* Assembly instruction format: rj, rk. */ +@@ -123,7 +123,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __asrtgt_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2); ++ __builtin_loongarch_asrtgt_d (_1, _2); + } + #endif + +@@ -131,7 +131,7 @@ __asrtgt_d (long int _1, long int _2) + /* Assembly instruction format: rd, rj, ui5. */ + /* Data types in instruction templates: DI, DI, UQI. */ + #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ +- ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2))) ++ __builtin_loongarch_lddir_d ((_1), (_2)) + #else + #error "Unsupported ABI." 
+ #endif +@@ -140,7 +140,7 @@ __asrtgt_d (long int _1, long int _2) + /* Assembly instruction format: rj, ui5. */ + /* Data types in instruction templates: VOID, DI, UQI. */ + #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ +- ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2))) ++ __builtin_loongarch_ldpte_d ((_1), (_2)) + #else + #error "Unsupported ABI." + #endif +@@ -151,7 +151,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_b_w (char _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2); ++ return __builtin_loongarch_crc_w_b_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -160,7 +160,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_h_w (short _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2); ++ return __builtin_loongarch_crc_w_h_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -169,7 +169,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_w_w (int _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2); ++ return __builtin_loongarch_crc_w_w_w (_1, _2); + } + + #ifdef __loongarch64 +@@ -179,7 +179,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_d_w (long int _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2); ++ return __builtin_loongarch_crc_w_d_w (_1, _2); + } + #endif + +@@ -189,7 +189,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_b_w (char _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_b_w ((char) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_b_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -198,7 +198,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_h_w (short _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_h_w ((short) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_h_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -207,7 +207,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_w_w (int _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_w_w ((int) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_w_w (_1, _2); + } + + #ifdef __loongarch64 +@@ -217,44 +217,41 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_d_w (long int _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_d_w ((long int) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_d_w (_1, _2); + } + #endif + + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: USI, USI. */ + #define __csrrd_w(/*ui14*/ _1) \ +- ((unsigned int) __builtin_loongarch_csrrd_w ((_1))) ++ __builtin_loongarch_csrrd_w ((_1)) + + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: USI, USI, USI. */ + #define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ +- ((unsigned int) __builtin_loongarch_csrwr_w ((unsigned int) (_1), (_2))) ++ __builtin_loongarch_csrwr_w ((_1), (_2)) + + /* Assembly instruction format: rd, rj, ui14. */ + /* Data types in instruction templates: USI, USI, USI, USI. 
*/ + #define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ +- ((unsigned int) __builtin_loongarch_csrxchg_w ((unsigned int) (_1), \ +- (unsigned int) (_2), (_3))) ++ __builtin_loongarch_csrxchg_w ((_1), (_2), (_3)) + + #ifdef __loongarch64 + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: UDI, USI. */ + #define __csrrd_d(/*ui14*/ _1) \ +- ((unsigned long int) __builtin_loongarch_csrrd_d ((_1))) ++ __builtin_loongarch_csrrd_d ((_1)) + + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: UDI, UDI, USI. */ + #define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ +- ((unsigned long int) __builtin_loongarch_csrwr_d ((unsigned long int) (_1), \ +- (_2))) ++ __builtin_loongarch_csrwr_d ((_1), (_2)) + + /* Assembly instruction format: rd, rj, ui14. */ + /* Data types in instruction templates: UDI, UDI, UDI, USI. */ + #define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ + /*ui14*/ _3) \ +- ((unsigned long int) __builtin_loongarch_csrxchg_d ( \ +- (unsigned long int) (_1), (unsigned long int) (_2), (_3))) ++ __builtin_loongarch_csrxchg_d ((_1), (_2), (_3)) + #endif + + /* Assembly instruction format: rd, rj. */ +@@ -263,7 +260,7 @@ extern __inline unsigned char + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_b (unsigned int _1) + { +- return (unsigned char) __builtin_loongarch_iocsrrd_b ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_b (_1); + } + + /* Assembly instruction format: rd, rj. */ +@@ -272,7 +269,7 @@ extern __inline unsigned short + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h (unsigned int _1) + { +- return (unsigned short) __builtin_loongarch_iocsrrd_h ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_h (_1); + } + + /* Assembly instruction format: rd, rj. */ +@@ -281,7 +278,7 @@ extern __inline unsigned int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_w (unsigned int _1) + { +- return (unsigned int) __builtin_loongarch_iocsrrd_w ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_w (_1); + } + + #ifdef __loongarch64 +@@ -291,7 +288,7 @@ extern __inline unsigned long int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_d (unsigned int _1) + { +- return (unsigned long int) __builtin_loongarch_iocsrrd_d ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_d (_1); + } + #endif + +@@ -301,7 +298,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_b (unsigned char _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_b ((unsigned char) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_b (_1, _2); + } + + /* Assembly instruction format: rd, rj. */ +@@ -310,7 +307,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_h (unsigned short _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_h ((unsigned short) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_h (_1, _2); + } + + /* Assembly instruction format: rd, rj. 
*/ +@@ -319,7 +316,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_w (unsigned int _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_w ((unsigned int) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_w (_1, _2); + } + + #ifdef __loongarch64 +@@ -329,7 +326,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_d (unsigned long int _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_d ((unsigned long int) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_d (_1, _2); + } + #endif + +@@ -340,7 +337,7 @@ extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s (float _1) + { +- return (float) __builtin_loongarch_frecipe_s ((float) _1); ++ return __builtin_loongarch_frecipe_s (_1); + } + + /* Assembly instruction format: fd, fj. */ +@@ -349,7 +346,7 @@ extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d (double _1) + { +- return (double) __builtin_loongarch_frecipe_d ((double) _1); ++ return __builtin_loongarch_frecipe_d (_1); + } + + /* Assembly instruction format: fd, fj. */ +@@ -358,7 +355,7 @@ extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s (float _1) + { +- return (float) __builtin_loongarch_frsqrte_s ((float) _1); ++ return __builtin_loongarch_frsqrte_s (_1); + } + + /* Assembly instruction format: fd, fj. */ +@@ -367,7 +364,7 @@ extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d (double _1) + { +- return (double) __builtin_loongarch_frsqrte_d ((double) _1); ++ return __builtin_loongarch_frsqrte_d (_1); + } + #endif + +-- +2.43.0 +
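The removed casts are identity conversions: each argument already has the parameter's declared type and each builtin already returns its declared result type. A small C illustration with an invented stand-in for the builtin:

#include <assert.h>

/* Stand-in for __builtin_loongarch_crc_w_b_w; not real CRC arithmetic.  */
static int
crc_step (char datum, int crc)
{
  return (crc << 1) ^ datum;
}

/* The casts below convert each value to the type it already has, so this
   compiles to exactly the same code as the uncast version.  */
int
with_casts (char datum, int crc)
{
  return (int) crc_step ((char) datum, (int) crc);
}

int
without_casts (char datum, int crc)
{
  return crc_step (datum, crc);
}

int
main (void)
{
  assert (with_casts (7, 3) == without_casts (7, 3));
  return 0;
}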
_service:tar_scm:0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch
Added
@@ -0,0 +1,92 @@
+From c0aaf329d9c547b249ac120a8d1995d8546a1edb Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:57 +0000
+Subject: [PATCH 038/157] [Backport][SME] mode-switching: Tweak entry/exit
+ handling
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e59ec35276599805cdc6c3979d8a167b027d286e
+
+An entity isn't transparent in a block that requires a specific mode.
+optimize_mode_switching took that into account for normal insns,
+but didn't for the exit block.  Later patches misbehaved because
+of this.
+
+In contrast, an entity was correctly marked as non-transparent
+in the entry block, but the reasoning seemed a bit convoluted.
+It also referred to a function that no longer exists.
+Since KILL = ~TRANSP, the entity is by definition not transparent
+in a block that defines the entity, so I think we can make it so
+without comment.
+
+Finally, the exit handling was nested in the entry handling,
+but that doesn't seem necessary.  A target could say that an
+entity is undefined on entry but must be defined on return,
+on a "be liberal in what you accept, be conservative in what
+you do" principle.
+
+gcc/
+	* mode-switching.cc (optimize_mode_switching): Mark the exit
+	block as nontransparent if it requires a specific mode.
+	Handle the entry and exit mode as sibling rather than nested
+	concepts.  Remove outdated comment.
+---
+ gcc/mode-switching.cc | 34 +++++++++++++++++-------------------
+ 1 file changed, 15 insertions(+), 19 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4d2b9e284..4761c2ff0 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -649,34 +649,30 @@ optimize_mode_switching (void)
+	}
+       if (targetm.mode_switching.entry && targetm.mode_switching.exit)
+	{
+-	  int mode = targetm.mode_switching.entry (e);
+-
+	  info[post_entry->index].mode_out =
+	    info[post_entry->index].mode_in = no_mode;
+-	  if (pre_exit)
+-	    {
+-	      info[pre_exit->index].mode_out =
+-		info[pre_exit->index].mode_in = no_mode;
+-	    }
+ 
++	  int mode = targetm.mode_switching.entry (e);
+	  if (mode != no_mode)
+	    {
+-	      bb = post_entry;
+-
+-	      /* By always making this nontransparent, we save
+-		 an extra check in make_preds_opaque.  We also
+-		 need this to avoid confusing pre_edge_lcm when
+-		 antic is cleared but transp and comp are set.  */
+-	      bitmap_clear_bit (transp_all, bb->index);
+-
+	      /* Insert a fake computing definition of MODE into entry
+		 blocks which compute no mode.  This represents the mode on
+		 entry.  */
+-	      info[bb->index].computing = mode;
++	      info[post_entry->index].computing = mode;
++	      bitmap_clear_bit (transp_all, post_entry->index);
++	    }
+ 
+-	      if (pre_exit)
+-		info[pre_exit->index].seginfo->mode =
+-		  targetm.mode_switching.exit (e);
++	  if (pre_exit)
++	    {
++	      info[pre_exit->index].mode_out =
++		info[pre_exit->index].mode_in = no_mode;
++
++	      int mode = targetm.mode_switching.exit (e);
++	      if (mode != no_mode)
++		{
++		  info[pre_exit->index].seginfo->mode = mode;
++		  bitmap_clear_bit (transp_all, pre_exit->index);
++		}
+	    }
+	}
+ 
+--
+2.33.0
+
_service:tar_scm:0137-LoongArch-When-checking-whether-the-assembler-suppor.patch
Added
@@ -0,0 +1,54 @@
+From 3580ce2b8c57967117e55af48beba0aaa6257e8b Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 21 Feb 2024 11:17:14 +0800
+Subject: [PATCH 137/188] LoongArch: When checking whether the assembler
+ supports conditional branch relaxation, add compilation parameter
+ "--fatal-warnings" to the assembler.
+
+In binutils 2.40 and earlier versions, only a warning will be reported
+when a relocation immediate value is out of bounds.  As a result,
+the value of the macro HAVE_AS_COND_BRANCH_RELAXATION will also be
+defined as 1 when the assembler does not support conditional branch
+relaxation.  Therefore, add the compilation option "--fatal-warnings"
+to avoid this problem.
+
+gcc/ChangeLog:
+
+	* configure: Regenerate.
+	* configure.ac: Add parameter "--fatal-warnings" to the assembler
+	when checking whether the assembler supports conditional branch
+	relaxation.
+---
+ gcc/configure    | 2 +-
+ gcc/configure.ac | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/configure b/gcc/configure
+index eecfe60d6..f31395017 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -28947,7 +28947,7 @@ else
+   nop
+   .endr
+   beq $a0,$a1,a' > conftest.s
+-    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
++    if { ac_try='$gcc_cv_as $gcc_cv_as_flags --fatal-warnings -o conftest.o conftest.s >&5'
+   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+   (eval $ac_try) 2>&5
+   ac_status=$?
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index d1032440d..35f2c657f 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -5349,7 +5349,7 @@ x:
+   Define if your assembler supports -mrelax option.))
+ gcc_GAS_CHECK_FEATURE(conditional branch relaxation support,
+   gcc_cv_as_loongarch_cond_branch_relax,
+-  ,
++  --fatal-warnings,
+   a:
+   .rept 32769
+   nop
+--
+2.43.0
+
_service:tar_scm:0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch
Added
@@ -0,0 +1,93 @@ +From 9505464aec8f95125293c64e2eea9577e9be4700 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:57 +0000 +Subject: PATCH 039/157 BackportSME mode-switching: Allow targets to set + the mode for EH handlers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4b803fbf839439b1deca660e32d5ced211111dfa + +The mode-switching pass already had hooks to say what mode +an entity is in on entry to a function and what mode it must +be in on return. For SME, we also want to say what mode an +entity is guaranteed to be in on entry to an exception handler. + +gcc/ + * target.def (mode_switching.eh_handler): New hook. + * doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook. + * doc/tm.texi: Regenerate. + * mode-switching.cc (optimize_mode_switching): Use eh_handler + to get the mode on entry to an exception handler. +--- + gcc/doc/tm.texi | 6 ++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/mode-switching.cc | 5 ++++- + gcc/target.def | 7 +++++++ + 4 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 553aa4cf2..4788b3f7a 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10321,6 +10321,12 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY} + must be defined. + @end deftypefn + ++@deftypefn {Target Hook} int TARGET_MODE_EH_HANDLER (int @var{entity}) ++If this hook is defined, it should return the mode that @var{entity} is ++guaranteed to be in on entry to an exception handler, or the number of modes ++if there is no such guarantee. ++@end deftypefn ++ + @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n}) + This hook specifies the order in which modes for @var{entity} + are processed. 0 is the highest priority, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 9ec11b15c..ad343504f 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6926,6 +6926,8 @@ mode or ``no mode'', depending on context. + + @hook TARGET_MODE_EXIT + ++@hook TARGET_MODE_EH_HANDLER ++ + @hook TARGET_MODE_PRIORITY + + @node Target Attributes +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 4761c2ff0..9a6ba6cca 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -596,7 +596,10 @@ optimize_mode_switching (void) + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); + if (ins_pos != BB_END (bb)) + ins_pos = NEXT_INSN (ins_pos); +- ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now); ++ if (bb_has_eh_pred (bb) ++ && targetm.mode_switching.eh_handler) ++ last_mode = targetm.mode_switching.eh_handler (e); ++ ptr = new_seginfo (no_mode, last_mode, ins_pos, live_now); + add_seginfo (&tail_ptr, ptr); + bitmap_clear_bit (transp_all, bb->index); + } +diff --git a/gcc/target.def b/gcc/target.def +index b87b0f927..bbb482de6 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7042,6 +7042,13 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\ + must be defined.", + int, (int entity), NULL) + ++DEFHOOK ++(eh_handler, ++ "If this hook is defined, it should return the mode that @var{entity} is\n\ ++guaranteed to be in on entry to an exception handler, or the number of modes\n\ ++if there is no such guarantee.", ++ int, (int entity), NULL) ++ + DEFHOOK + (priority, + "This hook specifies the order in which modes for @var{entity}\n\ +-- +2.33.0 +
_service:tar_scm:0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch
Added
@@ -0,0 +1,49 @@
+From e6968eb62b2a0adc7ef591594240582630adfc61 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 21 Feb 2024 23:54:53 +0800
+Subject: [PATCH 138/188] LoongArch: Don't falsely claim gold supported in
+ toplevel configure
+
+The gold linker has never been ported to LoongArch (and it seems
+unlikely to be ported in the future as the new architectures are
+focusing on lld and/or mold for fast linkers).
+
+ChangeLog:
+
+	* configure.ac (ENABLE_GOLD): Remove loongarch*-*-* from target
+	list.
+	* configure: Regenerate.
+---
+ configure    | 2 +-
+ configure.ac | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/configure b/configure
+index 81b4a3cec..ebdca8c62 100755
+--- a/configure
++++ b/configure
+@@ -3058,7 +3058,7 @@ case "${ENABLE_GOLD}" in
+     # Check for target supported by gold.
+     case "${target}" in
+       i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+-      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
++      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+ 	configdirs="$configdirs gold"
+ 	if test x${ENABLE_GOLD} = xdefault; then
+ 	  default_ld=gold
+diff --git a/configure.ac b/configure.ac
+index 9f8dbd319..4f45fd2ba 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -353,7 +353,7 @@ case "${ENABLE_GOLD}" in
+     # Check for target supported by gold.
+     case "${target}" in
+       i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+-      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
++      | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+ 	configdirs="$configdirs gold"
+ 	if test x${ENABLE_GOLD} = xdefault; then
+ 	  default_ld=gold
+--
+2.43.0
+
_service:tar_scm:0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
Added
@@ -0,0 +1,211 @@ +From a6964e11c7f624cdaed2c9608565a5968292b70f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:58 +0000 +Subject: PATCH 040/157 BackportSME mode-switching: Pass set of live + registers to the needed hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29d3e1892ebec8abce784077d1f1a3e21d763218 + +The emit hook already takes the set of live hard registers as input. +This patch passes it to the needed hook too. SME uses this to +optimise the mode choice based on whether state is live or dead. + +The main caller already had access to the required info, but the +special handling of return values did not. + +gcc/ + * target.def (mode_switching.needed): Add a regs_live parameter. + * doc/tm.texi: Regenerate. + * config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update + accordingly. + * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. + * config/epiphany/mode-switch-use.cc (insert_uses): Likewise. + * config/i386/i386.cc (ix86_mode_needed): Likewise. + * config/riscv/riscv.cc (riscv_mode_needed): Likewise. + * config/sh/sh.cc (sh_mode_needed): Likewise. + * mode-switching.cc (optimize_mode_switching): Likewise. + (create_pre_exit): Likewise, using the DF simulate functions + to calculate the required information. +--- + gcc/config/epiphany/epiphany-protos.h | 4 +++- + gcc/config/epiphany/epiphany.cc | 2 +- + gcc/config/epiphany/mode-switch-use.cc | 2 +- + gcc/config/i386/i386.cc | 2 +- + gcc/config/sh/sh.cc | 4 ++-- + gcc/doc/tm.texi | 5 +++-- + gcc/mode-switching.cc | 14 ++++++++++++-- + gcc/target.def | 5 +++-- + 8 files changed, 26 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h +index 61b63234e..d463e5483 100644 +--- a/gcc/config/epiphany/epiphany-protos.h ++++ b/gcc/config/epiphany/epiphany-protos.h +@@ -44,7 +44,9 @@ extern void emit_set_fp_mode (int entity, int mode, int prev_mode, + #endif + extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); + extern void epiphany_expand_set_fp_mode (rtx *operands); +-extern int epiphany_mode_needed (int entity, rtx_insn *insn); ++#ifdef HARD_CONST ++extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); ++#endif + extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); + extern bool epiphany_epilogue_uses (int regno); + extern bool epiphany_optimize_mode_switching (int entity); +diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc +index f8c049340..be0fbc68c 100644 +--- a/gcc/config/epiphany/epiphany.cc ++++ b/gcc/config/epiphany/epiphany.cc +@@ -2400,7 +2400,7 @@ epiphany_mode_priority (int entity, int priority) + } + + int +-epiphany_mode_needed (int entity, rtx_insn *insn) ++epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + { + enum attr_fp_mode mode; + +diff --git a/gcc/config/epiphany/mode-switch-use.cc b/gcc/config/epiphany/mode-switch-use.cc +index 887550a33..cacb1ce5a 100644 +--- a/gcc/config/epiphany/mode-switch-use.cc ++++ b/gcc/config/epiphany/mode-switch-use.cc +@@ -58,7 +58,7 @@ insert_uses (void) + { + if (!INSN_P (insn)) + continue; +- mode = epiphany_mode_needed (e, insn); ++ mode = epiphany_mode_needed (e, insn, {}); + if (mode == no_mode) + continue; + if (target_insert_mode_switch_use) +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 60f3296b0..4d591d217 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ 
-14522,7 +14522,7 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) + prior to the execution of insn. */ + + static int +-ix86_mode_needed (int entity, rtx_insn *insn) ++ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + { + switch (entity) + { +diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc +index 03e1c04ec..85e83e12e 100644 +--- a/gcc/config/sh/sh.cc ++++ b/gcc/config/sh/sh.cc +@@ -195,7 +195,7 @@ static int calc_live_regs (HARD_REG_SET *); + static HOST_WIDE_INT rounded_frame_size (int); + static bool sh_frame_pointer_required (void); + static void sh_emit_mode_set (int, int, int, HARD_REG_SET); +-static int sh_mode_needed (int, rtx_insn *); ++static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); + static int sh_mode_after (int, int, rtx_insn *); + static int sh_mode_entry (int); + static int sh_mode_exit (int); +@@ -12529,7 +12529,7 @@ sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, + } + + static int +-sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn) ++sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) + { + return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE; + } +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 4788b3f7a..d8ac6c4d6 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10280,12 +10280,13 @@ known. Sets of a lower numbered entity will be emitted before + sets of a higher numbered entity to a mode of the same or lower priority. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}) ++@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) + @var{entity} is an integer specifying a mode-switched entity. + If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook + to return the mode that @var{entity} must be switched into prior to the + execution of @var{insn}, or the number of modes if @var{insn} has no +-such requirement. ++such requirement. @var{regs_live} contains the set of hard registers ++that are live before @var{insn}. 
+ @end deftypefn + + @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 9a6ba6cca..6bbda5058 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -254,6 +254,9 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + && GET_CODE (PATTERN (last_insn)) == USE + && GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG) + { ++ auto_bitmap live; ++ df_simulate_initialize_backwards (src_bb, live); ++ + int ret_start = REGNO (ret_reg); + int nregs = REG_NREGS (ret_reg); + int ret_end = ret_start + nregs; +@@ -262,6 +265,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + bool forced_late_switch = false; + rtx_insn *before_return_copy; + ++ df_simulate_one_insn_backwards (src_bb, last_insn, live); ++ + do + { + rtx_insn *return_copy = PREV_INSN (last_insn); +@@ -269,6 +274,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + int copy_start, copy_num; + int j; + ++ df_simulate_one_insn_backwards (src_bb, return_copy, live); ++ + if (NONDEBUG_INSN_P (return_copy)) + { + /* When using SJLJ exceptions, the call to the +@@ -368,11 +375,14 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) + the case for floating point on SH4 - then it might + be set by an arithmetic operation that needs a + different mode than the exit block. */ ++ HARD_REG_SET hard_regs_live; ++ REG_SET_TO_HARD_REG_SET (hard_regs_live, live); + for (j = n_entities - 1; j >= 0; j--) + { + int e = entity_mapj; + int mode = +- targetm.mode_switching.needed (e, return_copy); ++ targetm.mode_switching.needed (e, return_copy, ++ hard_regs_live); + + if (mode != num_modese + && mode != targetm.mode_switching.exit (e)) +@@ -609,7 +619,7 @@ optimize_mode_switching (void) + { + if (INSN_P (insn)) + { +- int mode = targetm.mode_switching.needed (e, insn); ++ int mode = targetm.mode_switching.needed (e, insn, live_now); + rtx link; + + if (mode != no_mode && mode != last_mode) +diff --git a/gcc/target.def b/gcc/target.def +index bbb482de6..06a52bdaf 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7003,8 +7003,9 @@ DEFHOOK + If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\ + to return the mode that @var{entity} must be switched into prior to the\n\ + execution of @var{insn}, or the number of modes if @var{insn} has no\n\ +-such requirement.", +- int, (int entity, rtx_insn *insn), NULL) ++such requirement. @var{regs_live} contains the set of hard registers\n\ ++that are live before @var{insn}.", ++ int, (int entity, rtx_insn *insn, HARD_REG_SET regs_live), NULL) + + DEFHOOK + (after, +-- +2.33.0 +
_service:tar_scm:0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch
Added
@@ -0,0 +1,56 @@
+From 7a4761a31454f999331e8aa5f831e26e249c4295 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 25 Feb 2024 20:40:41 +0800
+Subject: [PATCH 139/188] LoongArch: NFC: Deduplicate crc instruction defines
+
+Introduce an iterator for UNSPEC_CRC and UNSPEC_CRCC to make the next
+change easier.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (CRC): New define_int_iterator.
+	(crc): New define_int_attr.
+	(loongarch_crc_w_<size>_w, loongarch_crcc_w_<size>_w): Unify
+	into ...
+	(loongarch_<crc>_w_<size>_w): ... here.
+---
+ gcc/config/loongarch/loongarch.md | 18 +++++-------------
+ 1 file changed, 5 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 9356194fe..b5ad9eada 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -4251,24 +4251,16 @@
+ 
+ 
+ (define_mode_iterator QHSD [QI HI SI DI])
++(define_int_iterator CRC [UNSPEC_CRC UNSPEC_CRCC])
++(define_int_attr crc [(UNSPEC_CRC "crc") (UNSPEC_CRCC "crcc")])
+ 
+-(define_insn "loongarch_crc_w_<size>_w"
++(define_insn "loongarch_<crc>_w_<size>_w"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+ 	(unspec:SI [(match_operand:QHSD 1 "register_operand" "r")
+ 		    (match_operand:SI 2 "register_operand" "r")]
+-		   UNSPEC_CRC))]
++		   CRC))]
+   ""
+-  "crc.w.<size>.w\t%0,%1,%2"
+-  [(set_attr "type" "unknown")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "loongarch_crcc_w_<size>_w"
+-  [(set (match_operand:SI 0 "register_operand" "=r")
+-	(unspec:SI [(match_operand:QHSD 1 "register_operand" "r")
+-		    (match_operand:SI 2 "register_operand" "r")]
+-		   UNSPEC_CRCC))]
+-  ""
+-  "crcc.w.<size>.w\t%0,%1,%2"
++  "<crc>.w.<size>.w\t%0,%1,%2"
+   [(set_attr "type" "unknown")
+    (set_attr "mode" "<MODE>")])
+ 
+--
+2.43.0
+
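In C the closest analogue of a .md int iterator is an X-macro: one template expanded once per list element. A hedged analogy only; the step functions below are invented stand-ins, not real CRC arithmetic:

#include <assert.h>

/* The list plays the role of [UNSPEC_CRC UNSPEC_CRCC]; the second field
   mimics the per-variant attribute.  */
#define FOR_EACH_CRC(X) X (crc, 0x04c11db7u) X (crcc, 0x1edc6f41u)

/* One template, stamped out once per list entry, instead of two
   hand-maintained near-identical functions.  */
#define DEFINE_CRC_STEP(NAME, POLY)			\
  static unsigned NAME##_step (unsigned v)		\
  {							\
    return (v & 1u) ? (v >> 1) ^ (POLY) : (v >> 1);	\
  }

FOR_EACH_CRC (DEFINE_CRC_STEP)

int
main (void)
{
  assert (crc_step (2) == 1);
  assert (crcc_step (3) != crc_step (3)); /* variants differ only by POLY */
  return 0;
}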
_service:tar_scm:0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
Added
@@ -0,0 +1,177 @@ +From 4457604c11c0a32f3736d73429d1e5fb7baae3a5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 11 Nov 2023 17:28:58 +0000 +Subject: PATCH 041/157 BackportSME mode-switching: Pass the set of live + registers to the after hook + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93d65f39bc5c3dc318deb6da0e3633f3a4c6c34d + +This patch passes the set of live hard registers to the after hook, +like the previous one did for the needed hook. + +gcc/ + * target.def (mode_switching.after): Add a regs_live parameter. + * doc/tm.texi: Regenerate. + * config/epiphany/epiphany-protos.h (epiphany_mode_after): Update + accordingly. + * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. + (epiphany_mode_after): Likewise. + * config/i386/i386.cc (ix86_mode_after): Likewise. + * config/riscv/riscv.cc (riscv_mode_after): Likewise. + * config/sh/sh.cc (sh_mode_after): Likewise. + * mode-switching.cc (optimize_mode_switching): Likewise. +--- + gcc/config/epiphany/epiphany-protos.h | 3 ++- + gcc/config/epiphany/epiphany.cc | 5 +++-- + gcc/config/i386/i386.cc | 2 +- + gcc/config/sh/sh.cc | 5 +++-- + gcc/doc/tm.texi | 4 +++- + gcc/mode-switching.cc | 8 ++++---- + gcc/target.def | 4 +++- + 7 files changed, 19 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h +index d463e5483..6326b7e80 100644 +--- a/gcc/config/epiphany/epiphany-protos.h ++++ b/gcc/config/epiphany/epiphany-protos.h +@@ -46,8 +46,9 @@ extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); + extern void epiphany_expand_set_fp_mode (rtx *operands); + #ifdef HARD_CONST + extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); ++extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, ++ HARD_REG_SET); + #endif +-extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); + extern bool epiphany_epilogue_uses (int regno); + extern bool epiphany_optimize_mode_switching (int entity); + extern bool epiphany_is_interrupt_p (tree); +diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc +index be0fbc68c..62636b1ec 100644 +--- a/gcc/config/epiphany/epiphany.cc ++++ b/gcc/config/epiphany/epiphany.cc +@@ -2437,7 +2437,7 @@ epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) + return 2; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) +- mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn); ++ mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn, {}); + /* Fall through. */ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: +@@ -2498,7 +2498,8 @@ epiphany_mode_entry_exit (int entity, bool exit) + } + + int +-epiphany_mode_after (int entity, int last_mode, rtx_insn *insn) ++epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, ++ HARD_REG_SET) + { + /* We have too few call-saved registers to hope to keep the masks across + calls. */ +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 4d591d217..593185fa6 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -14583,7 +14583,7 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) + /* Return the mode that an insn results in. 
*/ + + static int +-ix86_mode_after (int entity, int mode, rtx_insn *insn) ++ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) + { + switch (entity) + { +diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc +index 85e83e12e..74d61c43b 100644 +--- a/gcc/config/sh/sh.cc ++++ b/gcc/config/sh/sh.cc +@@ -196,7 +196,7 @@ static HOST_WIDE_INT rounded_frame_size (int); + static bool sh_frame_pointer_required (void); + static void sh_emit_mode_set (int, int, int, HARD_REG_SET); + static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); +-static int sh_mode_after (int, int, rtx_insn *); ++static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET); + static int sh_mode_entry (int); + static int sh_mode_exit (int); + static int sh_mode_priority (int entity, int n); +@@ -12535,7 +12535,8 @@ sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) + } + + static int +-sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn) ++sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn, ++ HARD_REG_SET) + { + if (TARGET_HITACHI && recog_memoized (insn) >= 0 && + get_attr_fp_set (insn) != FP_SET_NONE) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d8ac6c4d6..7fce485b2 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10289,12 +10289,14 @@ such requirement. @var{regs_live} contains the set of hard registers + that are live before @var{insn}. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) ++@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) + @var{entity} is an integer specifying a mode-switched entity. + If this hook is defined, it is evaluated for every @var{insn} during mode + switching. It returns the mode that @var{entity} is in after @var{insn} + has been executed. @var{mode} is the mode that @var{entity} was in + before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}. ++@var{regs_live} is the set of hard registers that are live after @var{insn} ++has been executed. + + @var{mode} is equal to the number of modes defined for @var{entity} + if the mode before @var{insn} is unknown. The hook should likewise return +diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc +index 6bbda5058..4f0445894 100644 +--- a/gcc/mode-switching.cc ++++ b/gcc/mode-switching.cc +@@ -631,10 +631,6 @@ optimize_mode_switching (void) + last_mode = mode; + } + +- if (targetm.mode_switching.after) +- last_mode = targetm.mode_switching.after (e, last_mode, +- insn); +- + /* Update LIVE_NOW. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD) +@@ -644,6 +640,10 @@ optimize_mode_switching (void) + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_UNUSED) + reg_dies (XEXP (link, 0), &live_now); ++ ++ if (targetm.mode_switching.after) ++ last_mode = targetm.mode_switching.after (e, last_mode, ++ insn, live_now); + } + } + +diff --git a/gcc/target.def b/gcc/target.def +index 06a52bdaf..67c20bbb0 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -7014,6 +7014,8 @@ If this hook is defined, it is evaluated for every @var{insn} during mode\n\ + switching. It returns the mode that @var{entity} is in after @var{insn}\n\ + has been executed. 
@var{mode} is the mode that @var{entity} was in\n\ + before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\ ++@var{regs_live} is the set of hard registers that are live after @var{insn}\n\ ++has been executed.\n\ + \n\ + @var{mode} is equal to the number of modes defined for @var{entity}\n\ + if the mode before @var{insn} is unknown. The hook should likewise return\n\ +@@ -7021,7 +7023,7 @@ the number of modes if it does not know what mode @var{entity} has after\n\ + @var{insn}.\n\ + \n\ + Not defining the hook is equivalent to returning @var{mode}.", +- int, (int entity, int mode, rtx_insn *insn), NULL) ++ int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL) + + DEFHOOK + (entry, +-- +2.33.0 +
_service:tar_scm:0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch
Added
@@ -0,0 +1,70 @@
+From 946f9153a5d813301b05fb56a75e2c7ce22a6c2a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 25 Feb 2024 20:44:34 +0800
+Subject: [PATCH 140/188] LoongArch: Remove unneeded sign extension after
+ crc/crcc instructions
+
+The specification of crc/crcc instructions is clear that the output is
+sign-extended to GRLEN.  Add a define_insn to tell the compiler this
+fact and allow it to remove the unneeded sign extension on crc/crcc
+output.  As crc/crcc instructions are usually used in a tight loop,
+this should produce a significant performance gain.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md
+	(loongarch_<crc>_w_<size>_w_extended): New define_insn.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/crc-sext.c: New test;
+---
+ gcc/config/loongarch/loongarch.md             | 11 +++++++++++
+ gcc/testsuite/gcc.target/loongarch/crc-sext.c | 13 +++++++++++++
+ 2 files changed, 24 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/crc-sext.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index b5ad9eada..248ad12bb 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -4264,6 +4264,17 @@
+   [(set_attr "type" "unknown")
+    (set_attr "mode" "<MODE>")])
+ 
++(define_insn "loongarch_<crc>_w_<size>_w_extended"
++  [(set (match_operand:DI 0 "register_operand" "=r")
++	(sign_extend:DI
++	  (unspec:SI [(match_operand:QHSD 1 "register_operand" "r")
++		      (match_operand:SI 2 "register_operand" "r")]
++		     CRC)))]
++  "TARGET_64BIT"
++  "<crc>.w.<size>.w\t%0,%1,%2"
++  [(set_attr "type" "unknown")
++   (set_attr "mode" "<MODE>")])
++
+ ;; With normal or medium code models, if the only use of a pc-relative
+ ;; address is for loading or storing a value, then relying on linker
+ ;; relaxation is not better than emitting the machine instruction directly.
+diff --git a/gcc/testsuite/gcc.target/loongarch/crc-sext.c b/gcc/testsuite/gcc.target/loongarch/crc-sext.c
+new file mode 100644
+index 000000000..9ade5a8e4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/crc-sext.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++/*
++**my_crc:
++**	crc.w.d.w \$r4,\$r4,\$r5
++**	jr \$r1
++*/
++int my_crc(long long dword, int crc)
++{
++	return __builtin_loongarch_crc_w_d_w(dword, crc);
++}
+--
+2.43.0
+
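The underlying identity: if a 64-bit value is already the sign-extension of its low 32 bits, a further (int64_t)(int32_t) widening changes nothing, which is why the compiler may delete the extension once the define_insn describes the instruction's output that way. A C check of the identity:

#include <assert.h>
#include <stdint.h>

/* The explicit widening the compiler would otherwise have to emit
   after a 32-bit operation whose result flows into 64-bit code.  */
static int64_t
sign_extend_32 (int64_t x)
{
  return (int64_t) (int32_t) x;
}

int
main (void)
{
  int32_t v = -559038737;	/* any negative 32-bit value */
  int64_t already_extended = v;	/* sign-extended on widening */
  /* For a value that is already sign-extended, the extension is an
     identity, so an instruction known to produce sign-extended output
     needs no follow-up extension.  */
  assert (sign_extend_32 (already_extended) == already_extended);
  return 0;
}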
_service:tar_scm:0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch
Added
@@ -0,0 +1,56 @@
+From b0d3536b2a28d3a7084e3bbb9532e719aaf2016b Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:59 +0000
+Subject: [PATCH 042/157] [Backport][SME] mode-switching: Use 1-based edge aux
+ fields
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=718228a6f479fe252e0e6f71933c2465b7b480a9
+
+The pass used the edge aux field to record which mode change
+should happen on the edge, with -1 meaning "none".  It's more
+convenient for later patches to leave aux zero for "none",
+and use numbers based at 1 to record a change.
+
+gcc/
+	* mode-switching.cc (commit_mode_sets): Use 1-based edge aux values.
+---
+ gcc/mode-switching.cc | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 4f0445894..89a8494c6 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -106,10 +106,10 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
+   for (int ed = NUM_EDGES (edge_list) - 1; ed >= 0; ed--)
+     {
+       edge eg = INDEX_EDGE (edge_list, ed);
+-      int mode;
+ 
+-      if ((mode = (int)(intptr_t)(eg->aux)) != -1)
++      if (eg->aux)
+ 	{
++	  int mode = (int) (intptr_t) eg->aux - 1;
+ 	  HARD_REG_SET live_at_edge;
+ 	  basic_block src_bb = eg->src;
+ 	  int cur_mode = info[src_bb->index].mode_out;
+@@ -727,14 +727,14 @@ optimize_mode_switching (void)
+ 	{
+ 	  edge eg = INDEX_EDGE (edge_list, ed);
+ 
+-	  eg->aux = (void *)(intptr_t)-1;
++	  eg->aux = (void *) (intptr_t) 0;
+ 
+ 	  for (i = 0; i < no_mode; i++)
+ 	    {
+ 	      int m = targetm.mode_switching.priority (entity_map[j], i);
+ 	      if (mode_bit_p (inserted, j, m))
+ 		{
+-		  eg->aux = (void *)(intptr_t)m;
++		  eg->aux = (void *) (intptr_t) (m + 1);
+ 		  break;
+ 		}
+ 	    }
+--
+2.33.0
+
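Biasing stored modes by 1 so that 0 means "none" makes zero-initialized aux fields correct by default. A tiny C sketch of the encoding (hypothetical helper names):

#include <assert.h>
#include <stdint.h>

/* Store a mode (or -1 for "none") in a pointer-sized aux field,
   biased by 1 so that a null field already reads as "none".  */
static void *
encode_mode (int mode_or_none)
{
  return (void *) (intptr_t) (mode_or_none + 1);
}

static int
decode_mode (void *aux)
{
  return (int) (intptr_t) aux - 1;
}

int
main (void)
{
  void *aux = 0;			/* freshly cleared edge */
  assert (decode_mode (aux) == -1);	/* "none" for free */
  aux = encode_mode (3);
  assert (decode_mode (aux) == 3);
  return 0;
}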
View file
_service:tar_scm:0141-LoongArch-Allow-s9-as-a-register-alias.patch
Added
@@ -0,0 +1,45 @@
+From a74a85ed5f5b00018553d614b4dc57eb1dd5f5ee Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Thu, 25 Jan 2024 23:49:13 +0800
+Subject: [PATCH 141/188] LoongArch: Allow s9 as a register alias
+
+The psABI allows using s9 as an alias of r22.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (ADDITIONAL_REGISTER_NAMES): Add
+	s9 as an alias of r22.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/regname-fp-s9.c: New test.
+---
+ gcc/config/loongarch/loongarch.h                   | 1 +
+ gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 3 +++
+ 2 files changed, 4 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 15261fdc0..8bcdb8729 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -931,6 +931,7 @@ typedef struct {
+   { "t8",	20 + GP_REG_FIRST },					\
+   { "x",	21 + GP_REG_FIRST },					\
+   { "fp",	22 + GP_REG_FIRST },					\
++  { "s9",	22 + GP_REG_FIRST },					\
+   { "s0",	23 + GP_REG_FIRST },					\
+   { "s1",	24 + GP_REG_FIRST },					\
+   { "s2",	25 + GP_REG_FIRST },					\
+diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+new file mode 100644
+index 000000000..d2e3b80f8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+@@ -0,0 +1,3 @@
++/* { dg-do compile } */
++register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */
++register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */
+-- 
+2.43.0
+
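As a usage sketch (hypothetical variable names, not from the patch): after this change both declarations below name r22, which is exactly the conflict the new test provokes.

register long base asm ("s9");	/* accepted after this patch: s9 is r22 */
register long frame asm ("fp");	/* also r22, hence the duplicate-use diagnostics */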
View file
_service:tar_scm:0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch
Added
@@ -0,0 +1,337 @@
+From 88d76baa38bb29d5cc732b3c0188b74ef9783713 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:28:59 +0000
+Subject: [PATCH 043/157] [Backport][SME] mode-switching: Add a
+ target-configurable confluence operator
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=493b0038d7d04986c7de977074d095e4eb7d9a27
+
+The mode-switching pass assumed that all of an entity's modes
+were mutually exclusive.  However, the upcoming SME changes
+have an entity with some overlapping modes, so that there is
+sometimes a "superunion" mode that contains two given modes.
+We can use this relationship to pass something more helpful than
+"don't know" to the emit hook.
+
+This patch adds a new hook that targets can use to specify
+a mode confluence operator.
+
+With mutually exclusive modes, it's possible to compute a block's
+incoming and outgoing modes by looking at its availability sets.
+With the confluence operator, we instead need to solve a full
+dataflow problem.
+
+However, when emitting a mode transition, the upcoming SME use of
+mode-switching benefits from having as much information as possible
+about the starting mode.  Calculating this information is definitely
+worth the compile time.
+
+The dataflow problem is written to work before and after the LCM
+problem has been solved.  A later patch makes use of this.
+
+While there (since git blame would ping me for the reindented code),
+I used a lambda to avoid the cut-&-pasted loops.
+
+gcc/
+	* target.def (mode_switching.confluence): New hook.
+	* doc/tm.texi.in (TARGET_MODE_CONFLUENCE): New @hook.
+	* doc/tm.texi: Regenerate.
+	* mode-switching.cc (confluence_info): New variable.
+	(mode_confluence, forward_confluence_n, forward_transfer): New
+	functions.
+	(optimize_mode_switching): Use them to calculate mode_in when
+	TARGET_MODE_CONFLUENCE is defined.
+---
+ gcc/doc/tm.texi       |  16 ++++
+ gcc/doc/tm.texi.in    |   2 +
+ gcc/mode-switching.cc | 179 +++++++++++++++++++++++++++++++++++-------
+ gcc/target.def        |  17 ++++
+ 4 files changed, 186 insertions(+), 28 deletions(-)
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 7fce485b2..d7053ec9e 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10306,6 +10306,22 @@ the number of modes if it does not know what mode @var{entity} has after
+ Not defining the hook is equivalent to returning @var{mode}.
+ @end deftypefn
+ 
++@deftypefn {Target Hook} int TARGET_MODE_CONFLUENCE (int @var{entity}, int @var{mode1}, int @var{mode2})
++By default, the mode-switching pass assumes that a given entity's modes
++are mutually exclusive.  This means that the pass can only tell
++@code{TARGET_MODE_EMIT} about an entity's previous mode if all
++incoming paths of execution leave the entity in the same state.
++
++However, some entities might have overlapping, non-exclusive modes,
++so that it is sometimes possible to represent ``mode @var{mode1} or mode
++@var{mode2}'' with something more specific than ``mode not known''.
++If this is true for at least one entity, you should define this hook
++and make it return a mode that includes @var{mode1} and @var{mode2}
++as possibilities.  (The mode can include other possibilities too.)
++The hook should return the number of modes if no suitable mode exists
++for the given arguments.
++@end deftypefn
++
+ @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
+ If this hook is defined, it is evaluated for every @var{entity} that
+ needs mode switching.  It should return the mode that @var{entity} is
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index ad343504f..d420e62fd 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6922,6 +6922,8 @@ mode or ``no mode'', depending on context.
+ 
+ @hook TARGET_MODE_AFTER
+ 
++@hook TARGET_MODE_CONFLUENCE
++
+ @hook TARGET_MODE_ENTRY
+ 
+ @hook TARGET_MODE_EXIT
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 89a8494c6..065767902 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -484,6 +484,101 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
+   return pre_exit;
+ }
+ 
++/* Return the confluence of modes MODE1 and MODE2 for entity ENTITY,
++   using NO_MODE to represent an unknown mode if nothing more precise
++   is available.  */
++
++int
++mode_confluence (int entity, int mode1, int mode2, int no_mode)
++{
++  if (mode1 == mode2)
++    return mode1;
++
++  if (mode1 != no_mode
++      && mode2 != no_mode
++      && targetm.mode_switching.confluence)
++    return targetm.mode_switching.confluence (entity, mode1, mode2);
++
++  return no_mode;
++}
++
++/* Information for the dataflow problems below.  */
++struct
++{
++  /* Information about each basic block, indexed by block id.  */
++  struct bb_info *bb_info;
++
++  /* The entity that we're processing.  */
++  int entity;
++
++  /* The number of modes defined for the entity, and thus the identifier
++     of the "don't know" mode.  */
++  int no_mode;
++} confluence_info;
++
++/* Propagate information about any mode change on edge E to the
++   destination block's mode_in.  Return true if something changed.
++
++   The mode_in and mode_out fields use no_mode + 1 to mean "not yet set".  */
++
++static bool
++forward_confluence_n (edge e)
++{
++  /* The entry and exit blocks have no useful mode information.  */
++  if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK)
++    return false;
++
++  /* We don't control mode changes across abnormal edges.  */
++  if (e->flags & EDGE_ABNORMAL)
++    return false;
++
++  /* E->aux is nonzero if we have computed the LCM problem and scheduled
++     E to change the mode to E->aux - 1.  Otherwise model the change
++     from the source to the destination.  */
++  struct bb_info *bb_info = confluence_info.bb_info;
++  int no_mode = confluence_info.no_mode;
++  int src_mode = bb_info[e->src->index].mode_out;
++  if (e->aux)
++    src_mode = (int) (intptr_t) e->aux - 1;
++  if (src_mode == no_mode + 1)
++    return false;
++
++  int dest_mode = bb_info[e->dest->index].mode_in;
++  if (dest_mode == no_mode + 1)
++    {
++      bb_info[e->dest->index].mode_in = src_mode;
++      return true;
++    }
++
++  int entity = confluence_info.entity;
++  int new_mode = mode_confluence (entity, src_mode, dest_mode, no_mode);
++  if (dest_mode == new_mode)
++    return false;
++
++  bb_info[e->dest->index].mode_in = new_mode;
++  return true;
++}
++
++/* Update block BB_INDEX's mode_out based on its mode_in.  Return true if
++   something changed.  */
++
++static bool
++forward_transfer (int bb_index)
++{
++  /* The entry and exit blocks have no useful mode information.  */
++  if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
++    return false;
++
++  /* Only propagate through a block if the entity is transparent.  */
++  struct bb_info *bb_info = confluence_info.bb_info;
++  if (bb_info[bb_index].computing != confluence_info.no_mode
++      || bb_info[bb_index].mode_out == bb_info[bb_index].mode_in)
++    return false;
++
++  bb_info[bb_index].mode_out = bb_info[bb_index].mode_in;
++  return true;
++}
++
+ /* Find all insns that need a particular mode setting, and insert the
+    necessary mode switches.  Return true if we did work.  */
+ 
+@@ -567,6 +662,39 @@ optimize_mode_switching (void)
+ 
+   auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
+ 
++  auto_bitmap blocks;
++
++  /* Forward-propagate mode information through blocks where the entity
++     is transparent, so that mode_in describes the mode on entry to each
++     block and mode_out describes the mode on exit from each block.  */
++  auto forwprop_mode_info = [&](struct bb_info *info,
++				int entity, int no_mode)
++    {
++      /* Use no_mode + 1 to mean "not yet set".  */
++      FOR_EACH_BB_FN (bb, cfun)
++	{
++	  if (bb_has_abnormal_pred (bb))
++	    info[bb->index].mode_in = info[bb->index].seginfo->mode;
++	  else
++	    info[bb->index].mode_in = no_mode + 1;
++	  if (info[bb->index].computing != no_mode)
++	    info[bb->index].mode_out = info[bb->index].computing;
++	  else
++	    info[bb->index].mode_out = no_mode + 1;
++	}
++
++      confluence_info.bb_info = info;
++      confluence_info.entity = entity;
++      confluence_info.no_mode = no_mode;
++
++      bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
++      df_simple_dataflow (DF_FORWARD, NULL, NULL, forward_confluence_n,
++			  forward_transfer, blocks,
++			  df_get_postorder (DF_FORWARD),
++			  df_get_n_blocks (DF_FORWARD));
++
++    };
++
+   for (j = n_entities - 1; j >= 0; j--)
+     {
+       int e = entity_map[j];
+@@ -720,6 +848,7 @@ optimize_mode_switching (void)
+   for (j = n_entities - 1; j >= 0; j--)
+     {
+       int no_mode = num_modes[entity_map[j]];
++      struct bb_info *info = bb_info[j];
+ 
+       /* Insert all mode sets that have been inserted by lcm.  */
+ 
+@@ -740,39 +869,33 @@ optimize_mode_switching (void)
+ 	    }
+ 	}
+ 
++      /* mode_in and mode_out can be calculated directly from avin and
++	 avout if all the modes are mutually exclusive.  Use the target-
++	 provided confluence function otherwise.  */
++      if (targetm.mode_switching.confluence)
++	forwprop_mode_info (info, entity_map[j], no_mode);
++
+       FOR_EACH_BB_FN (bb, cfun)
+ 	{
+-	  struct bb_info *info = bb_info[j];
+-	  int last_mode = no_mode;
+-
+-	  /* intialize mode in availability for bb.  */
+-	  for (i = 0; i < no_mode; i++)
+-	    if (mode_bit_p (avout[bb->index], j, i))
+-	      {
+-		if (last_mode == no_mode)
+-		  last_mode = i;
+-		if (last_mode != i)
++	  auto modes_confluence = [&](sbitmap *av)
++	    {
++	      for (int i = 0; i < no_mode; ++i)
++		if (mode_bit_p (av[bb->index], j, i))
+ 		  {
+-		    last_mode = no_mode;
+-		    break;
++		    for (int i2 = i + 1; i2 < no_mode; ++i2)
++		      if (mode_bit_p (av[bb->index], j, i2))
++			return no_mode;
++		    return i;
+ 		  }
+-	      }
+-	  info[bb->index].mode_out = last_mode;
++	      return no_mode;
++	    };
+ 
+-	  /* intialize mode out availability for bb.  */
+-	  last_mode = no_mode;
+-	  for (i = 0; i < no_mode; i++)
+-	    if (mode_bit_p (avin[bb->index], j, i))
+-	      {
+-		if (last_mode == no_mode)
+-		  last_mode = i;
+-		if (last_mode != i)
+-		  {
+-		    last_mode = no_mode;
+-		    break;
+-		  }
+-	      }
+-	  info[bb->index].mode_in = last_mode;
++	  /* intialize mode in/out availability for bb.  */
++	  if (!targetm.mode_switching.confluence)
++	    {
++	      info[bb->index].mode_out = modes_confluence (avout);
++	      info[bb->index].mode_in = modes_confluence (avin);
++	    }
+ 
+ 	  for (i = 0; i < no_mode; i++)
+ 	    if (mode_bit_p (del[bb->index], j, i))
+diff --git a/gcc/target.def b/gcc/target.def
+index 67c20bbb0..1e2091ed3 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -7025,6 +7025,23 @@ the number of modes if it does not know what mode @var{entity} has after\n\
+ Not defining the hook is equivalent to returning @var{mode}.",
+  int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
+ 
++DEFHOOK
++(confluence,
++ "By default, the mode-switching pass assumes that a given entity's modes\n\
++are mutually exclusive.  This means that the pass can only tell\n\
++@code{TARGET_MODE_EMIT} about an entity's previous mode if all\n\
++incoming paths of execution leave the entity in the same state.\n\
++\n\
++However, some entities might have overlapping, non-exclusive modes,\n\
++so that it is sometimes possible to represent ``mode @var{mode1} or mode\n\
++@var{mode2}'' with something more specific than ``mode not known''.\n\
++If this is true for at least one entity, you should define this hook\n\
++and make it return a mode that includes @var{mode1} and @var{mode2}\n\
++as possibilities.  (The mode can include other possibilities too.)\n\
++The hook should return the number of modes if no suitable mode exists\n\
++for the given arguments.",
++ int, (int entity, int mode1, int mode2), NULL)
++
+ DEFHOOK
+ (entry,
+  "If this hook is defined, it is evaluated for every @var{entity} that\n\
+-- 
+2.33.0
+
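To make the hook's contract concrete, here is a hedged sketch of a possible target implementation (entirely hypothetical; the real SME confluence hook arrives later in this series). The hook only has to return a mode that includes both arguments as possibilities, or the number of modes if no such mode exists; the mode_confluence wrapper above already handles equal and unknown inputs before calling it.

/* Hypothetical target: entity 0 has modes 0 and 1 plus a "superunion"
   mode 2 that covers both; 3 (the number of modes) means "unknown".  */
#define EXAMPLE_NUM_MODES 3

static int
example_mode_confluence (int entity, int mode1, int mode2)
{
  if (entity != 0)
    return EXAMPLE_NUM_MODES;	/* other entities keep exclusive modes */
  if (mode1 == mode2)
    return mode1;
  return 2;			/* mode 2 includes both 0 and 1 */
}

/* A target would register this alongside its other mode-switching hooks:
   #define TARGET_MODE_CONFLUENCE example_mode_confluence  */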
View file
_service:tar_scm:0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch
Added
@@ -0,0 +1,1117 @@
+From d568321f8894ed270bf0011892b86baa6d6b82bd Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 5 Mar 2024 20:46:57 +0800
+Subject: [PATCH 142/188] LoongArch: testsuite: Rewrite {x,}vfcmp-{d,f}.c to
+ avoid named registers
+
+Loops on named vector registers are not vectorized (see comment 11 of
+PR113622), so these test cases have been failing for a while.
+Rewrite them using check-function-bodies to remove the hard-coded
+register names.  A barrier is needed to always load the first operand
+before the second operand.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vfcmp-f.c: Rewrite to avoid named
+	registers.
+	* gcc.target/loongarch/vfcmp-d.c: Likewise.
+	* gcc.target/loongarch/xvfcmp-f.c: Likewise.
+	* gcc.target/loongarch/xvfcmp-d.c: Likewise.
+---
+ gcc/testsuite/gcc.target/loongarch/vfcmp-d.c  | 202 ++++++++--
+ gcc/testsuite/gcc.target/loongarch/vfcmp-f.c  | 347 ++++++++++++++----
+ gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 202 ++++++++--
+ gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 204 ++++++++--
+ 4 files changed, 816 insertions(+), 139 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
+index 8b870ef38..87e4ed19e 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
+@@ -1,28 +1,188 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { check-function-bodies "**" "" } } */
+ 
+ #define F double
+ #define I long long
+ 
+ #include "vfcmp-f.c"
+ 
+-/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */
++/*
++** compare_quiet_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.ceq.d	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cune.d	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.slt.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sle.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.slt.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sle.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sule.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sult.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sule.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sult.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.clt.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cle.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.clt.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cle.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cule.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cult.d	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cule.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cult.d	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cun.d	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_ordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cor.d	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
+diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
+index b9110b90c..8d2671998 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
+@@ -2,7 +2,8 @@
+    For details read C23 Annex F.3 and LoongArch Vol. 1 section 3.2.2.1.  */
+ 
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
++/* { dg-final { check-function-bodies "**" "" } } */
+ 
+ #ifndef F
+ #define F float
+@@ -19,160 +20,354 @@
+ typedef F VF __attribute__ ((vector_size (VL)));
+ typedef I VI __attribute__ ((vector_size (VL)));
+ 
+-register VF a asm ("f0");
+-register VF b asm ("f1");
+-register VI c asm ("f2");
++#define ARGS const VF *a, const VF *b, VI *c
+ 
+ void
+-compare_quiet_equal (void)
++compare_quiet_equal (ARGS)
+ {
+-  c = (a == b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = (_a == *b);
+ }
+ 
+ void
+-compare_quiet_not_equal (void)
++compare_quiet_not_equal (ARGS)
+ {
+-  c = (a != b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = (_a != *b);
+ }
+ 
+ void
+-compare_signaling_greater (void)
++compare_signaling_greater (ARGS)
+ {
+-  c = (a > b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = (_a > *b);
+ }
+ 
+ void
+-compare_signaling_greater_equal (void)
++compare_signaling_greater_equal (ARGS)
+ {
+-  c = (a >= b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = (_a >= *b);
+ }
+ 
+ void
+-compare_signaling_less (void)
++compare_signaling_less (ARGS)
+ {
+-  c = (a < b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = (_a < *b);
+ }
+ 
+ void
+-compare_signaling_less_equal (void)
++compare_signaling_less_equal (ARGS)
+ {
+-  c = (a <= b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = (_a <= *b);
+ }
+ 
+ void
+-compare_signaling_not_greater (void)
++compare_signaling_not_greater (ARGS)
+ {
+-  c = ~(a > b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = ~(_a > *b);
+ }
+ 
+ void
+-compare_signaling_less_unordered (void)
++compare_signaling_less_unordered (ARGS)
+ {
+-  c = ~(a >= b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = ~(_a >= *b);
+ }
+ 
+ void
+-compare_signaling_not_less (void)
++compare_signaling_not_less (ARGS)
+ {
+-  c = ~(a < b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = ~(_a < *b);
+ }
+ 
+ void
+-compare_signaling_greater_unordered (void)
++compare_signaling_greater_unordered (ARGS)
+ {
+-  c = ~(a <= b);
++  VF _a = *a;
++  asm("" ::: "memory");
++  *c = ~(_a <= *b);
+ }
+ 
+ void
+-compare_quiet_less (void)
++compare_quiet_less (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isless (a[i], b[i]) ? -1 : 0;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isless (_a[i], (*b)[i]) ? -1 : 0;
+ }
+ 
+ void
+-compare_quiet_less_equal (void)
++compare_quiet_less_equal (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_islessequal (a[i], b[i]) ? -1 : 0;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_islessequal (_a[i], (*b)[i]) ? -1 : 0;
+ }
+ 
+ void
+-compare_quiet_greater (void)
++compare_quiet_greater (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isgreater (a[i], b[i]) ? -1 : 0;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isgreater (_a[i], (*b)[i]) ? -1 : 0;
+ }
+ 
+ void
+-compare_quiet_greater_equal (void)
++compare_quiet_greater_equal (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isgreaterequal (a[i], b[i]) ? -1 : 0;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isgreaterequal (_a[i], (*b)[i]) ? -1 : 0;
+ }
+ 
+ void
+-compare_quiet_not_less (void)
++compare_quiet_not_less (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isless (a[i], b[i]) ? 0 : -1;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isless (_a[i], (*b)[i]) ? 0 : -1;
+ }
+ 
+ void
+-compare_quiet_greater_unordered (void)
++compare_quiet_greater_unordered (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_islessequal (a[i], b[i]) ? 0 : -1;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_islessequal (_a[i], (*b)[i]) ? 0 : -1;
+ }
+ 
+ void
+-compare_quiet_not_greater (void)
++compare_quiet_not_greater (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isgreater (a[i], b[i]) ? 0 : -1;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isgreater (_a[i], (*b)[i]) ? 0 : -1;
+ }
+ 
+ void
+-compare_quiet_less_unordered (void)
++compare_quiet_less_unordered (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isgreaterequal (a[i], b[i]) ? 0 : -1;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isgreaterequal (_a[i], (*b)[i]) ? 0 : -1;
+ }
+ 
+ void
+-compare_quiet_unordered (void)
++compare_quiet_unordered (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isunordered (a[i], b[i]) ? -1 : 0;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isunordered (_a[i], (*b)[i]) ? -1 : 0;
+ }
+ 
+ void
+-compare_quiet_ordered (void)
++compare_quiet_ordered (ARGS)
+ {
+-  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
+-    c[i] = __builtin_isunordered (a[i], b[i]) ? 0 : -1;
++  VF _a = *a;
++  asm("" ::: "memory");
++  for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++)
++    (*c)[i] = __builtin_isunordered (_a[i], (*b)[i]) ? 0 : -1;
+ }
+ 
+-/* The "-<function_name>" matches the .size directive after the function
+-   body, so we can ensure the instruction is in the correct function.  */
++/*
++** compare_quiet_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.ceq.s	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
+ 
+-/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */
++/*
++** compare_quiet_not_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cune.s	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.slt.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sle.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.slt.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sle.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sule.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sult.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sule.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.sult.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.clt.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cle.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.clt.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_equal:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cle.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_less:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cule.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cult.s	(\$vr[0-9]+),\2,\1
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_greater:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cule.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cult.s	(\$vr[0-9]+),\1,\2
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_unordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cun.s	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_ordered:
++**	vld	(\$vr[0-9]+),\$r4,0
++**	vld	(\$vr[0-9]+),\$r5,0
++**	vfcmp.cor.s	(\$vr[0-9]+),(\1,\2|\2,\1)
++**	vst	\3,\$r6,0
++**	jr	\$r1
++*/
+diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
+index d8017caaa..b27efebad 100644
+--- a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
+@@ -1,5 +1,6 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { check-function-bodies "**" "" } } */
+ 
+ #define F double
+ #define I long long
+@@ -7,23 +8,182 @@
+ 
+ #include "vfcmp-f.c"
+ 
+-/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */
++/*
++** compare_quiet_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.ceq.d	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cune.d	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.slt.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sle.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.slt.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sle.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sule.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sult.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sule.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sult.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.clt.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cle.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.clt.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cle.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cule.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cult.d	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cule.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cult.d	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cun.d	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_ordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cor.d	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
+diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
+index b54556475..1ca1e6c8b 100644
+--- a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
+@@ -1,27 +1,189 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2" } */
++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
++/* { dg-final { check-function-bodies "**" "" } } */
+ 
++#define F float
++#define I int
+ #define VL 32
+ 
+ #include "vfcmp-f.c"
+ 
+-/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */
+-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */
++/*
++** compare_quiet_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.ceq.s	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cune.s	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.slt.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sle.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.slt.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sle.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sule.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_less_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sult.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_not_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sule.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_signaling_greater_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.sult.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.clt.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cle.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.clt.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_equal:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cle.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_less:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cule.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_greater_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cult.s	(\$xr[0-9]+),\2,\1
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_not_greater:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cule.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_less_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cult.s	(\$xr[0-9]+),\1,\2
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_unordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cun.s	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
++
++/*
++** compare_quiet_ordered:
++**	xvld	(\$xr[0-9]+),\$r4,0
++**	xvld	(\$xr[0-9]+),\$r5,0
++**	xvfcmp.cor.s	(\$xr[0-9]+),(\1,\2|\2,\1)
++**	xvst	\3,\$r6,0
++**	jr	\$r1
++*/
+-- 
+2.43.0
+
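The load-ordering barrier that all four rewritten tests depend on is worth isolating (a sketch mirroring the pattern above, shown with a 128-bit vector for concreteness):

typedef float VF __attribute__ ((vector_size (16)));
typedef int VI __attribute__ ((vector_size (16)));

void
quiet_equal_sketch (const VF *a, const VF *b, VI *c)
{
  VF first = *a;          /* this load must happen before the barrier */
  asm ("" ::: "memory");  /* memory may change: *b cannot be hoisted above */
  *c = (first == *b);     /* second load, then the compare */
}

Without the barrier the compiler is free to load *b first, which would swap the operands of the vfcmp instruction and break the \1/\2 back-references in the expected function bodies.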
View file
_service:tar_scm:0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch
Added
@@ -0,0 +1,483 @@
+From cb4189b45a3a411958ab6aa85108f6dc7516acf5 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 11 Nov 2023 17:29:00 +0000
+Subject: [PATCH 044/157] [Backport][SME] mode-switching: Add a backprop hook
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fc8458e20a524d053f576d64a606e21f8bd03b84
+
+This patch adds a way for targets to ask that selected mode changes
+be brought forward, through a combination of:
+
+(1) requiring a mode in blocks where the entity was previously
+    transparent
+
+(2) pushing the transition at the head of a block onto incoming edges
+
+SME has two uses for this:
+
+- A "one-shot" entity that, for any given path of execution,
+  either stays off or makes exactly one transition from off to on.
+  This relies only on (1) above; see the hook description for more info.
+
+  The main purpose of using mode-switching for this entity is to
+  shrink-wrap the code that requires it.
+
+- A second entity for which all transitions must be from known
+  modes, which is enforced using a combination of (1) and (2).
+  More specifically, (1) looks for edges B1->B2 for which:
+
+  - B2 requires a specific mode and
+  - B1 does not guarantee a specific starting mode
+
+  In this system, such an edge is only possible if the entity is
+  transparent in B1.  (1) then forces B1 to require some safe common
+  mode.  Applying this inductively means that all incoming edges are
+  from known modes.  If different edges give different starting modes,
+  (2) pushes the transitions onto the edges themselves; this only
+  happens if the entity is not transparent in some predecessor block.
+
+The patch also uses the back-propagation as an excuse to do a simple
+on-the-fly optimisation.
+
+Hopefully the comments in the patch explain things a bit better.
+
+gcc/
+	* target.def (mode_switching.backprop): New hook.
+	* doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook.
+	* doc/tm.texi: Regenerate.
+	* mode-switching.cc (struct bb_info): Add single_succ.
+	(confluence_info): Add transp field.
+	(single_succ_confluence_n, single_succ_transfer): New functions.
+	(backprop_confluence_n, backprop_transfer): Likewise.
+	(optimize_mode_switching): Use them.  Push mode transitions onto
+	a block's incoming edges, if the backprop hook requires it.
+---
+ gcc/doc/tm.texi       |  28 +++++
+ gcc/doc/tm.texi.in    |   2 +
+ gcc/mode-switching.cc | 275 ++++++++++++++++++++++++++++++++++++++++++
+ gcc/target.def        |  29 +++++
+ 4 files changed, 334 insertions(+)
+
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index d7053ec9e..5f0972356 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10322,6 +10322,34 @@ The hook should return the number of modes if no suitable mode exists
+ for the given arguments.
+ @end deftypefn
+ 
++@deftypefn {Target Hook} int TARGET_MODE_BACKPROP (int @var{entity}, int @var{mode1}, int @var{mode2})
++If defined, the mode-switching pass uses this hook to back-propagate mode
++requirements through blocks that have no mode requirements of their own.
++Specifically, @var{mode1} is the mode that @var{entity} has on exit
++from a block B1 (say) and @var{mode2} is the mode that the next block
++requires @var{entity} to have.  B1 does not have any mode requirements
++of its own.
++
++The hook should return the mode that it prefers or requires @var{entity}
++to have in B1, or the number of modes if there is no such requirement.
++If the hook returns a required mode for more than one of B1's outgoing
++edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.
++
++For example, suppose there is a ``one-shot'' entity that,
++for a given execution of a function, either stays off or makes exactly
++one transition from off to on.  It is safe to make the transition at any
++time, but it is better not to do so unnecessarily.  This hook allows the
++function to manage such an entity without having to track its state at
++runtime.  Specifically, the entity would have two modes, 0 for off and
++1 for on, with 2 representing ``don't know''.  The system is forbidden from
++transitioning from 2 to 1, since 2 represents the possibility that the
++entity is already on (and the aim is to avoid having to emit code to
++check for that case).  This hook would therefore return 1 when @var{mode1}
++is 2 and @var{mode2} is 1, which would force the entity to be on in the
++source block.  Applying this inductively would remove all transitions
++in which the previous state is unknown.
++@end deftypefn
++
+ @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
+ If this hook is defined, it is evaluated for every @var{entity} that
+ needs mode switching.  It should return the mode that @var{entity} is
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index d420e62fd..fcab21744 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -6924,6 +6924,8 @@ mode or ``no mode'', depending on context.
+ 
+ @hook TARGET_MODE_CONFLUENCE
+ 
++@hook TARGET_MODE_BACKPROP
++
+ @hook TARGET_MODE_ENTRY
+ 
+ @hook TARGET_MODE_EXIT
+diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
+index 065767902..c2a0f0294 100644
+--- a/gcc/mode-switching.cc
++++ b/gcc/mode-switching.cc
+@@ -81,6 +81,7 @@ struct bb_info
+   int computing;
+   int mode_out;
+   int mode_in;
++  int single_succ;
+ };
+ 
+ /* Clear mode I from entity J in bitmap B.  */
+@@ -508,6 +509,9 @@ struct
+   /* Information about each basic block, indexed by block id.  */
+   struct bb_info *bb_info;
+ 
++  /* A bitmap of blocks for which the current entity is transparent.  */
++  sbitmap transp;
++
+   /* The entity that we're processing.  */
+   int entity;
+ 
+@@ -579,6 +583,210 @@ forward_transfer (int bb_index)
+   return true;
+ }
+ 
++/* A backwards confluence function.  Update the bb_info single_succ
++   field for E's source block, based on changes to E's destination block.
++   At the end of the dataflow problem, single_succ is the single mode
++   that all successors require (directly or indirectly), or no_mode
++   if there are conflicting requirements.
++
++   Initially, a value of no_mode + 1 means "don't know".  */
++
++static bool
++single_succ_confluence_n (edge e)
++{
++  /* The entry block has no associated mode information.  */
++  if (e->src->index == ENTRY_BLOCK)
++    return false;
++
++  /* We don't control mode changes across abnormal edges.  */
++  if (e->flags & EDGE_ABNORMAL)
++    return false;
++
++  /* Do nothing if we've already found a conflict.  */
++  struct bb_info *bb_info = confluence_info.bb_info;
++  int no_mode = confluence_info.no_mode;
++  int src_mode = bb_info[e->src->index].single_succ;
++  if (src_mode == no_mode)
++    return false;
++
++  /* Work out what mode the destination block (or its successors) require.  */
++  int dest_mode;
++  if (e->dest->index == EXIT_BLOCK)
++    dest_mode = no_mode;
++  else if (bitmap_bit_p (confluence_info.transp, e->dest->index))
++    dest_mode = bb_info[e->dest->index].single_succ;
++  else
++    dest_mode = bb_info[e->dest->index].seginfo->mode;
++
++  /* Do nothing if the destination block has no new information.  */
++  if (dest_mode == no_mode + 1 || dest_mode == src_mode)
++    return false;
++
++  /* Detect conflicting modes.  */
++  if (src_mode != no_mode + 1)
++    dest_mode = no_mode;
++
++  bb_info[e->src->index].single_succ = dest_mode;
++  return true;
++}
++
++/* A backward transfer function for computing the bb_info single_succ
++   fields, as described above single_succ_confluence.  */
++
++static bool
++single_succ_transfer (int bb_index)
++{
++  /* We don't have any field to transfer to.  Assume that, after the
++     first iteration, we are only called if single_succ has changed.
++     We should then process incoming edges if the entity is transparent.  */
++  return bitmap_bit_p (confluence_info.transp, bb_index);
++}
++
++/* Check whether the target wants to back-propagate a mode change across
++   edge E, and update the source block's computed mode if so.  Return true
++   if something changed.  */
++
++static bool
++backprop_confluence_n (edge e)
++{
++  /* The entry and exit blocks have no useful mode information.  */
++  if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK)
++    return false;
++
++  /* We don't control mode changes across abnormal edges.  */
++  if (e->flags & EDGE_ABNORMAL)
++    return false;
++
++  /* We can only require a new mode in the source block if the entity
++     was originally transparent there.  */
++  if (!bitmap_bit_p (confluence_info.transp, e->src->index))
++    return false;
++
++  /* Exit now if there is no required mode, or if all paths into the
++     source block leave the entity in the required mode.  */
++  struct bb_info *bb_info = confluence_info.bb_info;
++  int no_mode = confluence_info.no_mode;
++  int src_mode = bb_info[e->src->index].mode_out;
++  int dest_mode = bb_info[e->dest->index].mode_in;
++  if (dest_mode == no_mode || src_mode == dest_mode)
++    return false;
++
++  /* See what the target thinks about this transition.  */
++  int entity = confluence_info.entity;
++  int new_mode = targetm.mode_switching.backprop (entity, src_mode,
++						  dest_mode);
++  if (new_mode == no_mode)
++    return false;
++
++  /* The target doesn't like the current transition, but would be happy
++     with a transition from NEW_MODE.
++
++     If we force the source block to use NEW_MODE, we might introduce a
++     double transition on at least one path through the function (one to
++     NEW_MODE and then one to DEST_MODE).  Therefore, if all destination
++     blocks require the same mode, it is usually better to bring that
++     mode requirement forward.
++
++     If that isn't possible, merge the preference for this edge with
++     the preferences for other edges.  no_mode + 1 indicates that there
++     was no previous preference.  */
++  int old_mode = bb_info[e->src->index].computing;
++  if (bb_info[e->src->index].single_succ != no_mode)
++    new_mode = bb_info[e->src->index].single_succ;
++  else if (old_mode != no_mode + 1)
++    new_mode = mode_confluence (entity, old_mode, new_mode, no_mode);
++
++  if (old_mode == new_mode)
++    return false;
++
++  bb_info[e->src->index].computing = new_mode;
++  return true;
++}
++
++/* If the current entity was originally transparent in block BB_INDEX,
++   update the incoming mode to match the outgoing mode.  Register a mode
++   change if the entity is no longer transparent.
++
++   Also, as an on-the-fly optimization, check whether the entity was
++   originally transparent in BB_INDEX and if all successor blocks require
++   the same mode.  If so, anticipate the mode change in BB_INDEX if
++   doing it on the incoming edges would require no more mode changes than
++   doing it on the outgoing edges.  The aim is to reduce the total number
++   of mode changes emitted for the function (and thus reduce code size and
++   cfg complexity) without increasing the number of mode changes on any
++   given path through the function.  A typical case where it helps is:
++
++	T
++       / \
++      T   M
++       \ /
++	M
++
++   where the entity is transparent in the T blocks and is required to have
++   mode M in the M blocks.  If there are no redundancies leading up to this,
++   there will be two mutually-exclusive changes to mode M, one on each of
++   the T->M edges.  The optimization instead converts it to:
++
++	T	     T		  M
++       / \	    / \		 / \
++      T   M   ->   M   M   ->	M   M
++       \ /	    \ /		 \ /
++	M	     M		  M
++
++   which creates a single transition to M for both paths through the diamond.
++
++   Return true if something changed.  */
++
++static bool
++backprop_transfer (int bb_index)
++{
++  /* The entry and exit blocks have no useful mode information.  */
++  if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
++    return false;
++
++  /* We can only require a new mode if the entity was previously
++     transparent.  */
++  if (!bitmap_bit_p (confluence_info.transp, bb_index))
++    return false;
++
++  struct bb_info *bb_info = confluence_info.bb_info;
++  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
++  int no_mode = confluence_info.no_mode;
++  int mode_in = bb_info[bb_index].mode_in;
++  int mode_out = bb_info[bb_index].computing;
++  if (mode_out == no_mode + 1)
++    {
++      /* The entity is still transparent for this block.  See whether
++	 all successor blocks need the same mode, either directly or
++	 indirectly.  */
++      mode_out = bb_info[bb_index].single_succ;
++      if (mode_out == no_mode)
++	return false;
++
++      /* Get a minimum bound on the number of transitions that would be
++	 removed if BB itself required MODE_OUT.  */
++      unsigned int moved = 0;
++      for (edge e : bb->succs)
++	if (e->dest->index != EXIT_BLOCK
++	    && mode_out == bb_info[e->dest->index].seginfo->mode)
++	  moved += 1;
++
++      /* See whether making the mode change on all incoming edges would
++	 be no worse than making it on MOVED outgoing edges.  */
++      if (moved < EDGE_COUNT (bb->preds))
++	return false;
++
++      bb_info[bb_index].mode_out = mode_out;
++      bb_info[bb_index].computing = mode_out;
++    }
++  else if (mode_out == mode_in)
++    return false;
++
++  bb_info[bb_index].mode_in = mode_out;
++  bb_info[bb_index].seginfo->mode = mode_out;
++  return true;
++}
++
+ /* Find all insns that need a particular mode setting, and insert the
+    necessary mode switches.  Return true if we did work.  */
+ 
+@@ -684,6 +892,7 @@ optimize_mode_switching (void)
+ 	}
+ 
+       confluence_info.bb_info = info;
++      confluence_info.transp = nullptr;
+       confluence_info.entity = entity;
+       confluence_info.no_mode = no_mode;
+ 
+@@ -695,6 +904,9 @@ optimize_mode_switching (void)
+ 
+     };
+ 
++  if (targetm.mode_switching.backprop)
++    clear_aux_for_edges ();
++
+   for (j = n_entities - 1; j >= 0; j--)
+     {
+       int e = entity_map[j];
+@@ -817,6 +1029,53 @@ optimize_mode_switching (void)
+ 	    }
+ 	}
+ 
++      /* If the target requests it, back-propagate selected mode requirements
++	 through transparent blocks.  */
++      if (targetm.mode_switching.backprop)
++	{
++	  /* First work out the mode on entry to and exit from each block.  */
++	  forwprop_mode_info (info, e, no_mode);
++
++	  /* Compute the single_succ fields, as described above
++	     single_succ_confluence.  */
++	  FOR_EACH_BB_FN (bb, cfun)
++	    info[bb->index].single_succ = no_mode + 1;
++
++	  confluence_info.transp = transp_all;
++	  bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
++	  df_simple_dataflow (DF_BACKWARD, NULL, NULL,
++			      single_succ_confluence_n,
++			      single_succ_transfer, blocks,
++			      df_get_postorder (DF_BACKWARD),
++			      df_get_n_blocks (DF_BACKWARD));
++
++	  FOR_EACH_BB_FN (bb, cfun)
++	    {
++	      /* Repurpose mode_in as the first mode required by the block,
++		 or the output mode if none.  */
++	      if (info[bb->index].seginfo->mode != no_mode)
++		info[bb->index].mode_in = info[bb->index].seginfo->mode;
++
++	      /* In transparent blocks, use computing == no_mode + 1
++		 to indicate that no propagation has taken place.  */
++	      if (info[bb->index].computing == no_mode)
++		info[bb->index].computing = no_mode + 1;
++	    }
++
++	  bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
++	  df_simple_dataflow (DF_BACKWARD, NULL, NULL, backprop_confluence_n,
++			      backprop_transfer, blocks,
++			      df_get_postorder (DF_BACKWARD),
++			      df_get_n_blocks (DF_BACKWARD));
++
++	  /* Any block that now computes a mode is no longer transparent.  */
++	  FOR_EACH_BB_FN (bb, cfun)
++	    if (info[bb->index].computing == no_mode + 1)
++	      info[bb->index].computing = no_mode;
++	    else if (info[bb->index].computing != no_mode)
++	      bitmap_clear_bit (transp_all, bb->index);
++	}
++
+       /* Set the anticipatable and computing arrays.  */
+       for (i = 0; i < no_mode; i++)
+ 	{
+@@ -900,6 +1159,22 @@ optimize_mode_switching (void)
+ 	  for (i = 0; i < no_mode; i++)
+ 	    if (mode_bit_p (del[bb->index], j, i))
+ 	      info[bb->index].seginfo->mode = no_mode;
++
++	  /* See whether the target can perform the first transition.
++	     If not, push it onto the incoming edges.  The earlier backprop
++	     pass should ensure that the resulting transitions are valid.  */
++	  if (targetm.mode_switching.backprop)
++	    {
++	      int from_mode = info[bb->index].mode_in;
++	      int to_mode = info[bb->index].seginfo->mode;
++	      if (targetm.mode_switching.backprop (entity_map[j], from_mode,
++						   to_mode) != no_mode)
++		{
++		  for (edge e : bb->preds)
++		    e->aux = (void *) (intptr_t) (to_mode + 1);
++		  info[bb->index].mode_in = to_mode;
++		}
++	    }
+ 	}
+ 
+       /* Now output the remaining mode sets in all the segments.  */
+diff --git a/gcc/target.def b/gcc/target.def
+index 1e2091ed3..4d77c1523 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -7042,6 +7042,35 @@ The hook should return the number of modes if no suitable mode exists\n\
+ for the given arguments.",
+  int, (int entity, int mode1, int mode2), NULL)
+ 
++DEFHOOK
++(backprop,
++ "If defined, the mode-switching pass uses this hook to back-propagate mode\n\
++requirements through blocks that have no mode requirements of their own.\n\
++Specifically, @var{mode1} is the mode that @var{entity} has on exit\n\
++from a block B1 (say) and @var{mode2} is the mode that the next block\n\
++requires @var{entity} to have.  B1 does not have any mode requirements\n\
++of its own.\n\
++\n\
++The hook should return the mode that it prefers or requires @var{entity}\n\
++to have in B1, or the number of modes if there is no such requirement.\n\
++If the hook returns a required mode for more than one of B1's outgoing\n\
++edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.\n\
++\n\
++For example, suppose there is a ``one-shot'' entity that,\n\
++for a given execution of a function, either stays off or makes exactly\n\
++one transition from off to on.
It is safe to make the transition at any\n\ ++time, but it is better not to do so unnecessarily. This hook allows the\n\ ++function to manage such an entity without having to track its state at\n\ ++runtime. Specifically. the entity would have two modes, 0 for off and\n\ ++1 for on, with 2 representing ``don't know''. The system is forbidden from\n\ ++transitioning from 2 to 1, since 2 represents the possibility that the\n\ ++entity is already on (and the aim is to avoid having to emit code to\n\ ++check for that case). This hook would therefore return 1 when @var{mode1}\n\ ++is 2 and @var{mode2} is 1, which would force the entity to be on in the\n\ ++source block. Applying this inductively would remove all transitions\n\ ++in which the previous state is unknown.", ++ int, (int entity, int mode1, int mode2), NULL) ++ + DEFHOOK + (entry, + "If this hook is defined, it is evaluated for every @var{entity} that\n\ +-- +2.33.0 +
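As an illustration of the new hook, a minimal sketch of a TARGET_MODE_BACKPROP implementation for the ``one-shot'' entity described in the documentation above (modes 0 = off, 1 = on, 2 = don't know; 3 = the number of modes, meaning "no requirement"). The function name is hypothetical and the code is not part of the patch:

/* Hypothetical sketch of a TARGET_MODE_BACKPROP implementation for a
   "one-shot" entity: mode 0 is off, mode 1 is on, mode 2 is "don't know",
   and 3 is the number of modes ("no requirement").  */
static int
one_shot_mode_backprop (int entity ATTRIBUTE_UNUSED, int mode1, int mode2)
{
  /* MODE1 is the mode on exit from block B1; MODE2 is the mode required
     by the next block.  A transition from "don't know" to "on" is
     forbidden, so require B1 itself to already be in mode 1.  */
  if (mode1 == 2 && mode2 == 1)
    return 1;

  /* Otherwise impose no requirement on B1.  */
  return 3;
}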
View file
_service:tar_scm:0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch
Added
@@ -0,0 +1,80 @@
+From 415d38d84b2e363a2d512b54baac5532553f1402 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 6 Mar 2024 09:19:59 +0800
+Subject: [PATCH 143/188] LoongArch: Use /lib instead of /lib64 as the library
+ search path for MUSL.
+
+gcc/ChangeLog:
+
+	* config.gcc: Add a case for loongarch*-*-linux-musl*.
+	* config/loongarch/linux.h: Disable the multilib-compatible
+	treatment for *musl* targets.
+	* config/loongarch/musl.h: New file.
+---
+ gcc/config.gcc               |  3 +++
+ gcc/config/loongarch/linux.h |  4 +++-
+ gcc/config/loongarch/musl.h  | 23 +++++++++++++++++++++++
+ 3 files changed, 29 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/config/loongarch/musl.h
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 039187fa2..499b36b45 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -2509,6 +2509,9 @@ riscv*-*-freebsd*)
+ 
+ loongarch*-*-linux*)
+ 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
++	case ${target} in
++	  *-linux-musl*) tm_file="${tm_file} loongarch/musl.h"
++	esac
+ 	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h"
+ 	extra_options="${extra_options} linux-android.opt"
+ 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
+diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h
+index 00039ac18..38aa4da2c 100644
+--- a/gcc/config/loongarch/linux.h
++++ b/gcc/config/loongarch/linux.h
+@@ -21,7 +21,9 @@ along with GCC; see the file COPYING3.  If not see
+  * This ensures that a compiler configured with --disable-multilib
+  * can work in a multilib environment.  */
+ 
+-#if defined(LA_DISABLE_MULTILIB) && defined(LA_DISABLE_MULTIARCH)
++#if !defined(LA_DEFAULT_TARGET_MUSL) \
++  && defined(LA_DISABLE_MULTILIB) \
++  && defined(LA_DISABLE_MULTIARCH)
+ 
+   #if DEFAULT_ABI_BASE == ABI_BASE_LP64D
+     #define ABI_LIBDIR "lib64"
+diff --git a/gcc/config/loongarch/musl.h b/gcc/config/loongarch/musl.h
+new file mode 100644
+index 000000000..fa43bc866
+--- /dev/null
++++ b/gcc/config/loongarch/musl.h
+@@ -0,0 +1,23 @@
++/* Definitions for MUSL C library support.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++
++#ifndef LA_DEFAULT_TARGET_MUSL
++#define LA_DEFAULT_TARGET_MUSL
++#endif
+-- 
+2.43.0
+
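A rough sketch of how the two headers combine for a musl target, assuming an LP64D configuration with both LA_DISABLE_MULTILIB and LA_DISABLE_MULTIARCH in effect; the final "lib" fallback is paraphrased rather than quoted from the tree:

/* Paraphrased combination of musl.h and linux.h, not verbatim.  */
#define LA_DEFAULT_TARGET_MUSL	/* from loongarch/musl.h, included first */
#define LA_DISABLE_MULTILIB	/* assumed: --disable-multilib */
#define LA_DISABLE_MULTIARCH	/* assumed: --disable-multiarch */

#if !defined(LA_DEFAULT_TARGET_MUSL) \
  && defined(LA_DISABLE_MULTILIB) \
  && defined(LA_DISABLE_MULTIARCH)
#define ABI_LIBDIR "lib64"	/* glibc LP64D case; now skipped for musl */
#endif

#ifndef ABI_LIBDIR
#define ABI_LIBDIR "lib"	/* musl targets search /lib instead of /lib64 */
#endif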
View file
_service:tar_scm:0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch
Added
@@ -0,0 +1,81 @@
+From 4553f252c10968037edceba4abe3984dc9bbad2a Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Fri, 1 Dec 2023 08:36:15 +0000
+Subject: [PATCH 045/157] [Backport][SME] aarch64: Add a result_mode helper
+ function
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a1bc121c00e30bd1bdaa62d87cbe64eb88e74f45
+
+SME will add more intrinsics whose expansion code requires
+the mode of the function return value.  This patch adds an
+associated helper routine.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h
+	(function_expander::result_mode): New member function.
+	* config/aarch64/aarch64-sve-builtins-base.cc
+	(svld234_impl::expand): Use it.
+	* config/aarch64/aarch64-sve-builtins.cc
+	(function_expander::get_reg_target): Likewise.
+---
+ gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
+ gcc/config/aarch64/aarch64-sve-builtins.cc      | 2 +-
+ gcc/config/aarch64/aarch64-sve-builtins.h       | 9 +++++++++
+ 3 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+index 56c9d75e7..c9bf13792 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+@@ -1272,7 +1272,7 @@ public:
+   rtx
+   expand (function_expander &e) const OVERRIDE
+   {
+-    machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
++    machine_mode tuple_mode = e.result_mode ();
+     insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
+ 					     tuple_mode, e.vector_mode (0));
+     return e.use_contiguous_load_insn (icode);
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index e168c8334..91af96687 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -2796,7 +2796,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
+ rtx
+ function_expander::get_reg_target ()
+ {
+-  machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
++  machine_mode target_mode = result_mode ();
+   if (!possible_target || GET_MODE (possible_target) != target_mode)
+     possible_target = gen_reg_rtx (target_mode);
+   return possible_target;
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 0d130b871..52994cde0 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -528,6 +528,8 @@ public:
+   insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0,
+ 					   machine_mode = E_VOIDmode);
+ 
++  machine_mode result_mode () const;
++
+   bool overlaps_input_p (rtx);
+ 
+   rtx convert_to_pmode (rtx);
+@@ -877,6 +879,13 @@ function_base::call_properties (const function_instance &instance) const
+   return flags;
+ }
+ 
++/* Return the mode of the result of a call.  */
++inline machine_mode
++function_expander::result_mode () const
++{
++  return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
++}
++
+ }
+ 
+ #endif
+-- 
+2.33.0
+
View file
_service:tar_scm:0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch
Added
@@ -0,0 +1,551 @@ +From 2170e0e811cb1b592f7577571f10b5ab95da9eaa Mon Sep 17 00:00:00 2001 +From: chenxiaolong <chenxiaolong@loongson.cn> +Date: Fri, 25 Oct 2024 06:05:59 +0000 +Subject: PATCH 144/188 LoongArch: testsuite:Fix problems with incorrect + results in vector test cases. + +In simd_correctness_check.h, the role of the macro ASSERTEQ_64 is to check the +result of the passed vector values for the 64-bit data of each array element. +It turns out that it uses the abs() function to check only the lower 32 bits +of the data at a time, so it replaces abs() with the llabs() function. + +However, the following two problems may occur after modification: + +1.FAIL in lasx-xvfrint_s.c and lsx-vfrint_s.c +The reason for the error is because vector test cases that use __m{128,256} to +define vector types are composed of 32-bit primitive types, they should use +ASSERTEQ_32 instead of ASSERTEQ_64 to check for correctness. + +2.FAIL in lasx-xvshuf_b.c and lsx-vshuf.c +The cause of the error is that the expected result of the function setting in +the test case is incorrect. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c: Replace + ASSERTEQ_64 with the macro ASSERTEQ_32. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Modify the expected + test results of some functions according to the function of the vector + instruction. + * gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c: Same + modification as lasx-xvfrint_s.c. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Same + modification as lasx-xvshuf_b.c. + * gcc.target/loongarch/vector/simd_correctness_check.h: Use the llabs() + function instead of abs() to check the correctness of the results. +--- + .../loongarch/vector/lasx/lasx-xvfrint_s.c | 58 +++++++++---------- + .../loongarch/vector/lsx/lsx-vfrint_s.c | 50 ++++++++-------- + .../loongarch/vector/simd_correctness_check.h | 2 +- + 3 files changed, 55 insertions(+), 55 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +index fbfe300ea..4538528a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +@@ -184,7 +184,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xffffffff; +@@ -203,7 +203,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xffffffff; +@@ -222,7 +222,7 @@ main () + *((int *)&__m256_result1) = 0xffffffff; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x01010101; + *((int *)&__m256_op06) = 0x01010101; +@@ -241,7 +241,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 
(__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -260,7 +260,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xffffffff; +@@ -279,7 +279,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xffffffff; +@@ -298,7 +298,7 @@ main () + *((int *)&__m256_result1) = 0xffffffff; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x01010101; + *((int *)&__m256_op06) = 0x01010101; +@@ -317,7 +317,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x55555555; + *((int *)&__m256_op06) = 0x36aaaaac; +@@ -336,7 +336,7 @@ main () + *((int *)&__m256_result1) = 0x55555555; + *((int *)&__m256_result0) = 0x80000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -355,7 +355,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffc741; + *((int *)&__m256_op06) = 0x8a023680; +@@ -374,7 +374,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0xffffffff; +@@ -393,7 +393,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00200101; + *((int *)&__m256_op06) = 0x01610000; +@@ -412,7 +412,7 @@ main () + *((int *)&__m256_result1) = 0x3f800000; + *((int *)&__m256_result0) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -431,7 +431,7 @@ main () + *((int *)&__m256_result1) = 0xfefefefe; + *((int *)&__m256_result0) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int 
*)&__m256_op07) = 0x1c1c1c1c; + *((int *)&__m256_op06) = 0x1c1c1c1c; +@@ -450,7 +450,7 @@ main () + *((int *)&__m256_result1) = 0xfffffffe; + *((int *)&__m256_result0) = 0xffffff00; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -469,7 +469,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -488,7 +488,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xffffffff; +@@ -507,7 +507,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x5d20a0a1; + *((int *)&__m256_op06) = 0x5d20a0a1; +@@ -526,7 +526,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x001d001d; +@@ -545,7 +545,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -564,7 +564,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -583,7 +583,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -602,7 +602,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xfffffffe; +@@ -621,7 +621,7 @@ main () + *((int *)&__m256_result1) = 0xffffffff; + *((int *)&__m256_result0) = 0xfffffffe; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 
0x00000000; +@@ -640,7 +640,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0x00000000; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x00000000; + *((int *)&__m256_op06) = 0x00000000; +@@ -659,7 +659,7 @@ main () + *((int *)&__m256_result1) = 0x00000000; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0x80000000; + *((int *)&__m256_op06) = 0x80000000; +@@ -678,7 +678,7 @@ main () + *((int *)&__m256_result1) = 0xffffffff; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xffffffff; + *((int *)&__m256_op06) = 0xffffffff; +@@ -697,7 +697,7 @@ main () + *((int *)&__m256_result1) = 0xffffffff; + *((int *)&__m256_result0) = 0xffffffff; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op07) = 0xf5fffc00; + *((int *)&__m256_op06) = 0xfc000000; +@@ -716,7 +716,7 @@ main () + *((int *)&__m256_result1) = 0xf5fffc00; + *((int *)&__m256_result0) = 0xfc000000; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + return 0; + } +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +index 61f28325a..5ba91ee51 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +@@ -79,7 +79,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00130013; + *((int *)&__m128_op02) = 0x00130013; +@@ -90,7 +90,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x20202020; + *((int *)&__m128_op02) = 0x20202020; +@@ -101,7 +101,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000000; +@@ -112,7 +112,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xffffffff; + *((int *)&__m128_op02) = 0xffffffff; +@@ -123,7 +123,7 @@ main () + *((int *)&__m128_result1) = 0xffffffff; + *((int *)&__m128_result0) = 0xffffffff; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- 
ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000001; +@@ -134,7 +134,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000000; +@@ -145,7 +145,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xfffbfffb; + *((int *)&__m128_op02) = 0xfffbfffb; +@@ -156,7 +156,7 @@ main () + *((int *)&__m128_result1) = 0xfffbfffb; + *((int *)&__m128_result0) = 0xfffbfffb; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x0ff780a1; + *((int *)&__m128_op02) = 0x0efc01af; +@@ -167,7 +167,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0xfe7f0000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000000; +@@ -178,7 +178,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0xefffffff; +@@ -189,7 +189,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xffffffff; + *((int *)&__m128_op02) = 0xffffff00; +@@ -200,7 +200,7 @@ main () + *((int *)&__m128_result1) = 0xffffffff; + *((int *)&__m128_result0) = 0xffffff00; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xffffb96b; + *((int *)&__m128_op02) = 0xffff57c9; +@@ -211,7 +211,7 @@ main () + *((int *)&__m128_result1) = 0xffff6080; + *((int *)&__m128_result0) = 0xffff4417; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00ff00ff; + *((int *)&__m128_op02) = 0x00ff00ff; +@@ -222,7 +222,7 @@ main () + *((int *)&__m128_result1) = 0x62cbf96e; + *((int *)&__m128_result0) = 0x4acfaf40; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00002000; +@@ -233,7 +233,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x3f800000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, 
__m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xffffffff; + *((int *)&__m128_op02) = 0xffffffff; +@@ -244,7 +244,7 @@ main () + *((int *)&__m128_result1) = 0xffffffff; + *((int *)&__m128_result0) = 0xffffffff; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x63636363; + *((int *)&__m128_op02) = 0x63abdf16; +@@ -255,7 +255,7 @@ main () + *((int *)&__m128_result1) = 0x42000000; + *((int *)&__m128_result0) = 0x3f800000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000000; +@@ -266,7 +266,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrm_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xa5c4c774; + *((int *)&__m128_op02) = 0x856ba83b; +@@ -277,7 +277,7 @@ main () + *((int *)&__m128_result1) = 0xbf800000; + *((int *)&__m128_result0) = 0x54691124; + __m128_out = __lsx_vfrintrm_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00010002; +@@ -288,7 +288,7 @@ main () + *((int *)&__m128_result1) = 0xffffffff; + *((int *)&__m128_result0) = 0xffd60015; + __m128_out = __lsx_vfrintrm_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0xffffffff; + *((int *)&__m128_op02) = 0x3c992b2e; +@@ -299,7 +299,7 @@ main () + *((int *)&__m128_result1) = 0xffffffff; + *((int *)&__m128_result0) = 0xffff730f; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000001; +@@ -310,7 +310,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x18171615; + *((int *)&__m128_op02) = 0x17161514; +@@ -321,7 +321,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x62cbf96e; + *((int *)&__m128_op02) = 0x4acfaf40; +@@ -332,7 +332,7 @@ main () + *((int *)&__m128_result1) = 0xf0bc9a52; + *((int *)&__m128_result0) = 0x78285a4a; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op03) = 0x00000000; + *((int *)&__m128_op02) = 0x00000000; +@@ -343,7 +343,7 @@ main () + *((int *)&__m128_result1) = 0x00000000; + *((int *)&__m128_result0) = 0x00000000; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + return 0; + } +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
+index 551340bd5..c1adab586 100644
+--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
++++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
+@@ -10,7 +10,7 @@
+     {                                                                        \
+       long long *temp_ref = (long long *)&ref[i],                           \
+                 *temp_res = (long long *)&res[i];                           \
+-      if (abs (*temp_ref - *temp_res) > 0)                                  \
++      if (llabs (*temp_ref - *temp_res) > 0)                                \
+         {                                                                    \
+           printf (" error: %s at line %ld , expected " #ref                 \
+                   "[%ld]:0x%016lx, got: 0x%016lx\n",                        \
+-- 
+2.43.0
+
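The abs()/llabs() pitfall fixed above is easy to reproduce in isolation: abs() takes an int, so a 64-bit difference whose low 32 bits happen to be zero is truncated to 0 and the mismatch escapes detection. A minimal standalone sketch with hypothetical values, not from the testsuite:

#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  long long ref = 0x1234567800000000LL;	/* expected vector element */
  long long res = 0LL;			/* actual result: clearly wrong */
  long long diff = ref - res;

  /* abs () only sees the low 32 bits of DIFF, which are zero here,
     so the mismatch goes undetected.  */
  printf ("abs:   mismatch detected = %d\n", abs ((int) diff) > 0);	/* 0 */
  /* llabs () compares the full 64 bits and catches the error.  */
  printf ("llabs: mismatch detected = %d\n", llabs (diff) > 0);		/* 1 */
  return 0;
}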
View file
_service:tar_scm:0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
Added
@@ -0,0 +1,232 @@
+From 60612cbd9cdd9b5079c0505b9d53c9cd98fba4b1 Mon Sep 17 00:00:00 2001
+From: Kewen Lin <linkw@linux.ibm.com>
+Date: Tue, 15 Nov 2022 20:26:07 -0600
+Subject: [PATCH 046/157] [Backport][SME] rtl: Try to remove EH edges after
 {pro,epi}logue generation [PR90259]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=63e1b2e767a3f4695373c2406ff719c0a60c1858
+
+After prologue and epilogue generation, the judgement on whether
+one memory access onto stack frame may trap or not could change,
+since we get more exact stack information by now.
+
+As PR90259 shows, some memory access becomes impossible to trap
+any more after prologue and epilogue generation, it can make
+subsequent optimization be able to remove it if safe, but it
+results in unexpected control flow status due to REG_EH_REGION
+note missing.
+
+This patch proposes to try to remove EH edges with function
+purge_all_dead_edges after prologue and epilogue generation,
+it simplifies CFG as early as we can and don't need any fixup
+in downstream passes.
+
+CFG simplification result with PR90259's case as example:
+
+*before*
+
+   18: %1:TF=call [`__gcc_qdiv'] argc:0
+      REG_EH_REGION 0x2
+   77: NOTE_INSN_BASIC_BLOCK 3
+   19: NOTE_INSN_DELETED
+   20: NOTE_INSN_DELETED
+  110: [%31:SI+0x20]=%1:DF
+      REG_EH_REGION 0x2
+  116: NOTE_INSN_BASIC_BLOCK 4
+  111: [%31:SI+0x28]=%2:DF
+      REG_EH_REGION 0x2
+   22: NOTE_INSN_BASIC_BLOCK 5
+  108: %0:DF=[%31:SI+0x20]
+      REG_EH_REGION 0x2
+  117: NOTE_INSN_BASIC_BLOCK 6
+  109: %1:DF=[%31:SI+0x28]
+      REG_EH_REGION 0x2
+   79: NOTE_INSN_BASIC_BLOCK 7
+   26: [%31:SI+0x18]=%0:DF
+  104: pc=L69
+  105: barrier
+
+*after*
+
+   18: %1:TF=call [`__gcc_qdiv'] argc:0
+      REG_EH_REGION 0x2
+   77: NOTE_INSN_BASIC_BLOCK 3
+   19: NOTE_INSN_DELETED
+   20: NOTE_INSN_DELETED
+  110: [%31:SI+0x20]=%1:DF
+  111: [%31:SI+0x28]=%2:DF
+  108: %0:DF=[%31:SI+0x20]
+  109: %1:DF=[%31:SI+0x28]
+   26: [%31:SI+0x18]=%0:DF
+  104: pc=L69
+  105: barrier
+
+	PR rtl-optimization/90259
+
+gcc/ChangeLog:
+
+	* function.cc (rest_of_handle_thread_prologue_and_epilogue): Add
+	parameter fun, and call function purge_all_dead_edges.
+	(pass_thread_prologue_and_epilogue::execute): Name unnamed parameter
+	as fun, and use it for rest_of_handle_thread_prologue_and_epilogue.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/powerpc/pr90259.C: New.
+---
+ gcc/function.cc                            |  13 ++-
+ gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +++++++++++++++++++++
+ 2 files changed, 113 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C
+
+diff --git a/gcc/function.cc b/gcc/function.cc
+index 49c7ccf4b..28de39dd6 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -6529,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt)
+ }
+ 
+ static unsigned int
+-rest_of_handle_thread_prologue_and_epilogue (void)
++rest_of_handle_thread_prologue_and_epilogue (function *fun)
+ {
+   /* prepare_shrink_wrap is sensitive to the block structure of the control
+      flow graph, so clean it up first.  */
+@@ -6546,6 +6546,13 @@ rest_of_handle_thread_prologue_and_epilogue (void)
+      Fix that up.  */
+   fixup_partitions ();
+ 
++  /* After prologue and epilogue generation, the judgement on whether
++     one memory access onto stack frame may trap or not could change,
++     since we get more exact stack information by now.  So try to
++     remove any EH edges here, see PR90259.  */
++  if (fun->can_throw_non_call_exceptions)
++    purge_all_dead_edges ();
++
+   /* Shrink-wrapping can result in unreachable edges in the epilogue,
+      see PR57320.  */
+   cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
+@@ -6614,9 +6621,9 @@
+ public:
+   {}
+ 
+   /* opt_pass methods: */
+-  virtual unsigned int execute (function *)
++  unsigned int execute (function * fun) final override
+   {
+-    return rest_of_handle_thread_prologue_and_epilogue ();
++    return rest_of_handle_thread_prologue_and_epilogue (fun);
+   }
+ 
+ }; // class pass_thread_prologue_and_epilogue
+diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C b/gcc/testsuite/g++.target/powerpc/pr90259.C
+new file mode 100644
+index 000000000..db75ac7fe
+--- /dev/null
++++ b/gcc/testsuite/g++.target/powerpc/pr90259.C
+@@ -0,0 +1,103 @@
++/* { dg-require-effective-target long_double_ibm128 } */
++/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */
++/* { dg-add-options long_double_ibm128 } */
++
++/* Verify there is no ICE.  */
++
++template <int a> struct b
++{
++  static constexpr int c = a;
++};
++template <bool a> using d = b<a>;
++struct e
++{
++  int f;
++  int
++  g ()
++  {
++    return __builtin_ceil (f / (long double) h);
++  }
++  float h;
++};
++template <typename, typename> using k = d<!bool ()>;
++template <typename> class n
++{
++public:
++  e ae;
++  void af ();
++};
++template <typename l>
++void
++n<l>::af ()
++{
++  ae.g ();
++}
++template <bool> using m = int;
++template <typename ag, typename ah, typename ai = m<k<ag, ah>::c>>
++using aj = n<ai>;
++struct o
++{
++  void
++  af ()
++  {
++    al.af ();
++  }
++  aj<int, int> al;
++};
++template <typename> class am;
++template <typename i> class ao
++{
++protected:
++  static i *ap (int);
++};
++template <typename, typename> class p;
++template <typename ar, typename i, typename... j> class p<ar (j...), i> : ao<i>
++{
++public:
++  static ar
++  as (const int &p1, j...)
++  {
++    (*ao<i>::ap (p1)) (j ()...);
++  }
++};
++template <typename ar, typename... j> class am<ar (j...)>
++{
++  template <typename, typename> using av = int;
++
++public:
++  template <typename i, typename = av<d<!bool ()>, void>,
++	    typename = av<i, void>>
++  am (i);
++  using aw = ar (*) (const int &, j...);
++  aw ax;
++};
++template <typename ar, typename... j>
++template <typename i, typename, typename>
++am<ar (j...)>::am (i)
++{
++  ax = p<ar (j...), i>::as;
++}
++struct G
++{
++  void ba (am<void (o)>);
++};
++struct q
++{
++  q ()
++  {
++    G a;
++    a.ba (r ());
++  }
++  struct r
++  {
++    void
++    operator() (o p1)
++    try
++      {
++	p1.af ();
++      }
++    catch (int)
++      {
++      }
++  };
++} s;
+-- 
+2.33.0
+
View file
_service:tar_scm:0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch
Added
@@ -0,0 +1,130 @@
+From 44a9ae67e19c0d744bd744cb0e9ae9e0069e40f1 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 5 Mar 2024 14:43:04 +0800
+Subject: [PATCH 145/188] LoongArch: Fixed an issue with the implementation of
+ the template atomic_compare_and_swapsi.
+
+If the hardware does not support LAMCAS, atomic_compare_and_swapsi needs to be
+implemented through "ll.w+sc.w".  In the implementation of the instruction
+sequence, it is necessary to determine whether the two registers are equal.
+Since LoongArch's comparison instructions do not distinguish between 32-bit
+and 64-bit, the two operand registers being compared must both be
+sign-extended.  One of the operands is loaded from memory through the "ll.w"
+instruction, which guarantees that it is sign-extended; however, the value of
+the other operand register is not guaranteed to be sign-extended.
+
+gcc/ChangeLog:
+
+	* config/loongarch/sync.md (atomic_cas_value_strong<mode>):
+	In loongarch64, a sign extension operation is added when
+	operands[2] is a register operand and the mode is SImode.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/loongarch/atomic-cas-int.C: New test.
+---
+ gcc/config/loongarch/sync.md                  | 46 ++++++++++++++-----
+ .../g++.target/loongarch/atomic-cas-int.C     | 32 +++++++++++++
+ 2 files changed, 67 insertions(+), 11 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+
+diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
+index 5da5c2780..2e008c487 100644
+--- a/gcc/config/loongarch/sync.md
++++ b/gcc/config/loongarch/sync.md
+@@ -245,18 +245,42 @@
+    (clobber (match_scratch:GPR 5 "=&r"))]
+   ""
+ {
+-  return "1:\\n\\t"
+-	 "ll.<amo>\\t%0,%1\\n\\t"
+-	 "bne\\t%0,%z2,2f\\n\\t"
+-	 "or%i3\\t%5,$zero,%3\\n\\t"
+-	 "sc.<amo>\\t%5,%1\\n\\t"
+-	 "beqz\\t%5,1b\\n\\t"
+-	 "b\\t3f\\n\\t"
+-	 "2:\\n\\t"
+-	 "%G4\\n\\t"
+-	 "3:\\n\\t";
++  output_asm_insn ("1:", operands);
++  output_asm_insn ("ll.<amo>\t%0,%1", operands);
++
++  /* Like the test case atomic-cas-int.C, in loongarch64, at O1 and higher,
++     the return value of val_without_const_folding will not be truncated and
++     will be passed directly to the function compare_exchange_strong.
++     However, the instruction 'bne' does not distinguish between 32-bit and
++     64-bit operations, so if the upper 32 bits of the register are not a
++     sign extension of bit 31, the comparison here may not be valid.
++     This will affect the result of the operation.  */
++
++  if (TARGET_64BIT && REG_P (operands[2])
++      && GET_MODE (operands[2]) == SImode)
++    {
++      output_asm_insn ("addi.w\t%5,%2,0", operands);
++      output_asm_insn ("bne\t%0,%5,2f", operands);
++    }
++  else
++    output_asm_insn ("bne\t%0,%z2,2f", operands);
++
++  output_asm_insn ("or%i3\t%5,$zero,%3", operands);
++  output_asm_insn ("sc.<amo>\t%5,%1", operands);
++  output_asm_insn ("beqz\t%5,1b", operands);
++  output_asm_insn ("b\t3f", operands);
++  output_asm_insn ("2:", operands);
++  output_asm_insn ("%G4", operands);
++  output_asm_insn ("3:", operands);
++
++  return "";
+ }
+-  (set (attr "length") (const_int 28)))
++  (set (attr "length")
++       (if_then_else
++	  (and (match_test "GET_MODE (operands[2]) == SImode")
++	       (match_test "REG_P (operands[2])"))
++	  (const_int 32)
++	  (const_int 28))))
+ 
+ (define_insn "atomic_cas_value_strong<mode>_amcas"
+   [(set (match_operand:QHWD 0 "register_operand" "=&r")
+diff --git a/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+new file mode 100644
+index 000000000..830ce4826
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
+@@ -0,0 +1,32 @@
++/* { dg-do run } */
++/* { dg-options "-O2" } */
++
++#include <atomic>
++#include <cstdio>
++
++__attribute__ ((noinline)) long
++val_without_const_folding (long val)
++{
++  return val;
++}
++
++int
++main ()
++{
++  int oldval = 0xaa;
++  int newval = 0xbb;
++  std::atomic<int> amo;
++
++  amo.store (oldval);
++
++  long longval = val_without_const_folding (0xff80000000000000 + oldval);
++  oldval = static_cast<int> (longval);
++
++  amo.compare_exchange_strong (oldval, newval);
++
++  if (newval != amo.load (std::memory_order_relaxed))
++    __builtin_abort ();
++
++  return 0;
++}
++
+-- 
+2.43.0
+
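The sign-extension hazard that the added addi.w avoids can be shown in plain C, with hypothetical values mirroring atomic-cas-int.C: a full 64-bit compare of the sign-extended value loaded by ll.w against an un-extended SImode register fails until the register is re-extended. A minimal sketch:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* What ll.w leaves in a register: the low 32 bits, sign-extended.  */
  int64_t loaded = (int64_t) (int32_t) 0xaa;

  /* An SImode register operand whose upper 32 bits still hold stale
     data (hypothetical value in the spirit of atomic-cas-int.C).  */
  int64_t expected = (int64_t) 0xff800000000000aaULL;

  /* A full 64-bit compare, like LoongArch's bne, sees a mismatch...  */
  printf ("64-bit compare matches: %d\n", loaded == expected);	/* 0 */

  /* ...but after an addi.w-style sign extension of the low 32 bits,
     the two values agree and the CAS loop behaves as intended.  */
  printf ("after sign extension:   %d\n",
	  loaded == (int64_t) (int32_t) expected);		/* 1 */
  return 0;
}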
View file
_service:tar_scm:0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch
Added
@@ -0,0 +1,71 @@
+From beb962ec516f152cef482b229c9adf0390dc3b2c Mon Sep 17 00:00:00 2001
+From: Andrew Pinski <apinski@marvell.com>
+Date: Thu, 17 Nov 2022 22:03:08 +0000
+Subject: [PATCH 047/157] [Backport][SME] Fix PR middle-end/107705: ICE after
+ reclaration error
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ceba66ee230bb96b0889fc8ec7333c7ffae96d6e
+
+The problem here is after we created a call expression
+in the C front-end, we replace the decl type with
+an error mark node.  We then end up calling
+aggregate_value_p with the call expression
+with the decl with the error mark as the type
+and we ICE.
+
+The fix is to check the function type
+after we process the call expression inside
+aggregate_value_p to get it.
+
+OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
+
+Thanks,
+Andrew Pinski
+
+gcc/ChangeLog:
+
+	PR middle-end/107705
+	* function.cc (aggregate_value_p): Return 0 if
+	the function type was an error operand.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.dg/redecl-22.c: New test.
+---
+ gcc/function.cc                  | 3 +++
+ gcc/testsuite/gcc.dg/redecl-22.c | 9 +++++++++
+ 2 files changed, 12 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/redecl-22.c
+
+diff --git a/gcc/function.cc b/gcc/function.cc
+index 28de39dd6..99aa738eb 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -2090,6 +2090,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
+   if (VOID_TYPE_P (type))
+     return 0;
+ 
++  if (error_operand_p (fntype))
++    return 0;
++
+   /* If a record should be passed the same as its first (and only) member
+      don't pass it as an aggregate.  */
+   if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type))
+diff --git a/gcc/testsuite/gcc.dg/redecl-22.c b/gcc/testsuite/gcc.dg/redecl-22.c
+new file mode 100644
+index 000000000..7758570fa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/redecl-22.c
+@@ -0,0 +1,9 @@
++/* We used to ICE in the gimplifier, PR 107705 */
++/* { dg-do compile } */
++/* { dg-options "-w" } */
++int f (void)
++{
++  int (*p) (void) = 0; // { dg-note "" }
++  return p ();
++  int p = 1; // { dg-error "" }
++}
+-- 
+2.33.0
+
View file
_service:tar_scm:0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch
Added
@@ -0,0 +1,30 @@
+From eab751e71d4f4d5e9b2eda55d793fd57541fbc56 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 7 Mar 2024 09:44:03 +0800
+Subject: [PATCH 146/188] LoongArch: testsuite: Add compilation options to the
+ regname-fp-s9.c.
+
+When the value of the macro DEFAULT_CFLAGS is set to '-ansi -pedantic-errors',
+the regname-fp-s9.c test fails.  To solve this problem, add the compilation
+option '-Wno-pedantic -std=gnu90' to this test case.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/regname-fp-s9.c: Add compilation option
+	'-Wno-pedantic -std=gnu90'.
+---
+ gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+index d2e3b80f8..77a74f1f6 100644
+--- a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+@@ -1,3 +1,4 @@
+ /* { dg-do compile } */
++/* { dg-additional-options "-Wno-pedantic -std=gnu90" } */
+ register long s9 asm("s9");  /* { dg-note "conflicts with 's9'" } */
+ register long fp asm("fp");  /* { dg-warning "register of 'fp' used for multiple global register variables" } */
+-- 
+2.43.0
+
View file
_service:tar_scm:0147-Backport-SME-function-Change-return-type-of-predicat.patch
Added
@@ -0,0 +1,351 @@
+From c074871572ef22cbcca8f0f4bc493d60caeddd78 Mon Sep 17 00:00:00 2001
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Wed, 21 Jun 2023 21:55:30 +0200
+Subject: [PATCH 048/157] [Backport][SME] function: Change return type of
+ predicate function from int to bool
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce47d3c2cf59bb2cc94afc4bbef88b0e4950f086
+
+Also change some internal variables to bool and some functions to void.
+
+gcc/ChangeLog:
+
+	* function.h (emit_initial_value_sets):
+	Change return type from int to void.
+	(aggregate_value_p): Change return type from int to bool.
+	(prologue_contains): Ditto.
+	(epilogue_contains): Ditto.
+	(prologue_epilogue_contains): Ditto.
+	* function.cc (temp_slot): Make "in_use" variable bool.
+	(make_slot_available): Update for changed "in_use" variable.
+	(assign_stack_temp_for_type): Ditto.
+	(emit_initial_value_sets): Change return type from int to void
+	and update function body accordingly.
+	(instantiate_virtual_regs): Ditto.
+	(rest_of_handle_thread_prologue_and_epilogue): Ditto.
+	(safe_insn_predicate): Change return type from int to bool.
+	(aggregate_value_p): Change return type from int to bool
+	and update function body accordingly.
+	(prologue_contains): Change return type from int to bool.
+	(prologue_epilogue_contains): Ditto.
+---
+ gcc/function.cc | 77 ++++++++++++++++++++++++-------------------------
+ gcc/function.h  | 10 +++----
+ 2 files changed, 42 insertions(+), 45 deletions(-)
+
+diff --git a/gcc/function.cc b/gcc/function.cc
+index 99aa738eb..fc8eb5812 100644
+--- a/gcc/function.cc
++++ b/gcc/function.cc
+@@ -578,8 +578,8 @@ public:
+   tree type;
+   /* The alignment (in bits) of the slot.  */
+   unsigned int align;
+-  /* Nonzero if this temporary is currently in use.  */
+-  char in_use;
++  /* True if this temporary is currently in use.  */
++  bool in_use;
+   /* Nesting level at which this slot is being used.
*/ + int level; + /* The offset of the slot from the frame_pointer, including extra space +@@ -674,7 +674,7 @@ make_slot_available (class temp_slot *temp) + { + cut_slot_from_list (temp, temp_slots_at_level (temp->level)); + insert_slot_to_list (temp, &avail_temp_slots); +- temp->in_use = 0; ++ temp->in_use = false; + temp->level = -1; + n_temp_slots_in_use--; + } +@@ -848,7 +848,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) + if (known_ge (best_p->size - rounded_size, alignment)) + { + p = ggc_alloc<temp_slot> (); +- p->in_use = 0; ++ p->in_use = false; + p->size = best_p->size - rounded_size; + p->base_offset = best_p->base_offset + rounded_size; + p->full_size = best_p->full_size - rounded_size; +@@ -918,7 +918,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) + } + + p = selected; +- p->in_use = 1; ++ p->in_use = true; + p->type = type; + p->level = temp_slot_level; + n_temp_slots_in_use++; +@@ -1340,7 +1340,7 @@ has_hard_reg_initial_val (machine_mode mode, unsigned int regno) + return NULL_RTX; + } + +-unsigned int ++void + emit_initial_value_sets (void) + { + struct initial_value_struct *ivs = crtl->hard_reg_initial_vals; +@@ -1348,7 +1348,7 @@ emit_initial_value_sets (void) + rtx_insn *seq; + + if (ivs == 0) +- return 0; ++ return; + + start_sequence (); + for (i = 0; i < ivs->num_entries; i++) +@@ -1357,7 +1357,6 @@ emit_initial_value_sets (void) + end_sequence (); + + emit_insn_at_entry (seq); +- return 0; + } + + /* Return the hardreg-pseudoreg initial values pair entry I and +@@ -1535,7 +1534,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc) + /* A subroutine of instantiate_virtual_regs_in_insn. Return true if X + matches the predicate for insn CODE operand OPERAND. */ + +-static int ++static bool + safe_insn_predicate (int code, int operand, rtx x) + { + return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x); +@@ -1948,7 +1947,7 @@ instantiate_decls (tree fndecl) + /* Pass through the INSNS of function FNDECL and convert virtual register + references to hard register references. */ + +-static unsigned int ++static void + instantiate_virtual_regs (void) + { + rtx_insn *insn; +@@ -2002,8 +2001,6 @@ instantiate_virtual_regs (void) + /* Indicate that, from now on, assign_stack_local should use + frame_pointer_rtx. */ + virtuals_instantiated = 1; +- +- return 0; + } + + namespace { +@@ -2031,7 +2028,8 @@ public: + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { +- return instantiate_virtual_regs (); ++ instantiate_virtual_regs (); ++ return 0; + } + + }; // class pass_instantiate_virtual_regs +@@ -2045,12 +2043,12 @@ make_pass_instantiate_virtual_regs (gcc::context *ctxt) + } + +  +-/* Return 1 if EXP is an aggregate type (or a value with aggregate type). ++/* Return true if EXP is an aggregate type (or a value with aggregate type). + This means a type for which function calls must pass an address to the + function or get an address back from the function. + EXP may be a type node or an expression (whose type is tested). */ + +-int ++bool + aggregate_value_p (const_tree exp, const_tree fntype) + { + const_tree type = (TYPE_P (exp)) ? exp : TREE_TYPE (exp); +@@ -2070,7 +2068,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) + else + /* For internal functions, assume nothing needs to be + returned in memory. 
*/
+-	return 0;
++	return false;
+       }
+       break;
+     case FUNCTION_DECL:
+@@ -2088,10 +2086,10 @@ aggregate_value_p (const_tree exp, const_tree fntype)
+     }
+ 
+   if (VOID_TYPE_P (type))
+-    return 0;
++    return false;
+ 
+   if (error_operand_p (fntype))
+-    return 0;
++    return false;
+ 
+   /* If a record should be passed the same as its first (and only) member
+      don't pass it as an aggregate.  */
+@@ -2102,25 +2100,25 @@ aggregate_value_p (const_tree exp, const_tree fntype)
+      reference, do so.  */
+   if ((TREE_CODE (exp) == PARM_DECL || TREE_CODE (exp) == RESULT_DECL)
+       && DECL_BY_REFERENCE (exp))
+-    return 1;
++    return true;
+ 
+   /* Function types that are TREE_ADDRESSABLE force return in memory.  */
+   if (fntype && TREE_ADDRESSABLE (fntype))
+-    return 1;
++    return true;
+ 
+   /* Types that are TREE_ADDRESSABLE must be constructed in memory,
+      and thus can't be returned in registers.  */
+   if (TREE_ADDRESSABLE (type))
+-    return 1;
++    return true;
+ 
+   if (TYPE_EMPTY_P (type))
+-    return 0;
++    return false;
+ 
+   if (flag_pcc_struct_return && AGGREGATE_TYPE_P (type))
+-    return 1;
++    return true;
+ 
+   if (targetm.calls.return_in_memory (type, fntype))
+-    return 1;
++    return true;
+ 
+   /* Make sure we have suitable call-clobbered regs to return
+      the value in; if not, we must return it in memory.  */
+@@ -2129,7 +2127,7 @@
+   /* If we have something other than a REG (e.g. a PARALLEL), then assume
+      it is OK.  */
+   if (!REG_P (reg))
+-    return 0;
++    return false;
+ 
+   /* Use the default ABI if the type of the function isn't known.
+      The scheme for handling interoperability between different ABIs
+@@ -2142,9 +2140,9 @@
+   nregs = hard_regno_nregs (regno, TYPE_MODE (type));
+   for (i = 0; i < nregs; i++)
+     if (!fixed_regs[regno + i] && !abi.clobbers_full_reg_p (regno + i))
+-      return 1;
++      return true;
+ 
+-  return 0;
++  return false;
+ }
+ 
+ /* Return true if we should assign DECL a pseudo register; false if it
+@@ -5741,26 +5739,26 @@ contains (const rtx_insn *insn, hash_table<insn_cache_hasher> *hash)
+   return hash->find (const_cast<rtx_insn *> (insn)) != NULL;
+ }
+ 
+-int
++bool
+ prologue_contains (const rtx_insn *insn)
+ {
+   return contains (insn, prologue_insn_hash);
+ }
+ 
+-int
++bool
+ epilogue_contains (const rtx_insn *insn)
+ {
+   return contains (insn, epilogue_insn_hash);
+ }
+ 
+-int
++bool
+ prologue_epilogue_contains (const rtx_insn *insn)
+ {
+   if (contains (insn, prologue_insn_hash))
+-    return 1;
++    return true;
+   if (contains (insn, epilogue_insn_hash))
+-    return 1;
+-  return 0;
++    return true;
++  return false;
+ }
+ 
+ void
+@@ -6386,14 +6384,13 @@ current_function_name (void)
+ }
+ 
+ 
+-static unsigned int
++static void
+ rest_of_handle_check_leaf_regs (void)
+ {
+ #ifdef LEAF_REGISTERS
+   crtl->uses_only_leaf_regs
+     = optimize > 0 && only_leaf_regs_used () && leaf_function_p ();
+ #endif
+-  return 0;
+ }
+ 
+ /* Insert a TYPE into the used types hash table of CFUN.
*/ +@@ -6518,7 +6515,8 @@ public: + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { +- return rest_of_handle_check_leaf_regs (); ++ rest_of_handle_check_leaf_regs (); ++ return 0; + } + + }; // class pass_leaf_regs +@@ -6531,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt) + return new pass_leaf_regs (ctxt); + } + +-static unsigned int ++static void + rest_of_handle_thread_prologue_and_epilogue (function *fun) + { + /* prepare_shrink_wrap is sensitive to the block structure of the control +@@ -6563,8 +6561,6 @@ rest_of_handle_thread_prologue_and_epilogue (function *fun) + /* The stack usage info is finalized during prologue expansion. */ + if (flag_stack_usage_info || flag_callgraph_info) + output_stack_usage (); +- +- return 0; + } + + /* Record a final call to CALLEE at LOCATION. */ +@@ -6626,7 +6622,8 @@ public: + /* opt_pass methods: */ + unsigned int execute (function * fun) final override + { +- return rest_of_handle_thread_prologue_and_epilogue (fun); ++ rest_of_handle_thread_prologue_and_epilogue (fun); ++ return 0; + } + + }; // class pass_thread_prologue_and_epilogue +diff --git a/gcc/function.h b/gcc/function.h +index a53fb24d2..4e8131706 100644 +--- a/gcc/function.h ++++ b/gcc/function.h +@@ -653,11 +653,11 @@ extern rtx get_hard_reg_initial_val (machine_mode, unsigned int); + extern rtx has_hard_reg_initial_val (machine_mode, unsigned int); + + /* Called from gimple_expand_cfg. */ +-extern unsigned int emit_initial_value_sets (void); ++extern void emit_initial_value_sets (void); + + extern bool initial_value_entry (int i, rtx *, rtx *); + extern void instantiate_decl_rtl (rtx x); +-extern int aggregate_value_p (const_tree, const_tree); ++extern bool aggregate_value_p (const_tree, const_tree); + extern bool use_register_for_decl (const_tree); + extern gimple_seq gimplify_parameters (gimple_seq *); + extern void locate_and_pad_parm (machine_mode, tree, int, int, int, +@@ -698,9 +698,9 @@ extern void clobber_return_register (void); + extern void expand_function_end (void); + extern rtx get_arg_pointer_save_area (void); + extern void maybe_copy_prologue_epilogue_insn (rtx, rtx); +-extern int prologue_contains (const rtx_insn *); +-extern int epilogue_contains (const rtx_insn *); +-extern int prologue_epilogue_contains (const rtx_insn *); ++extern bool prologue_contains (const rtx_insn *); ++extern bool epilogue_contains (const rtx_insn *); ++extern bool prologue_epilogue_contains (const rtx_insn *); + extern void record_prologue_seq (rtx_insn *); + extern void record_epilogue_seq (rtx_insn *); + extern void emit_return_into_block (bool simple_p, basic_block bb); +-- +2.33.0 +
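The conversion above turns aggregate_value_p into a bool predicate. What the predicate decides is whether a function's return value must come back through memory rather than in registers. A minimal C illustration of the two cases (not part of the patch; the struct sizes are only indicative, the exact cutoff is ABI-specific):

struct small { long a; };      /* usually returned in a register */
struct big   { long a[8]; };   /* usually forced into memory: the caller
                                  supplies a hidden pointer argument */

struct small ret_small (void) { struct small s = { 1 }; return s; }
struct big   ret_big   (void) { struct big   b = { { 0 } }; return b; }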
View file
_service:tar_scm:0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch
Added
@@ -0,0 +1,137 @@ +From 465f0653b6e7bf5adb5d1f6c9e8aff2b81a3f27f Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 26 Jan 2024 18:28:32 +0800 +Subject: [PATCH 147/188] LoongArch: Emit R_LARCH_RELAX for TLS IE with + non-extreme code model to allow the IE to LE linker relaxation + +In Binutils we need to make IE to LE relaxation only allowed when there +is an R_LARCH_RELAX after R_LARCH_TLS_IE_PC_{HI20,LO12} so an invalid +"partial" relaxation won't happen with the extreme code model. So if we +are emitting %ie_pc_{hi20,lo12} in a non-extreme code model, emit an +R_LARCH_RELAX to allow the relaxation. The IE to LE relaxation does not +require the pcalau12i and the ld instruction to be adjacent, so we don't +need to limit ourselves to use the macro. + +For the distro maintainers backporting changes: this change depends on +r14-8721, without r14-8721 R_LARCH_RELAX can be emitted mistakenly in +the extreme code model. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand_reloc): + Support 'Q' for R_LARCH_RELAX for TLS IE. + (loongarch_output_move): Use 'Q' to print R_LARCH_RELAX for TLS + IE. + * config/loongarch/loongarch.md (ld_from_got<mode>): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/tls-ie-relax.c: New test. + * gcc.target/loongarch/tls-ie-norelax.c: New test. + * gcc.target/loongarch/tls-ie-extreme.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 15 ++++++++++++++- + gcc/config/loongarch/loongarch.md | 2 +- + .../gcc.target/loongarch/tls-ie-extreme.c | 5 +++++ + .../gcc.target/loongarch/tls-ie-norelax.c | 5 +++++ + gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c | 11 +++++++++++ + 5 files changed, 36 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d23b09cc5..c1dc30b61 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4977,7 +4977,7 @@ loongarch_output_move (rtx dest, rtx src) + if (type == SYMBOL_TLS_LE) + return "lu12i.w\t%0,%h1"; + else +- return "pcalau12i\t%0,%h1"; ++ return "%Q1pcalau12i\t%0,%h1"; + } + + if (src_code == CONST_INT) +@@ -6141,6 +6141,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'L' Print the low-part relocation associated with OP. + 'm' Print one less than CONST_INT OP in decimal. + 'N' Print the inverse of the integer branch condition for comparison OP. ++ 'Q' Print R_LARCH_RELAX for TLS IE. + 'r' Print address 12-31bit relocation associated with OP. + 'R' Print address 32-51bit relocation associated with OP. 
+ 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), +@@ -6278,6 +6279,18 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + letter); + break; + ++ case 'Q': ++ if (!TARGET_LINKER_RELAXATION) ++ break; ++ ++ if (code == HIGH) ++ op = XEXP (op, 0); ++ ++ if (loongarch_classify_symbolic_expression (op) == SYMBOL_TLS_IE) ++ fprintf (file, ".reloc\t.,R_LARCH_RELAX\n\t"); ++ ++ break; ++ + case 'r': + loongarch_print_operand_reloc (file, op, false /* hi64_part */, + true /* lo_reloc */); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 248ad12bb..d2c7c3b05 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2620,7 +2620,7 @@ + (match_operand:P 2 "symbolic_operand")))] + UNSPEC_LOAD_FROM_GOT))] + "" +- "ld.<d>\t%0,%1,%L2" ++ "%Q2ld.<d>\t%0,%1,%L2" + [(set_attr "type" "move")] + ) + +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c +new file mode 100644 +index 000000000..00c545a3e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mcmodel=extreme -mexplicit-relocs=auto -mrelax" } */ ++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */ ++ ++#include "tls-ie-relax.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c +new file mode 100644 +index 000000000..dd6bf3634 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mno-relax" } */ ++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */ ++ ++#include "tls-ie-relax.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c +new file mode 100644 +index 000000000..e9f7569b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mrelax" } */ ++/* { dg-final { scan-assembler-times "R_LARCH_RELAX" 2 { target tls_native } } } */ ++ ++extern __thread int errno; ++ ++void ++unimplemented (void) ++{ ++ errno = -38; ++} +-- +2.43.0 +
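For reference, the user-visible shape of the sequence this patch makes relaxable is just an initial-exec TLS access, as in the new tls-ie-relax.c test. A sketch along the same lines (the variable name is invented; with -mrelax the compiler now precedes both the pcalau12i and the ld with an R_LARCH_RELAX so the linker may rewrite the IE sequence to LE):

extern __thread int counter __attribute__ ((tls_model ("initial-exec")));

void
bump (void)
{
  counter++;   /* pcalau12i + ld.d of the GOT slot, both now relaxable */
}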
View file
_service:tar_scm:0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
Added
@@ -0,0 +1,233 @@ +From 417d51e1ecf41b3ba3ddf24eaf1e07db5c1ded9e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:28:46 +0000 +Subject: [PATCH 049/157] [Backport][SME] Allow prologues and epilogues to be + inserted later + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e9d2ae6b9816e61a6148040149c63faa83f54702 + +Arm's SME adds a new processor mode called streaming mode. +This mode enables some new (matrix-oriented) instructions and +disables several existing groups of instructions, such as most +Advanced SIMD vector instructions and a much smaller set of SVE +instructions. It can also change the current vector length. + +There are instructions to switch in and out of streaming mode. +However, their effect on the ISA and vector length can't be represented +directly in RTL, so they need to be emitted late in the pass pipeline, +close to md_reorg. + +It's sometimes the responsibility of the prologue and epilogue to +switch modes, which means we need to emit the prologue and epilogue +sequences late as well. (This loses shrink-wrapping and scheduling +opportunities, but that's a price worth paying.) + +This patch therefore adds a target hook for forcing prologue +and epilogue insertion to happen later in the pipeline. + +gcc/ + * target.def (use_late_prologue_epilogue): New hook. + * doc/tm.texi.in: Add TARGET_USE_LATE_PROLOGUE_EPILOGUE. + * doc/tm.texi: Regenerate. + * passes.def (pass_late_thread_prologue_and_epilogue): New pass. + * tree-pass.h (make_pass_late_thread_prologue_and_epilogue): Declare. + * function.cc (pass_thread_prologue_and_epilogue::gate): New function. + (pass_data_late_thread_prologue_and_epilogue): New pass variable. + (pass_late_thread_prologue_and_epilogue): New pass class. + (make_pass_late_thread_prologue_and_epilogue): New function. +--- + gcc/doc/tm.texi | 19 ++++++++++++++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/function.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/passes.def | 3 +++ + gcc/target.def | 21 +++++++++++++++++++ + gcc/tree-pass.h | 2 ++ + 6 files changed, 97 insertions(+) + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 5f0972356..d930d233d 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11684,6 +11684,25 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed + to by @var{ce_info}. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_USE_LATE_PROLOGUE_EPILOGUE () ++Return true if the current function's prologue and epilogue should ++be emitted late in the pass pipeline, instead of at the usual point. ++ ++Normally, the prologue and epilogue sequences are introduced soon after ++register allocation is complete. The advantage of this approach is that ++it allows the prologue and epilogue instructions to be optimized and ++scheduled with other code in the function. However, some targets ++require the prologue and epilogue to be the first and last sequences ++executed by the function, with no variation allowed. This hook should ++return true on such targets. ++ ++The default implementation returns false, which is correct for most ++targets. The hook should only return true if there is a specific ++target limitation that cannot be described in RTL. For example, ++the hook might return true if the prologue and epilogue need to switch ++between instruction sets. ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) + If non-null, this hook performs a target-specific pass over the + instruction stream. 
The compiler will run it at all optimization levels, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index fcab21744..19eabec48 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7708,6 +7708,8 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed + to by @var{ce_info}. + @end defmac + ++@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE ++ + @hook TARGET_MACHINE_DEPENDENT_REORG + + @hook TARGET_INIT_BUILTINS +diff --git a/gcc/function.cc b/gcc/function.cc +index fc8eb5812..7c90b5f23 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see + #include "function-abi.h" + #include "value-range.h" + #include "gimple-range.h" ++#include "insn-attr.h" + + /* So we can assign to cfun in this file. */ + #undef cfun +@@ -6620,6 +6621,11 @@ public: + {} + + /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return !targetm.use_late_prologue_epilogue (); ++ } ++ + unsigned int execute (function * fun) final override + { + rest_of_handle_thread_prologue_and_epilogue (fun); +@@ -6628,6 +6634,44 @@ public: + + }; // class pass_thread_prologue_and_epilogue + ++const pass_data pass_data_late_thread_prologue_and_epilogue = ++{ ++ RTL_PASS, /* type */ ++ "late_pro_and_epilogue", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_THREAD_PROLOGUE_AND_EPILOGUE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */ ++}; ++ ++class pass_late_thread_prologue_and_epilogue : public rtl_opt_pass ++{ ++public: ++ pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_late_thread_prologue_and_epilogue, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return targetm.use_late_prologue_epilogue (); ++ } ++ ++ unsigned int execute (function *fn) final override ++ { ++ /* It's not currently possible to have both delay slots and ++ late prologue/epilogue, since the latter has to run before ++ the former, and the former won't honor whatever restrictions ++ the latter is trying to enforce. */ ++ gcc_assert (!DELAY_SLOTS); ++ rest_of_handle_thread_prologue_and_epilogue (fn); ++ return 0; ++ } ++}; // class pass_late_thread_prologue_and_epilogue ++ + } // anon namespace + + rtl_opt_pass * +@@ -6636,6 +6680,12 @@ make_pass_thread_prologue_and_epilogue (gcc::context *ctxt) + return new pass_thread_prologue_and_epilogue (ctxt); + } + ++rtl_opt_pass * ++make_pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) ++{ ++ return new pass_late_thread_prologue_and_epilogue (ctxt); ++} ++ + namespace { + + const pass_data pass_data_zero_call_used_regs = +diff --git a/gcc/passes.def b/gcc/passes.def +index cdc600298..8797f166f 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -523,6 +523,9 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_stack_regs_run); + POP_INSERT_PASSES () + POP_INSERT_PASSES () ++ NEXT_PASS (pass_late_thread_prologue_and_epilogue); ++ /* No target-independent code motion is allowed beyond this point, ++ excepting the legacy delayed-branch pass. 
*/ + NEXT_PASS (pass_late_compilation); + PUSH_INSERT_PASSES_WITHIN (pass_late_compilation) + NEXT_PASS (pass_zero_call_used_regs); +diff --git a/gcc/target.def b/gcc/target.def +index 4d77c1523..fd4899612 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -4120,6 +4120,27 @@ returns @code{VOIDmode}.", + machine_mode, (machine_mode m1, machine_mode m2), + default_cc_modes_compatible) + ++DEFHOOK ++(use_late_prologue_epilogue, ++ "Return true if the current function's prologue and epilogue should\n\ ++be emitted late in the pass pipeline, instead of at the usual point.\n\ ++\n\ ++Normally, the prologue and epilogue sequences are introduced soon after\n\ ++register allocation is complete. The advantage of this approach is that\n\ ++it allows the prologue and epilogue instructions to be optimized and\n\ ++scheduled with other code in the function. However, some targets\n\ ++require the prologue and epilogue to be the first and last sequences\n\ ++executed by the function, with no variation allowed. This hook should\n\ ++return true on such targets.\n\ ++\n\ ++The default implementation returns false, which is correct for most\n\ ++targets. The hook should only return true if there is a specific\n\ ++target limitation that cannot be described in RTL. For example,\n\ ++the hook might return true if the prologue and epilogue need to switch\n\ ++between instruction sets.", ++ bool, (), ++ hook_bool_void_false) ++ + /* Do machine-dependent code transformations. Called just before + delayed-branch scheduling. */ + DEFHOOK +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 34e60bc38..1c983ef71 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -612,6 +612,8 @@ extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context + *ctxt); ++extern rtl_opt_pass *make_pass_late_thread_prologue_and_epilogue (gcc::context ++ *ctxt); + extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt); +-- +2.33.0 +
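A port opts in to the late pass simply by returning true from the new hook. A hedged sketch of the backend side (only the hook name and macro come from the patch; the port function is invented):

/* In a hypothetical backend's <target>.cc; illustration only.  */
static bool
example_use_late_prologue_epilogue (void)
{
  /* e.g. true when the prologue/epilogue may need to switch ISA modes,
     something that cannot be described in RTL.  */
  return true;
}

#undef TARGET_USE_LATE_PROLOGUE_EPILOGUE
#define TARGET_USE_LATE_PROLOGUE_EPILOGUE example_use_late_prologue_epilogue

With that in place the normal pass_thread_prologue_and_epilogue gates itself off and pass_late_thread_prologue_and_epilogue runs instead, just before pass_late_compilation.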
View file
_service:tar_scm:0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch
Added
@@ -0,0 +1,57 @@ +From acc00029aab3cdd1186f1ed4a93db2205cdd166c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 13 Mar 2024 20:44:38 +0800 +Subject: [PATCH 148/188] LoongArch: Remove unused and incorrect + "sge<u>_<X:mode><GPR:mode>" define_insn + +If this insn is really used, we'll have something like + + slti $r4,$r0,$r5 + +in the code. The assembler will reject it because slti wants 2 +register operands and 1 immediate operand. But we've not got any bug +report for this, indicating this define_insn is unused at all. + +Note that do_store_flag (in expr.cc) is already converting x >= 1 to +x > 0 unconditionally, so this define_insn is indeed unused and we can +just remove it. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (any_ge): Remove. + (sge<u>_<X:mode><GPR:mode>): Remove. +--- + gcc/config/loongarch/loongarch.md | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index d2c7c3b05..1b3525dde 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -517,7 +517,6 @@ + ;; These code iterators allow the signed and unsigned scc operations to use + ;; the same template. + (define_code_iterator any_gt [gt gtu]) +-(define_code_iterator any_ge [ge geu]) + (define_code_iterator any_lt [lt ltu]) + (define_code_iterator any_le [le leu]) + +@@ -3355,15 +3354,6 @@ + [(set_attr "type" "slt") + (set_attr "mode" "<X:MODE>")]) + +-(define_insn "*sge<u>_<X:mode><GPR:mode>" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (any_ge:GPR (match_operand:X 1 "register_operand" "r") +- (const_int 1)))] +- "" +- "slt<u>i\t%0,%.,%1" +- [(set_attr "type" "slt") +- (set_attr "mode" "<X:MODE>")]) +- + (define_insn "*slt<u>_<X:mode><GPR:mode>" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_lt:GPR (match_operand:X 1 "register_operand" "r") +-- +2.43.0 +
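The reason the pattern was unreachable follows directly from the commit message: do_store_flag canonicalizes a GE comparison against constant 1 into a GT against 0 before any backend pattern is matched. Both of these C functions therefore expand to the same slt-based sequence, and the removed define_insn could never fire:

int ge_one  (int x) { return x >= 1; }   /* folded to x > 0 in expr.cc */
int gt_zero (int x) { return x > 0;  }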
View file
_service:tar_scm:0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
Added
@@ -0,0 +1,239 @@ +From e906213086639df81085a0101bf88fb66c1dbc2b Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:35:57 +0000 +Subject: [PATCH 050/157] [Backport][SME] Add a target hook for sibcall + epilogues + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2e0aefa77157396acb48833407637303edba450a + +Epilogues for sibling calls are generated using the +sibcall_epilogue pattern. One disadvantage of this approach +is that the target doesn't know which call the epilogue is for, +even though the code that generates the pattern has the call +to hand. + +Although call instructions are currently rtxes, and so could be +passed as an operand to the pattern, the main point of introducing +rtx_insn was to move towards separating the rtx and insn types +(a good thing IMO). There also isn't an existing practice of +passing genuine instructions (as opposed to labels) to +instruction patterns. + +This patch therefore adds a hook that can be defined as an +alternative to sibcall_epilogue. The advantage is that it +can be passed the call; the disadvantage is that it can't +use .md conveniences like generating instructions from +textual patterns (although most epilogues are too complex +to benefit much from that anyway). + +gcc/ + * doc/tm.texi.in: Add TARGET_EMIT_EPILOGUE_FOR_SIBCALL. + * doc/tm.texi: Regenerate. + * target.def (emit_epilogue_for_sibcall): New hook. + * calls.cc (can_implement_as_sibling_call_p): Use it. + * function.cc (thread_prologue_and_epilogue_insns): Likewise. + (reposition_prologue_and_epilogue_notes): Likewise. + * config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): Take + an rtx_call_insn * rather than a bool. + * config/aarch64/aarch64.cc (aarch64_expand_epilogue): Likewise. + (TARGET_EMIT_EPILOGUE_FOR_SIBCALL): Define. + * config/aarch64/aarch64.md (epilogue): Update call. + (sibcall_epilogue): Delete. 
+--- + gcc/calls.cc | 3 ++- + gcc/config/aarch64/aarch64-protos.h | 2 +- + gcc/config/aarch64/aarch64.cc | 11 +++++++---- + gcc/config/aarch64/aarch64.md | 11 +---------- + gcc/doc/tm.texi | 8 ++++++++ + gcc/doc/tm.texi.in | 2 ++ + gcc/function.cc | 15 +++++++++++++-- + gcc/target.def | 9 +++++++++ + 8 files changed, 43 insertions(+), 18 deletions(-) + +diff --git a/gcc/calls.cc b/gcc/calls.cc +index 4d0bc45be..c1db66883 100644 +--- a/gcc/calls.cc ++++ b/gcc/calls.cc +@@ -2461,7 +2461,8 @@ can_implement_as_sibling_call_p (tree exp, + tree addr, + const args_size &args_size) + { +- if (!targetm.have_sibcall_epilogue ()) ++ if (!targetm.have_sibcall_epilogue () ++ && !targetm.emit_epilogue_for_sibcall) + { + maybe_complain_about_tail_call + (exp, +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 86e444a60..97984f3ab 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -887,7 +887,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); + const char * aarch64_output_probe_stack_range (rtx, rtx); + const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); + void aarch64_err_no_fpadvsimd (machine_mode); +-void aarch64_expand_epilogue (bool); ++void aarch64_expand_epilogue (rtx_call_insn *); + rtx aarch64_ptrue_all (unsigned int); + opt_machine_mode aarch64_ptrue_all_mode (rtx); + rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index fd1114b52..055b436b1 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10046,7 +10046,7 @@ aarch64_use_return_insn_p (void) + from a deallocated stack, and we optimize the unwind records by + emitting them all together if possible. */ + void +-aarch64_expand_epilogue (bool for_sibcall) ++aarch64_expand_epilogue (rtx_call_insn *sibcall) + { + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; + HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +@@ -10194,7 +10194,7 @@ aarch64_expand_epilogue (bool for_sibcall) + explicitly authenticate. + */ + if (aarch64_return_address_signing_enabled () +- && (for_sibcall || !TARGET_ARMV8_3)) ++ && (sibcall || !TARGET_ARMV8_3)) + { + switch (aarch64_ra_sign_key) + { +@@ -10212,7 +10212,7 @@ aarch64_expand_epilogue (bool for_sibcall) + } + + /* Stack adjustment for exception handler. */ +- if (crtl->calls_eh_return && !for_sibcall) ++ if (crtl->calls_eh_return && !sibcall) + { + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. 
We have already reset the CFA +@@ -10223,7 +10223,7 @@ + } + + emit_use (gen_rtx_REG (DImode, LR_REGNUM)); +- if (!for_sibcall) ++ if (!sibcall) + emit_jump_insn (ret_rtx); + } + +@@ -28246,6 +28246,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_HAVE_SHADOW_CALL_STACK + #define TARGET_HAVE_SHADOW_CALL_STACK true + ++#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL ++#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-aarch64.h" +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 7267a74d6..a78476c8a 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -871,16 +871,7 @@ + [(clobber (const_int 0))] + "" + " +- aarch64_expand_epilogue (false); +- DONE; +- " +-) +- +-(define_expand "sibcall_epilogue" +- [(clobber (const_int 0))] +- "" +- " +- aarch64_expand_epilogue (true); ++ aarch64_expand_epilogue (nullptr); + DONE; + " + ) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index d930d233d..369f4b8da 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11703,6 +11703,14 @@ the hook might return true if the prologue and epilogue need to switch + between instruction sets. + @end deftypefn + ++@deftypefn {Target Hook} void TARGET_EMIT_EPILOGUE_FOR_SIBCALL (rtx_call_insn *@var{call}) ++If defined, this hook emits an epilogue sequence for sibling (tail) ++call instruction @var{call}. Another way of providing epilogues ++for sibling calls is to define the @code{sibcall_epilogue} instruction ++pattern; the main advantage of this hook over the pattern is that it ++has access to the call instruction. ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) + If non-null, this hook performs a target-specific pass over the + instruction stream. The compiler will run it at all optimization levels, +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 19eabec48..748b0777a 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7710,6 +7710,8 @@ to by @var{ce_info}. 
+ + @hook TARGET_USE_LATE_PROLOGUE_EPILOGUE + ++@hook TARGET_EMIT_EPILOGUE_FOR_SIBCALL ++ + @hook TARGET_MACHINE_DEPENDENT_REORG + + @hook TARGET_INIT_BUILTINS +diff --git a/gcc/function.cc b/gcc/function.cc +index 7c90b5f23..ddab43ca4 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -6209,7 +6209,17 @@ thread_prologue_and_epilogue_insns (void) + if (!(CALL_P (insn) && SIBLING_CALL_P (insn))) + continue; + +- if (rtx_insn *ep_seq = targetm.gen_sibcall_epilogue ()) ++ rtx_insn *ep_seq; ++ if (targetm.emit_epilogue_for_sibcall) ++ { ++ start_sequence (); ++ targetm.emit_epilogue_for_sibcall (as_a<rtx_call_insn *> (insn)); ++ ep_seq = get_insns (); ++ end_sequence (); ++ } ++ else ++ ep_seq = targetm.gen_sibcall_epilogue (); ++ if (ep_seq) + { + start_sequence (); + emit_note (NOTE_INSN_EPILOGUE_BEG); +@@ -6259,7 +6269,8 @@ reposition_prologue_and_epilogue_notes (void) + { + if (!targetm.have_prologue () + && !targetm.have_epilogue () +- && !targetm.have_sibcall_epilogue ()) ++ && !targetm.have_sibcall_epilogue () ++ && !targetm.emit_epilogue_for_sibcall) + return; + + /* Since the hash table is created on demand, the fact that it is +diff --git a/gcc/target.def b/gcc/target.def +index fd4899612..cf9f96eba 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -4141,6 +4141,15 @@ between instruction sets.", + bool, (), + hook_bool_void_false) + ++DEFHOOK ++(emit_epilogue_for_sibcall, ++ "If defined, this hook emits an epilogue sequence for sibling (tail)\n\ ++call instruction @var{call}. Another way of providing epilogues\n\ ++for sibling calls is to define the @code{sibcall_epilogue} instruction\n\ ++pattern; the main advantage of this hook over the pattern is that it\n\ ++has access to the call instruction.", ++ void, (rtx_call_insn *call), NULL) ++ + /* Do machine-dependent code transformations. Called just before + delayed-branch scheduling. */ + DEFHOOK +-- +2.33.0 +
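The call sites the new hook serves are ordinary tail calls; with TARGET_EMIT_EPILOGUE_FOR_SIBCALL defined, the backend now sees the call insn itself while expanding the epilogue instead of a bare boolean flag. A small C example of such a site (function names invented):

extern int callee (int);

int
caller (int x)
{
  return callee (x + 1);   /* sibling call: epilogue runs, then a direct
                              branch to callee rather than call + return */
}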
View file
_service:tar_scm:0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch
Added
@@ -0,0 +1,85 @@ +From 0dba1a1daef3f043235382f0e8f107313b9bde07 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Thu, 14 Mar 2024 09:26:54 +0800 +Subject: [PATCH 149/188] LoongArch: Remove masking process for operand 3 of + xvpermi.q. + +The behavior of non-zero unused bits in xvpermi.q instruction's +third operand is undefined on LoongArch, according to our +discussion (https://github.com/llvm/llvm-project/pull/83540), +we think that keeping original insn operand as unmodified +state is better solution. + +This patch partially reverts 7b158e036a95b1ab40793dd53bed7dbd770ffdaf. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvpermi_q_<LASX:mode>): + Remove masking of operand 3. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c: + Reposition operand 3's value into instruction's defined accept range. +--- + gcc/config/loongarch/lasx.md | 5 ----- + .../gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c | 6 +++--- + 2 files changed, 3 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 38f35bad6..f3b5ea373 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -640,8 +640,6 @@ + (set_attr "mode" "<MODE>")]) + + ;; xvpermi.q +-;; Unused bits in operands[3] need be set to 0 to avoid +-;; causing undefined behavior on LA464. + (define_insn "lasx_xvpermi_q_<LASX:mode>" + [(set (match_operand:LASX 0 "register_operand" "=f") + (unspec:LASX +@@ -651,9 +649,6 @@ + UNSPEC_LASX_XVPERMI_Q))] + "ISA_HAS_LASX" + { +- int mask = 0x33; +- mask &= INTVAL (operands[3]); +- operands[3] = GEN_INT (mask); + return "xvpermi.q\t%u0,%u2,%3"; + } + [(set_attr "type" "simd_splat") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +index dbc29d2fb..f89dfc311 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +@@ -27,7 +27,7 @@ main () + *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000; + *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d; + *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000; +- __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x2a); ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x22); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000; +@@ -42,7 +42,7 @@ main () + *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c; + *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000; + *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe; +- __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9); ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x31); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff; +@@ -57,7 +57,7 @@ main () + *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000; + *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff; + *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff; +- __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca); ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x02); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + return 0; +-- +2.43.0 +
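Since GCC no longer masks the immediate, callers of the intrinsic must keep the undefined selector bits clear themselves. A hedged usage sketch (per this editor's reading of the LASX documentation, bits 1:0 pick the 128-bit lane for the low half of the result and bits 5:4 the lane for the high half; the function name is invented):

#include <lasxintrin.h>

__m256i
pick_lanes (__m256i a, __m256i b)
{
  /* 0x31 keeps all unused selector bits zero, like the updated test.  */
  return __lasx_xvpermi_q (a, b, 0x31);
}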
View file
_service:tar_scm:0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
Added
@@ -0,0 +1,461 @@ +From 58adede22d9ff2368b5c24ec3fc0e53bd3ddc8bd Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:44:52 +0000 +Subject: [PATCH 051/157] [Backport][SME] Add a new target hook: + TARGET_START_CALL_ARGS + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=672fad57c1f99ff893019e2da4620e26b9b31dd2 + +We have the following two hooks into the call expansion code: + +- TARGET_CALL_ARGS is called for each argument before arguments + are moved into hard registers. + +- TARGET_END_CALL_ARGS is called after the end of the call + sequence (specifically, after any return value has been + moved to a pseudo). + +This patch adds a TARGET_START_CALL_ARGS hook that is called before +the TARGET_CALL_ARGS sequence. This means that TARGET_START_CALL_ARGS +and TARGET_END_CALL_ARGS bracket the region in which argument registers +might be live. They also bracket a region in which the only call +emitted by target-independent code is the call to the target function +itself. (For example, TARGET_START_CALL_ARGS happens after any use of +memcpy to copy arguments, and TARGET_END_CALL_ARGS happens before any +use of memcpy to copy the result.) + +Also, the patch adds the cumulative argument structure as an argument +to the hooks, so that the target can use it to record and retrieve +information about the call as a whole. + +The TARGET_CALL_ARGS docs said: + + While generating RTL for a function call, this target hook is invoked once + for each argument passed to the function, either a register returned by + ``TARGET_FUNCTION_ARG`` or a memory location. It is called just +- before the point where argument registers are stored. + +The last bit was true for normal calls, but for libcalls the hook was +invoked earlier, before stack arguments have been copied. I don't think +this caused a practical difference for nvptx (the only port to use the +hooks) since I wouldn't expect any libcalls to take stack parameters. + +gcc/ + * doc/tm.texi.in: Add TARGET_START_CALL_ARGS. + * doc/tm.texi: Regenerate. + * target.def (start_call_args): New hook. + (call_args, end_call_args): Add a parameter for the cumulative + argument information. + * hooks.h (hook_void_rtx_tree): Delete. + * hooks.cc (hook_void_rtx_tree): Likewise. + * targhooks.h (hook_void_CUMULATIVE_ARGS): Declare. + (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. + * targhooks.cc (hook_void_CUMULATIVE_ARGS): New function. + (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. + * calls.cc (expand_call): Call start_call_args before computing + and storing stack parameters. Pass the cumulative argument + information to call_args and end_call_args. + (emit_library_call_value_1): Likewise. + * config/nvptx/nvptx.cc (nvptx_call_args): Add a cumulative + argument parameter. + (nvptx_end_call_args): Likewise. +--- + gcc/calls.cc | 61 +++++++++++++++++++++------------------ + gcc/config/nvptx/nvptx.cc | 4 +-- + gcc/doc/tm.texi | 53 +++++++++++++++++++++++++++------- + gcc/doc/tm.texi.in | 2 ++ + gcc/hooks.cc | 5 ---- + gcc/hooks.h | 1 - + gcc/target.def | 59 +++++++++++++++++++++++++++++-------- + gcc/targhooks.cc | 10 +++++++ + gcc/targhooks.h | 5 ++-- + 9 files changed, 140 insertions(+), 60 deletions(-) + +diff --git a/gcc/calls.cc b/gcc/calls.cc +index c1db66883..4a8535cc6 100644 +--- a/gcc/calls.cc ++++ b/gcc/calls.cc +@@ -3507,15 +3507,26 @@ expand_call (tree exp, rtx target, int ignore) + sibcall_failure = 1; + } + ++ /* Set up the next argument register. 
For sibling calls on machines ++ with register windows this should be the incoming register. */ ++ if (pass == 0) ++ next_arg_reg = targetm.calls.function_incoming_arg ++ (args_so_far, function_arg_info::end_marker ()); ++ else ++ next_arg_reg = targetm.calls.function_arg ++ (args_so_far, function_arg_info::end_marker ()); ++ ++ targetm.calls.start_call_args (args_so_far); ++ + bool any_regs = false; + for (i = 0; i < num_actuals; i++) + if (args[i].reg != NULL_RTX) + { + any_regs = true; +- targetm.calls.call_args (args[i].reg, funtype); ++ targetm.calls.call_args (args_so_far, args[i].reg, funtype); + } + if (!any_regs) +- targetm.calls.call_args (pc_rtx, funtype); ++ targetm.calls.call_args (args_so_far, pc_rtx, funtype); + + /* Figure out the register where the value, if any, will come back. */ + valreg = 0; +@@ -3578,15 +3589,6 @@ expand_call (tree exp, rtx target, int ignore) + later safely search backwards to find the CALL_INSN. */ + before_call = get_last_insn (); + +- /* Set up next argument register. For sibling calls on machines +- with register windows this should be the incoming register. */ +- if (pass == 0) +- next_arg_reg = targetm.calls.function_incoming_arg +- (args_so_far, function_arg_info::end_marker ()); +- else +- next_arg_reg = targetm.calls.function_arg +- (args_so_far, function_arg_info::end_marker ()); +- + if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) + { + int arg_nr = return_flags & ERF_RETURN_ARG_MASK; +@@ -3879,7 +3881,7 @@ expand_call (tree exp, rtx target, int ignore) + for (i = 0; i < num_actuals; ++i) + free (args[i].aligned_regs); + +- targetm.calls.end_call_args (); ++ targetm.calls.end_call_args (args_so_far); + + insns = get_insns (); + end_sequence (); +@@ -4437,17 +4439,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + } + #endif + +- /* When expanding a normal call, args are stored in push order, +- which is the reverse of what we have here. */ +- bool any_regs = false; +- for (int i = nargs; i-- > 0; ) +- if (argvec[i].reg != NULL_RTX) +- { +- targetm.calls.call_args (argvec[i].reg, NULL_TREE); +- any_regs = true; +- } +- if (!any_regs) +- targetm.calls.call_args (pc_rtx, NULL_TREE); ++ rtx call_cookie ++ = targetm.calls.function_arg (args_so_far, ++ function_arg_info::end_marker ()); + + /* Push the args that need to be pushed. */ + +@@ -4565,6 +4559,20 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + + fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); + ++ targetm.calls.start_call_args (args_so_far); ++ ++ /* When expanding a normal call, args are stored in push order, ++ which is the reverse of what we have here. */ ++ bool any_regs = false; ++ for (int i = nargs; i-- > 0; ) ++ if (argvec[i].reg != NULL_RTX) ++ { ++ targetm.calls.call_args (args_so_far, argvec[i].reg, NULL_TREE); ++ any_regs = true; ++ } ++ if (!any_regs) ++ targetm.calls.call_args (args_so_far, pc_rtx, NULL_TREE); ++ + /* Now load any reg parms into their regs. 
*/ + + /* ARGNUM indexes the ARGVEC array in the order in which the arguments +@@ -4671,10 +4679,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + get_identifier (XSTR (orgfun, 0)), + build_function_type (tfom, NULL_TREE), + original_args_size.constant, args_size.constant, +- struct_value_size, +- targetm.calls.function_arg (args_so_far, +- function_arg_info::end_marker ()), +- valreg, ++ struct_value_size, call_cookie, valreg, + old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far); + + if (flag_ipa_ra) +@@ -4694,7 +4699,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + valreg = gen_rtx_REG (TYPE_MODE (tfom), REGNO (valreg)); + } + +- targetm.calls.end_call_args (); ++ targetm.calls.end_call_args (args_so_far); + + /* For calls to `setjmp', etc., inform function.cc:setjmp_warnings + that it should complain if nonvolatile values are live. For +diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc +index 3634a49de..7f2103ba6 100644 +--- a/gcc/config/nvptx/nvptx.cc ++++ b/gcc/config/nvptx/nvptx.cc +@@ -1780,7 +1780,7 @@ nvptx_get_drap_rtx (void) + argument to the next call. */ + + static void +-nvptx_call_args (rtx arg, tree fntype) ++nvptx_call_args (cumulative_args_t, rtx arg, tree fntype) + { + if (!cfun->machine->doing_call) + { +@@ -1808,7 +1808,7 @@ nvptx_call_args (rtx arg, tree fntype) + information we recorded. */ + + static void +-nvptx_end_call_args (void) ++nvptx_end_call_args (cumulative_args_t) + { + cfun->machine->doing_call = false; + free_EXPR_LIST_list (&cfun->machine->call_args); +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 369f4b8da..357c29a4d 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -5392,26 +5392,59 @@ except the last are treated as named. + You need not define this hook if it always returns @code{false}. + @end deftypefn + +-@deftypefn {Target Hook} void TARGET_CALL_ARGS (rtx, @var{tree}) ++@deftypefn {Target Hook} void TARGET_START_CALL_ARGS (cumulative_args_t @var{complete_args}) ++This target hook is invoked while generating RTL for a function call, ++after the argument values have been computed, and after stack arguments ++have been initialized, but before register arguments have been moved into ++their ABI-defined hard register locations. It precedes calls to the related ++hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}. ++The significance of this position in the call expansion is that: ++ ++@itemize @bullet ++@item ++No argument registers are live. ++@item ++Although a call sequence can in general involve subcalls (such as using ++@code{memcpy} to copy large arguments), no such subcall will occur between ++the call to this hook and the generation of the main call instruction. ++@end itemize ++ ++The single argument @var{complete_args} is the state of the target ++function's cumulative argument information after the final call to ++@code{TARGET_FUNCTION_ARG}. ++ ++The hook can be used for things like switching processor mode, in cases ++where different calls need different processor modes. Most ports do not ++need to implement anything for this hook. ++@end deftypefn ++ ++@deftypefn {Target Hook} void TARGET_CALL_ARGS (cumulative_args_t @var{complete_args}, rtx @var{loc}, tree @var{type}) + While generating RTL for a function call, this target hook is invoked once + for each argument passed to the function, either a register returned by + @code{TARGET_FUNCTION_ARG} or a memory location. It is called just +-before the point where argument registers are stored. 
The type of the +-function to be called is also passed as the second argument; it is +-@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is +-invoked just after the code to copy the return reg has been emitted. +-This functionality can be used to perform special setup of call argument +-registers if a target needs it. ++before the point where argument registers are stored. ++ ++@var{complete_args} is the state of the target function's cumulative ++argument information after the final call to @code{TARGET_FUNCTION_ARG}. ++@var{loc} is the location of the argument. @var{type} is the type of ++the function being called, or @code{NULL_TREE} for libcalls. ++ + For functions without arguments, the hook is called once with @code{pc_rtx} + passed instead of an argument register. +-Most ports do not need to implement anything for this hook. ++ ++This functionality can be used to perform special setup of call argument ++registers, if a target needs it. Most ports do not need to implement ++anything for this hook. + @end deftypefn + +-@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (void) ++@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (cumulative_args_t @var{complete_args}) + This target hook is invoked while generating RTL for a function call, + just after the point where the return reg is copied into a pseudo. It + signals that all the call argument and return registers for the just +-emitted call are now no longer in use. ++emitted call are now no longer in use. @var{complete_args} is the ++state of the target function's cumulative argument information after ++the final call to @code{TARGET_FUNCTION_ARG}. ++ + Most ports do not need to implement anything for this hook. + @end deftypefn + +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 748b0777a..4ebc9afbf 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -3774,6 +3774,8 @@ These machine description macros help implement varargs: + + @hook TARGET_STRICT_ARGUMENT_NAMING + ++@hook TARGET_START_CALL_ARGS ++ + @hook TARGET_CALL_ARGS + + @hook TARGET_END_CALL_ARGS +diff --git a/gcc/hooks.cc b/gcc/hooks.cc +index b29233f4f..0f4e7ce10 100644 +--- a/gcc/hooks.cc ++++ b/gcc/hooks.cc +@@ -280,11 +280,6 @@ hook_void_FILEptr_tree (FILE *, tree) + { + } + +-void +-hook_void_rtx_tree (rtx, tree) +-{ +-} +- + void + hook_void_constcharptr (const char *) + { +diff --git a/gcc/hooks.h b/gcc/hooks.h +index 1056e1e9e..e2a742f43 100644 +--- a/gcc/hooks.h ++++ b/gcc/hooks.h +@@ -83,7 +83,6 @@ extern void hook_void_FILEptr_constcharptr (FILE *, const char *); + extern void hook_void_FILEptr_constcharptr_const_tree (FILE *, const char *, + const_tree); + extern bool hook_bool_FILEptr_rtx_false (FILE *, rtx); +-extern void hook_void_rtx_tree (rtx, tree); + extern void hook_void_FILEptr_tree (FILE *, tree); + extern void hook_void_tree (tree); + extern void hook_void_tree_treeptr (tree, tree *); +diff --git a/gcc/target.def b/gcc/target.def +index cf9f96eba..a57e51b0d 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -4784,32 +4784,67 @@ not generate any instructions in this case.", + int *pretend_args_size, int second_time), + default_setup_incoming_varargs) + ++DEFHOOK ++(start_call_args, ++ "This target hook is invoked while generating RTL for a function call,\n\ ++after the argument values have been computed, and after stack arguments\n\ ++have been initialized, but before register arguments have been moved into\n\ ++their ABI-defined hard register locations. 
It precedes calls to the related\n\ ++hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}.\n\ ++The significance of this position in the call expansion is that:\n\ ++\n\ ++@itemize @bullet\n\ ++@item\n\ ++No argument registers are live.\n\ ++@item\n\ ++Although a call sequence can in general involve subcalls (such as using\n\ ++@code{memcpy} to copy large arguments), no such subcall will occur between\n\ ++the call to this hook and the generation of the main call instruction.\n\ ++@end itemize\n\ ++\n\ ++The single argument @var{complete_args} is the state of the target\n\ ++function's cumulative argument information after the final call to\n\ ++@code{TARGET_FUNCTION_ARG}.\n\ ++\n\ ++The hook can be used for things like switching processor mode, in cases\n\ ++where different calls need different processor modes. Most ports do not\n\ ++need to implement anything for this hook.", ++ void, (cumulative_args_t complete_args), ++ hook_void_CUMULATIVE_ARGS) ++ + DEFHOOK + (call_args, + "While generating RTL for a function call, this target hook is invoked once\n\ + for each argument passed to the function, either a register returned by\n\ + @code{TARGET_FUNCTION_ARG} or a memory location. It is called just\n\ +-before the point where argument registers are stored. The type of the\n\ +-function to be called is also passed as the second argument; it is\n\ +-@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is\n\ +-invoked just after the code to copy the return reg has been emitted.\n\ +-This functionality can be used to perform special setup of call argument\n\ +-registers if a target needs it.\n\ ++before the point where argument registers are stored.\n\ ++\n\ ++@var{complete_args} is the state of the target function's cumulative\n\ ++argument information after the final call to @code{TARGET_FUNCTION_ARG}.\n\ ++@var{loc} is the location of the argument. @var{type} is the type of\n\ ++the function being called, or @code{NULL_TREE} for libcalls.\n\ ++\n\ + For functions without arguments, the hook is called once with @code{pc_rtx}\n\ + passed instead of an argument register.\n\ +-Most ports do not need to implement anything for this hook.", +- void, (rtx, tree), +- hook_void_rtx_tree) ++\n\ ++This functionality can be used to perform special setup of call argument\n\ ++registers, if a target needs it. Most ports do not need to implement\n\ ++anything for this hook.", ++ void, (cumulative_args_t complete_args, rtx loc, tree type), ++ hook_void_CUMULATIVE_ARGS_rtx_tree) + + DEFHOOK + (end_call_args, + "This target hook is invoked while generating RTL for a function call,\n\ + just after the point where the return reg is copied into a pseudo. It\n\ + signals that all the call argument and return registers for the just\n\ +-emitted call are now no longer in use.\n\ ++emitted call are now no longer in use. 
@var{complete_args} is the\n\ ++state of the target function's cumulative argument information after\n\ ++the final call to @code{TARGET_FUNCTION_ARG}.\n\ ++\n\ + Most ports do not need to implement anything for this hook.", +- void, (void), +- hook_void_void) ++ void, (cumulative_args_t complete_args), ++ hook_void_CUMULATIVE_ARGS) + + DEFHOOK + (push_argument, +diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc +index 399d6f874..c88afa5db 100644 +--- a/gcc/targhooks.cc ++++ b/gcc/targhooks.cc +@@ -772,12 +772,22 @@ hook_int_CUMULATIVE_ARGS_arg_info_0 (cumulative_args_t, + return 0; + } + ++void ++hook_void_CUMULATIVE_ARGS (cumulative_args_t) ++{ ++} ++ + void + hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t ca ATTRIBUTE_UNUSED, + tree ATTRIBUTE_UNUSED) + { + } + ++void ++hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree) ++{ ++} ++ + /* Default implementation of TARGET_PUSH_ARGUMENT. */ + + bool +diff --git a/gcc/targhooks.h b/gcc/targhooks.h +index ecce55ebe..c6e12fc2e 100644 +--- a/gcc/targhooks.h ++++ b/gcc/targhooks.h +@@ -138,8 +138,9 @@ extern bool hook_bool_CUMULATIVE_ARGS_arg_info_true + (cumulative_args_t, const function_arg_info &); + extern int hook_int_CUMULATIVE_ARGS_arg_info_0 + (cumulative_args_t, const function_arg_info &); +-extern void hook_void_CUMULATIVE_ARGS_tree +- (cumulative_args_t, tree); ++extern void hook_void_CUMULATIVE_ARGS (cumulative_args_t); ++extern void hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t, tree); ++extern void hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree); + extern const char *hook_invalid_arg_for_unprototyped_fn + (const_tree, const_tree, const_tree); + extern void default_function_arg_advance +-- +2.33.0 +
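A sketch of how a port might use the new bracketing for mode switching; apart from the hook names and the cumulative_args_t type, everything here is invented for illustration:

static void
example_start_call_args (cumulative_args_t args_so_far)
{
  /* No argument registers are live yet, and no further subcalls
     (e.g. memcpy for large arguments) will be emitted before the
     call insn itself: a safe point to emit a mode switch based on
     what was recorded while advancing ARGS_SO_FAR.  */
}

static void
example_end_call_args (cumulative_args_t args_so_far)
{
  /* The call and the copy of its return value are done; argument
     and return registers are dead again, so switch back here.  */
}

#undef TARGET_START_CALL_ARGS
#define TARGET_START_CALL_ARGS example_start_call_args
#undef TARGET_END_CALL_ARGS
#define TARGET_END_CALL_ARGS example_end_call_args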
View file
_service:tar_scm:0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch
Added
@@ -0,0 +1,48 @@ +From 3ed698858f0ebb12a99ed1cc12c038b533f64b2c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 25 Oct 2024 06:15:21 +0000 +Subject: [PATCH 150/188] LoongArch: Fix C23 (...) functions returning large + aggregates [PR114175] + +We were assuming TYPE_NO_NAMED_ARGS_STDARG_P functions don't have any +named arguments and there is nothing to advance, but that is not the +case for (...) functions returning by hidden reference which have one +such artificial argument. This is causing gcc.dg/c23-stdarg-6.c and +gcc.dg/c23-stdarg-8.c to fail. + +Fix the issue by checking if arg.type is NULL, as r14-9503 explains. + +gcc/ChangeLog: + + PR target/114175 + * config/loongarch/loongarch.cc + (loongarch_setup_incoming_varargs): Only skip + loongarch_function_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P + functions if arg.type is NULL. +--- + gcc/config/loongarch/loongarch.cc | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c1dc30b61..1e3981e19 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -767,7 +767,14 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); ++ ++ /* For a C23 variadic function w/o any named argument, and w/o an ++ artificial argument for large return value, skip advancing args. ++ There is such an artificial argument iff. arg.type is non-NULL ++ (PR 114175). */ ++ if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) ++ || arg.type != NULL_TREE) ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); + + /* Found out how many registers we need to save. */ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +-- +2.43.0 +
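The shape of the failing case, modelled on gcc.dg/c23-stdarg-6.c (the body here is only indicative): a C23 (...) function with no named parameters still receives one artificial argument when its large return value is passed by hidden reference, and that argument must be advanced past before counting the registers left for va_arg.

#include <stdarg.h>

struct big { char c[1024]; };

struct big
f (...)                  /* C23: variadic, no named parameters */
{
  va_list ap;
  va_start (ap);         /* C23 form of va_start, no second argument */
  struct big r = { { 0 } };
  r.c[0] = (char) va_arg (ap, int);
  va_end (ap);
  return r;              /* returned via the hidden pointer argument */
}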
View file
_service:tar_scm:0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
Added
@@ -0,0 +1,490 @@ +From 8684458c3faf358e5a15dfb73b4ef632341ddf0a Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 09:52:41 +0000 +Subject: [PATCH 052/157] [Backport][SME] Allow targets to add USEs to asms + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=414d795d8a365b6e72a84257caa36cb3bed7e0ba + +Arm's SME has an array called ZA that for inline asm purposes +is effectively a form of special-purpose memory. It doesn't +have an associated storage type and so can't be passed and +returned in normal C/C++ objects. + +We'd therefore like "za" in a clobber list to mean that an inline +asm can read from and write to ZA. (Just reading or writing +individually is unlikely to be useful, but we could add syntax +for that too if necessary.) + +There is currently a TARGET_MD_ASM_ADJUST target hook that allows +targets to add clobbers to an asm instruction. This patch +extends that to allow targets to add USEs as well. + +gcc/ + * target.def (md_asm_adjust): Add a uses parameter. + * doc/tm.texi: Regenerate. + * cfgexpand.cc (expand_asm_loc): Update call to md_asm_adjust. + Handle any USEs created by the target. + (expand_asm_stmt): Likewise. + * recog.cc (asm_noperands): Handle asms with USEs. + (decode_asm_operands): Likewise. + * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Add uses + parameter. + * config/arm/aarch-common.cc (arm_md_asm_adjust): Likewise. + * config/arm/arm.cc (thumb1_md_asm_adjust): Likewise. + * config/avr/avr.cc (avr_md_asm_adjust): Likewise. + * config/cris/cris.cc (cris_md_asm_adjust): Likewise. + * config/i386/i386.cc (ix86_md_asm_adjust): Likewise. + * config/mn10300/mn10300.cc (mn10300_md_asm_adjust): Likewise. + * config/nds32/nds32.cc (nds32_md_asm_adjust): Likewise. + * config/pdp11/pdp11.cc (pdp11_md_asm_adjust): Likewise. + * config/rs6000/rs6000.cc (rs6000_md_asm_adjust): Likewise. + * config/s390/s390.cc (s390_md_asm_adjust): Likewise. + * config/vax/vax.cc (vax_md_asm_adjust): Likewise. + * config/visium/visium.cc (visium_md_asm_adjust): Likewise. 
+--- + gcc/cfgexpand.cc | 37 +++++++++++++++++++--------- + gcc/config/arm/aarch-common-protos.h | 2 +- + gcc/config/arm/aarch-common.cc | 3 ++- + gcc/config/arm/arm.cc | 5 ++-- + gcc/config/avr/avr.cc | 1 + + gcc/config/cris/cris.cc | 6 +++-- + gcc/config/i386/i386.cc | 5 ++-- + gcc/config/mn10300/mn10300.cc | 3 ++- + gcc/config/nds32/nds32.cc | 4 +-- + gcc/config/pdp11/pdp11.cc | 6 +++-- + gcc/config/rs6000/rs6000.cc | 3 ++- + gcc/config/s390/s390.cc | 3 ++- + gcc/config/vax/vax.cc | 4 ++- + gcc/config/visium/visium.cc | 5 ++-- + gcc/doc/tm.texi | 5 ++-- + gcc/recog.cc | 20 ++++++++++----- + gcc/target.def | 5 ++-- + 17 files changed, 77 insertions(+), 40 deletions(-) + +diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc +index 4691355aa..5401a4ebd 100644 +--- a/gcc/cfgexpand.cc ++++ b/gcc/cfgexpand.cc +@@ -2873,6 +2873,7 @@ expand_asm_loc (tree string, int vol, location_t locus) + auto_vec<rtx> input_rvec, output_rvec; + auto_vec<machine_mode> input_mode; + auto_vec<const char *> constraints; ++ auto_vec<rtx> use_rvec; + auto_vec<rtx> clobber_rvec; + HARD_REG_SET clobbered_regs; + CLEAR_HARD_REG_SET (clobbered_regs); +@@ -2882,16 +2883,20 @@ expand_asm_loc (tree string, int vol, location_t locus) + + if (targetm.md_asm_adjust) + targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, +- constraints, clobber_rvec, clobbered_regs, +- locus); ++ constraints, use_rvec, clobber_rvec, ++ clobbered_regs, locus); + + asm_op = body; + nclobbers = clobber_rvec.length (); +- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nclobbers)); ++ auto nuses = use_rvec.length (); ++ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nuses + nclobbers)); + +- XVECEXP (body, 0, 0) = asm_op; +- for (i = 0; i < nclobbers; i++) +- XVECEXP (body, 0, i + 1) = gen_rtx_CLOBBER (VOIDmode, clobber_rvec[i]); ++ i = 0; ++ XVECEXP (body, 0, i++) = asm_op; ++ for (rtx use : use_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); ++ for (rtx clobber : clobber_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, clobber); + } + + emit_insn (body); +@@ -3443,11 +3448,12 @@ expand_asm_stmt (gasm *stmt) + maintaining source-level compatibility means automatically clobbering + the flags register. */ + rtx_insn *after_md_seq = NULL; ++ auto_vec<rtx> use_rvec; + if (targetm.md_asm_adjust) + after_md_seq + = targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, +- constraints, clobber_rvec, clobbered_regs, +- locus); ++ constraints, use_rvec, clobber_rvec, ++ clobbered_regs, locus); + + /* Do not allow the hook to change the output and input count, + lest it mess up the operand numbering. */ +@@ -3455,7 +3461,8 @@ expand_asm_stmt (gasm *stmt) + gcc_assert (input_rvec.length() == ninputs); + gcc_assert (constraints.length() == noutputs + ninputs); + +- /* But it certainly can adjust the clobbers. */ ++ /* But it certainly can adjust the uses and clobbers. */ ++ unsigned nuses = use_rvec.length (); + unsigned nclobbers = clobber_rvec.length (); + + /* Third pass checks for easy conflicts. */ +@@ -3527,7 +3534,7 @@ expand_asm_stmt (gasm *stmt) + ARGVEC CONSTRAINTS OPNAMES)) + If there is more than one, put them inside a PARALLEL. */ + +- if (noutputs == 0 && nclobbers == 0) ++ if (noutputs == 0 && nuses == 0 && nclobbers == 0) + { + /* No output operands: put in a raw ASM_OPERANDS rtx. 
*/ + if (nlabels > 0) +@@ -3535,7 +3542,7 @@ + else + emit_insn (body); + } +- else if (noutputs == 1 && nclobbers == 0) ++ else if (noutputs == 1 && nuses == 0 && nclobbers == 0) + { + ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints[0]; + if (nlabels > 0) +@@ -3551,7 +3558,8 @@ + if (num == 0) + num = 1; + +- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers)); ++ body = gen_rtx_PARALLEL (VOIDmode, ++ rtvec_alloc (num + nuses + nclobbers)); + + /* For each output operand, store a SET. */ + for (i = 0; i < noutputs; ++i) +@@ -3578,6 +3586,11 @@ + if (i == 0) + XVECEXP (body, 0, i++) = obody; + ++ /* Add the uses specified by the target hook. No checking should ++ be needed since this doesn't come directly from user code. */ ++ for (rtx use : use_rvec) ++ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); ++ + /* Store (clobber REG) for each clobbered register specified. */ + for (unsigned j = 0; j < nclobbers; ++j) + { +diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h +index ae0465159..3b525c174 100644 +--- a/gcc/config/arm/aarch-common-protos.h ++++ b/gcc/config/arm/aarch-common-protos.h +@@ -149,7 +149,7 @@ struct cpu_cost_table + + rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, ++ vec<const char *> &constraints, vec<rtx> &, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, + location_t loc); + +diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc +index 04a53d750..365cfc140 100644 +--- a/gcc/config/arm/aarch-common.cc ++++ b/gcc/config/arm/aarch-common.cc +@@ -533,7 +533,8 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) + rtx_insn * + arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, vec<rtx> & /*clobbers*/, ++ vec<const char *> &constraints, ++ vec<rtx> & /*uses*/, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/, location_t loc) + { + bool saw_asm_flag = false; +diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc +index b700c23b8..c72e9c0b0 100644 +--- a/gcc/config/arm/arm.cc ++++ b/gcc/config/arm/arm.cc +@@ -325,7 +325,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT); + static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &, + vec<machine_mode> &, + vec<const char *> &, vec<rtx> &, +- HARD_REG_SET &, location_t); ++ vec<rtx> &, HARD_REG_SET &, location_t); + static const char *arm_identify_fpu_from_isa (sbitmap); +  + /* Table of machine attributes.
*/ +@@ -34209,7 +34209,8 @@ arm_stack_protect_guard (void) + rtx_insn * + thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, vec<rtx> & /*clobbers*/, ++ vec<const char *> &constraints, ++ vec<rtx> &, vec<rtx> & /*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/) + { + for (unsigned i = 0, n = outputs.length (); i < n; ++i) +diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc +index 4ed390e4c..1b5a95410 100644 +--- a/gcc/config/avr/avr.cc ++++ b/gcc/config/avr/avr.cc +@@ -14497,6 +14497,7 @@ static rtx_insn * + avr_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, + vec<machine_mode> & /*input_modes*/, + vec<const char *> &/*constraints*/, ++ vec<rtx> &/*uses*/, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, + location_t /*loc*/) + { +diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc +index f0017d630..3a1c85481 100644 +--- a/gcc/config/cris/cris.cc ++++ b/gcc/config/cris/cris.cc +@@ -151,7 +151,8 @@ static void cris_function_arg_advance (cumulative_args_t, + const function_arg_info &); + static rtx_insn *cris_md_asm_adjust (vec<rtx> &, vec<rtx> &, + vec<machine_mode> &, vec<const char *> &, +- vec<rtx> &, HARD_REG_SET &, location_t); ++ vec<rtx> &, vec<rtx> &, ++ HARD_REG_SET &, location_t); + + static void cris_option_override (void); + +@@ -3506,7 +3507,8 @@ cris_function_arg_advance (cumulative_args_t ca_v, + static rtx_insn * + cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, vec<rtx> &clobbers, ++ vec<const char *> &constraints, ++ vec<rtx> &/*uses*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs, location_t /*loc*/) + { + /* For the time being, all asms clobber condition codes. 
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 593185fa6..83a0d8abb 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -22252,8 +22252,9 @@ ix86_c_mode_for_suffix (char suffix) + static rtx_insn * + ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> &constraints, vec<rtx> &clobbers, +- HARD_REG_SET &clobbered_regs, location_t loc) ++ vec<const char *> &constraints, vec<rtx> &/*uses*/, ++ vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, ++ location_t loc) + { + bool saw_asm_flag = false; + +diff --git a/gcc/config/mn10300/mn10300.cc b/gcc/config/mn10300/mn10300.cc +index 2a58dd925..2ca2c769c 100644 +--- a/gcc/config/mn10300/mn10300.cc ++++ b/gcc/config/mn10300/mn10300.cc +@@ -2849,7 +2849,8 @@ mn10300_conditional_register_usage (void) + static rtx_insn * + mn10300_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> & /*constraints*/, vec<rtx> &clobbers, ++ vec<const char *> & /*constraints*/, ++ vec<rtx> &/*uses*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs, location_t /*loc*/) + { + clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG)); +diff --git a/gcc/config/nds32/nds32.cc b/gcc/config/nds32/nds32.cc +index 71fe9e8bc..27530495f 100644 +--- a/gcc/config/nds32/nds32.cc ++++ b/gcc/config/nds32/nds32.cc +@@ -4199,8 +4199,8 @@ nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED, + vec<rtx> &inputs ATTRIBUTE_UNUSED, + vec<machine_mode> &input_modes ATTRIBUTE_UNUSED, + vec<const char *> &constraints ATTRIBUTE_UNUSED, +- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, +- location_t /*loc*/) ++ vec<rtx> &/*uses*/, vec<rtx> &clobbers, ++ HARD_REG_SET &clobbered_regs, location_t /*loc*/) + { + if (!flag_inline_asm_r15) + { +diff --git a/gcc/config/pdp11/pdp11.cc b/gcc/config/pdp11/pdp11.cc +index 380223439..25cf62cbc 100644 +--- a/gcc/config/pdp11/pdp11.cc ++++ b/gcc/config/pdp11/pdp11.cc +@@ -155,7 +155,8 @@ static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool); + static int pdp11_insn_cost (rtx_insn *insn, bool speed); + static rtx_insn *pdp11_md_asm_adjust (vec<rtx> &, vec<rtx> &, + vec<machine_mode> &, vec<const char *> &, +- vec<rtx> &, HARD_REG_SET &, location_t); ++ vec<rtx> &, vec<rtx> &, ++ HARD_REG_SET &, location_t); + static bool pdp11_return_in_memory (const_tree, const_tree); + static rtx pdp11_function_value (const_tree, const_tree, bool); + static rtx pdp11_libcall_value (machine_mode, const_rtx); +@@ -2137,7 +2138,8 @@ pdp11_cmp_length (rtx *operands, int words) + static rtx_insn * + pdp11_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> & /*constraints*/, vec<rtx> &clobbers, ++ vec<const char *> & /*constraints*/, ++ vec<rtx> &/*uses*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs, location_t /*loc*/) + { + clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM)); +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 0b75861bb..55d4ce751 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -3443,7 +3443,8 @@ rs6000_builtin_mask_calculate (void) + static rtx_insn * + rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> & /*constraints*/, vec<rtx> &clobbers, ++ vec<const char *> & /*constraints*/, ++ vec<rtx> &/*uses*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs, 
location_t /*loc*/) + { + clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); +diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc +index ae0cf9ef5..f1599a5c5 100644 +--- a/gcc/config/s390/s390.cc ++++ b/gcc/config/s390/s390.cc +@@ -16994,7 +16994,8 @@ s390_hard_fp_reg_p (rtx x) + static rtx_insn * + s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + vec<machine_mode> &input_modes, +- vec<const char *> &constraints, vec<rtx> & /*clobbers*/, ++ vec<const char *> &constraints, ++ vec<rtx> &/*uses*/, vec<rtx> &/*clobbers*/, + HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/) + { + if (!TARGET_VXE) +diff --git a/gcc/config/vax/vax.cc b/gcc/config/vax/vax.cc +index 28c1af59a..7673a1428 100644 +--- a/gcc/config/vax/vax.cc ++++ b/gcc/config/vax/vax.cc +@@ -57,7 +57,8 @@ static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool); + static machine_mode vax_cc_modes_compatible (machine_mode, machine_mode); + static rtx_insn *vax_md_asm_adjust (vec<rtx> &, vec<rtx> &, + vec<machine_mode> &, vec<const char *> &, +- vec<rtx> &, HARD_REG_SET &, location_t); ++ vec<rtx> &, vec<rtx> &, HARD_REG_SET &, ++ location_t); + static rtx vax_function_arg (cumulative_args_t, const function_arg_info &); + static void vax_function_arg_advance (cumulative_args_t, + const function_arg_info &); +@@ -1179,6 +1180,7 @@ vax_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED, + vec<rtx> &inputs ATTRIBUTE_UNUSED, + vec<machine_mode> &input_modes ATTRIBUTE_UNUSED, + vec<const char *> &constraints ATTRIBUTE_UNUSED, ++ vec<rtx> &/*uses*/, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, + location_t /*loc*/) + { +diff --git a/gcc/config/visium/visium.cc b/gcc/config/visium/visium.cc +index 03c1a33e1..35b46ced9 100644 +--- a/gcc/config/visium/visium.cc ++++ b/gcc/config/visium/visium.cc +@@ -190,7 +190,7 @@ static tree visium_build_builtin_va_list (void); + static rtx_insn *visium_md_asm_adjust (vec<rtx> &, vec<rtx> &, + vec<machine_mode> &, + vec<const char *> &, vec<rtx> &, +- HARD_REG_SET &, location_t); ++ vec<rtx> &, HARD_REG_SET &, location_t); + + static bool visium_legitimate_constant_p (machine_mode, rtx); + +@@ -794,7 +794,8 @@ visium_conditional_register_usage (void) + static rtx_insn * + visium_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, + vec<machine_mode> & /*input_modes*/, +- vec<const char *> & /*constraints*/, vec<rtx> &clobbers, ++ vec<const char *> & /*constraints*/, ++ vec<rtx> &/*uses*/, vec<rtx> &clobbers, + HARD_REG_SET &clobbered_regs, location_t /*loc*/) + { + clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM)); +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 357c29a4d..4f93facf7 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11626,10 +11626,11 @@ from shared libraries (DLLs). + You need not define this macro if it would always evaluate to zero. 
+ @end defmac + +-@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<machine_mode>& @var{input_modes}, vec<const char *>& @var{constraints}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc}) ++@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<machine_mode>& @var{input_modes}, vec<const char *>& @var{constraints}, vec<rtx>& @var{uses}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc}) + This target hook may add @dfn{clobbers} to @var{clobbers} and + @var{clobbered_regs} for any hard regs the port wishes to automatically +-clobber for an asm. The @var{outputs} and @var{inputs} may be inspected ++clobber for an asm. It can also add hard registers that are used by the ++asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected + to avoid clobbering a register that is already used by the asm. @var{loc} + is the source location of the asm. + +diff --git a/gcc/recog.cc b/gcc/recog.cc +index cd2410ab2..5b81d5e21 100644 +--- a/gcc/recog.cc ++++ b/gcc/recog.cc +@@ -1977,13 +1977,17 @@ asm_noperands (const_rtx body) + { + /* Multiple output operands, or 1 output plus some clobbers: + body is +- (set OUTPUT (asm_operands ...))... (clobber (reg ...)).... */ +- /* Count backwards through CLOBBERs to determine number of SETs. */ ++ (set OUTPUT (asm_operands ...))... ++ (use (reg ...))... ++ (clobber (reg ...)).... */ ++ /* Count backwards through USEs and CLOBBERs to determine ++ number of SETs. */ + for (i = XVECLEN (body, 0); i > 0; i--) + { + if (GET_CODE (XVECEXP (body, 0, i - 1)) == SET) + break; +- if (GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER) ++ if (GET_CODE (XVECEXP (body, 0, i - 1)) != USE ++ && GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER) + return -1; + } + +@@ -2010,10 +2014,13 @@ + else + { + /* 0 outputs, but some clobbers: +- body is (asm_operands ...) (clobber (reg ...)).... */ ++ body is (asm_operands ...) ++ (use (reg ...))... ++ (clobber (reg ...)).... */ + /* Make sure all the other parallel things really are clobbers. */ + for (i = XVECLEN (body, 0) - 1; i > 0; i--) +- if (GET_CODE (XVECEXP (body, 0, i)) != CLOBBER) ++ if (GET_CODE (XVECEXP (body, 0, i)) != USE ++ && GET_CODE (XVECEXP (body, 0, i)) != CLOBBER) + return -1; + } + } +@@ -2080,7 +2087,8 @@ decode_asm_operands (rtx body, rtx *operands, rtx **operand_locs, + the SETs. Their constraints are in the ASM_OPERANDS itself. */ + for (i = 0; i < nparallel; i++) + { +- if (GET_CODE (XVECEXP (body, 0, i)) == CLOBBER) ++ if (GET_CODE (XVECEXP (body, 0, i)) == USE ++ || (GET_CODE (XVECEXP (body, 0, i)) == CLOBBER) + break; /* Past last SET */ + gcc_assert (GET_CODE (XVECEXP (body, 0, i)) == SET); + if (operands) +diff --git a/gcc/target.def b/gcc/target.def +index a57e51b0d..60096c60c 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -4309,7 +4309,8 @@ DEFHOOK + (md_asm_adjust, + "This target hook may add @dfn{clobbers} to @var{clobbers} and\n\ + @var{clobbered_regs} for any hard regs the port wishes to automatically\n\ +-clobber for an asm. The @var{outputs} and @var{inputs} may be inspected\n\ ++clobber for an asm. It can also add hard registers that are used by the\n\ ++asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected\n\ + to avoid clobbering a register that is already used by the asm.
@var{loc}\n\ + is the source location of the asm.\n\ + \n\ +@@ -4320,7 +4321,7 @@ changes to @var{inputs} must be accompanied by the corresponding changes\n\ + to @var{input_modes}.", + rtx_insn *, + (vec<rtx>& outputs, vec<rtx>& inputs, vec<machine_mode>& input_modes, +- vec<const char *>& constraints, vec<rtx>& clobbers, ++ vec<const char *>& constraints, vec<rtx>& uses, vec<rtx>& clobbers, + HARD_REG_SET& clobbered_regs, location_t loc), + NULL) + +-- +2.33.0 +
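The hook change above is mechanical, so it is easy to lose sight of what a port actually does with the new parameter. A minimal sketch of a port-side hook, assuming a hypothetical target whose asms implicitly read and write one special hard register (example_md_asm_adjust and SPECIAL_STATE_REGNUM are invented names, not part of this patch; the real SME handling lands in later patches of this series):

  /* Hypothetical target hook.  Pushing REG onto "uses" makes
     expand_asm_loc/expand_asm_stmt wrap the ASM_OPERANDS in a PARALLEL
     that carries a (use (reg ...)) alongside the usual clobbers.  */
  static rtx_insn *
  example_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
                         vec<machine_mode> & /*input_modes*/,
                         vec<const char *> & /*constraints*/,
                         vec<rtx> &uses, vec<rtx> &clobbers,
                         HARD_REG_SET &clobbered_regs, location_t /*loc*/)
  {
    rtx reg = gen_rtx_REG (DImode, SPECIAL_STATE_REGNUM);
    uses.safe_push (reg);      /* read: new in this patch */
    clobbers.safe_push (reg);  /* write: as before */
    SET_HARD_REG_BIT (clobbered_regs, SPECIAL_STATE_REGNUM);
    return NULL;
  }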
View file
_service:tar_scm:0151-LoongArch-Remove-unused-useless-definitions.patch
Added
@@ -0,0 +1,123 @@ +From 6ee300fd31e000efba141ed8806e56bd03826197 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 15 Mar 2024 09:30:25 +0800 +Subject: [PATCH 151/188] LoongArch: Remove unused/useless definitions. + +This patch removes some unnecessary definitions of target hook functions +according to the GCC internals documentation. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_cfun_has_cprestore_slot_p): Delete. + (loongarch_adjust_insn_length): Delete. + (current_section_name): Delete. + (loongarch_split_symbol_type): Delete. + * config/loongarch/loongarch.cc + (loongarch_case_values_threshold): Delete. + (loongarch_spill_class): Delete. + (TARGET_OPTAB_SUPPORTED_P): Delete. + (TARGET_CASE_VALUES_THRESHOLD): Delete. + (TARGET_SPILL_CLASS): Delete. +--- + gcc/config/loongarch/loongarch-protos.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 26 ------------------------- + 2 files changed, 31 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 87b94e8b0..3dac20279 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -93,7 +93,6 @@ extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); + extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); + extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); +-extern bool loongarch_cfun_has_cprestore_slot_p (void); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern bool loongarch_expand_vec_cmp (rtx *); +@@ -135,7 +134,6 @@ extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, + unsigned int, + machine_mode); +-extern int loongarch_adjust_insn_length (rtx_insn *, int); + extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, + const char *, + const char *); +@@ -157,7 +155,6 @@ extern bool loongarch_global_symbol_noweak_p (const_rtx); + extern bool loongarch_weak_symbol_p (const_rtx); + extern bool loongarch_symbol_binds_local_p (const_rtx); + +-extern const char *current_section_name (void); + extern unsigned int current_section_flags (void); + extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); +@@ -198,8 +195,6 @@ extern bool loongarch_epilogue_uses (unsigned int); + extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); + extern bool loongarch_split_symbol_type (enum loongarch_symbol_type); + +-typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); +- + extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1e3981e19..903c0d4ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10812,23 +10812,6 @@ loongarch_expand_vec_cmp (rtx operands[]) + return true; + } + +-/* Implement TARGET_CASE_VALUES_THRESHOLD. */ +- +-unsigned int +-loongarch_case_values_threshold (void) +-{ +- return default_case_values_threshold (); +-} +- +-/* Implement TARGET_SPILL_CLASS.
*/ +- +-static reg_class_t +-loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED) +-{ +- return NO_REGS; +-} +- + /* Implement TARGET_PROMOTE_FUNCTION_MODE. */ + + /* This function is equivalent to default_promote_function_mode_always_promote +@@ -11283,9 +11266,6 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + +-#undef TARGET_OPTAB_SUPPORTED_P +-#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p +- + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +@@ -11355,18 +11335,12 @@ loongarch_asm_code_end (void) + #undef TARGET_SCHED_REASSOCIATION_WIDTH + #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +-#undef TARGET_CASE_VALUES_THRESHOLD +-#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold +- + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + + #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS + #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +-#undef TARGET_SPILL_CLASS +-#define TARGET_SPILL_CLASS loongarch_spill_class +- + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs + #undef TARGET_HARD_REGNO_MODE_OK +-- +2.43.0 +
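The deletions above are behavior-preserving because each removed definition only restated a default. For instance, once TARGET_CASE_VALUES_THRESHOLD is no longer defined, the middle end falls back to the default hook, which the deleted loongarch_case_values_threshold wrapper called anyway. The default in gcc/targhooks.cc reads roughly as follows (quoted from memory of the GCC sources, so treat this as a sketch rather than an exact copy):

  /* The behavior the port now inherits: prefer a jump table once a
     switch statement has this many case labels.  */
  unsigned int
  default_case_values_threshold (void)
  {
    return (targetm.have_casesi () ? 4 : 5);
  }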
View file
_service:tar_scm:0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
Added
@@ -0,0 +1,998 @@ +From 763db5ed42e18cdddf979dda82056345e3af15ed Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 19 Jun 2023 15:47:46 +0100 +Subject: [PATCH 053/157] [Backport][SME] New compact syntax for insn and + insn_split in Machine Descriptions. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=957ae90406591739b68e95ad49a0232faeb74217 + +This patch adds support for a compact syntax for specifying constraints in +instruction patterns. Credit for the idea goes to Richard Earnshaw. + +With this new syntax we want a clean break from the current limitations to make +something that is hopefully easier to use and maintain. + +The idea behind this compact syntax is that often times it's quite hard to +correlate the entries in the constraints list, attributes and instruction lists. + +One has to count and this often is tedious. Additionally when changing a single +line in the insn multiple lines in a diff change, making it harder to see what's +going on. + +This new syntax takes into account many of the common things that are done in MD +files. It's also worth saying that this version is intended to deal with the +common case of string based alternatives. For C chunks we have some ideas +but those are not intended to be addressed here. + +It's easiest to explain with an example: + +normal syntax: + +(define_insn_and_split "*movsi_aarch64" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))] + "(register_operand (operands[0], SImode) + || aarch64_reg_or_zero (operands[1], SImode))" + "@ + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %1 + # + * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); + ldr\\t%w0, %1 + ldr\\t%s0, %1 + str\\t%w1, %0 + str\\t%s1, %0 + adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1] + adr\\t%x0, %c1 + adrp\\t%x0, %A1 + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);" + "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) + && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" + [(const_int 0)] + "{ + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + }" + ;; The "mov_imm" type for CNT is just a placeholder. + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, + load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") + (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") +] +) + +New syntax: + +(define_insn_and_split "*movsi_aarch64" + [(set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "aarch64_mov_operand"))] + "(register_operand (operands[0], SImode) + || aarch64_reg_or_zero (operands[1], SImode))" + {@ [cons: =0, 1; attrs: type, arch, length] + [r , r ; mov_reg , * , 4] mov\t%w0, %w1 + [k , r ; mov_reg , * , 4] ^ + [r , k ; mov_reg , * , 4] ^ + [r , M ; mov_imm , * , 4] mov\t%w0, %1 + [r , n ; mov_imm , * ,16] # + /* The "mov_imm" type for CNT is just a placeholder.
*/ + [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); + [r , m ; load_4 , * , 4] ldr\t%w0, %1 + [w , m ; load_4 , fp , 4] ldr\t%s0, %1 + [m , rZ ; store_4 , * , 4] str\t%w1, %0 + [m , w ; store_4 , fp , 4] str\t%s1, %0 + [r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1] + [r , Usa; adr , * , 4] adr\t%x0, %c1 + [r , Ush; adr , * , 4] adrp\t%x0, %A1 + [w , rZ ; f_mcr , fp , 4] fmov\t%s0, %w1 + [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 + [w , w ; fmov , fp , 4] fmov\t%s0, %s1 + [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); + } + "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) + && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" + [(const_int 0)] + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } +) + +The main syntax rules are as follows (See docs for full rules): + - Template must start with "{@" and end with "}" to use the new syntax. + - "{@" is followed by a layout in square brackets which is "cons:" followed by + a list of match_operand/match_scratch IDs, then a semicolon, then the + same for attributes ("attrs:"). Both sections are optional (so you can + use only cons, or only attrs, or both), and cons must come before attrs + if present. + - Each alternative begins with any amount of whitespace. + - Following the whitespace is a comma-separated list of constraints and/or + attributes within brackets [], with sections separated by a semicolon. + - Following the closing ']' is any amount of whitespace, and then the actual + asm output. + - Spaces are allowed in the list (they will simply be removed). + - All alternatives should be specified: a blank list should be + "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if + you leave certain attributes empty, I have found). + - The actual constraint string in the match_operand or match_scratch, and + the attribute string in the set_attr, must be blank or an empty string + (you can't combine the old and new syntaxes). + - The common idiom * return can be shortened by using <<. + - Any unexpanded iterators left during processing will result in an error at + compile time. If for some reason <> is needed in the output then these + must be escaped using \. + - Within an {@ block both multiline and singleline C comments are allowed, but + when used outside of a C block they must be the only non-whitespace blocks on + the line. + - Inside an {@ block any unexpanded iterators will result in a compile time + fault instead of incorrect assembly being generated at runtime. If the + literal <> is needed in the output this needs to be escaped with \<\>. + - This check is not performed inside C blocks (lines starting with *). + - Instead of copying the previous instruction again in the next pattern, one + can use ^ to refer to the previous asm string. + +This patch works by blindly transforming the new syntax into the old syntax, +so it doesn't do extensive checking. However, it does verify that: + - The correct number of constraints/attributes are specified. + - You haven't mixed old and new syntax. + - The specified operand IDs/attribute names actually exist. + - You don't have duplicate cons entries. + +If something goes wrong, it may write invalid constraints/attributes/template +back into the rtx. But this shouldn't matter because error_at will cause the +program to fail on exit anyway.
+ +Because this transformation occurs as early as possible (before patterns are +queued), the rest of the compiler can completely ignore the new syntax and +assume that the old syntax will always be used. + +This doesn't seem to have any measurable effect on the runtime of gen* +programs. + +gcc/ChangeLog: + + * gensupport.cc (class conlist, add_constraints, add_attributes, + skip_spaces, expect_char, preprocess_compact_syntax, + parse_section_layout, parse_section, convert_syntax): New. + (process_rtx): Check for conversion. + * genoutput.cc (process_template): Check for unresolved iterators. + (class data): Add compact_syntax_p. + (gen_insn): Use it. + * gensupport.h (compact_syntax): New. + (hash-set.h): Include. + * doc/md.texi: Document it. + +Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com> +--- + gcc/doc/md.texi | 163 +++++++++++++++ + gcc/genoutput.cc | 48 ++++- + gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/gensupport.h | 3 + + 4 files changed, 709 insertions(+), 3 deletions(-) + +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 3b544358b..04ace8f7f 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -27,6 +27,7 @@ See the next chapter for information on the C header file. + from such an insn. + * Output Statement:: For more generality, write C code to output + the assembler code. ++* Compact Syntax:: Compact syntax for writing machine descriptors. + * Predicates:: Controlling what kinds of operands can be used + for an insn. + * Constraints:: Fine-tuning operand selection. +@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template: + @end group + @end smallexample + ++@node Compact Syntax ++@section Compact Syntax ++@cindex compact syntax ++ ++When a @code{define_insn} or @code{define_insn_and_split} has multiple ++alternatives it may be beneficial to use the compact syntax when specifying ++alternatives. ++ ++This syntax puts the constraints and attributes on the same horizontal line as ++the instruction assembly template. ++ ++As an example ++ ++@smallexample ++@group ++(define_insn_and_split "" ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r") ++ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv"))] ++ "" ++ "@@ ++ mov\\t%w0, %w1 ++ mov\\t%w0, %w1 ++ mov\\t%w0, %w1 ++ mov\\t%w0, %1 ++ # ++ * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);" ++ "&& true" ++ [(const_int 0)] ++ @{ ++ aarch64_expand_mov_immediate (operands[0], operands[1]); ++ DONE; ++ @} ++ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm") ++ (set_attr "arch" "*,*,*,*,*,sve") ++ (set_attr "length" "4,4,4,4,*, 4") ++] ++) ++@end group ++@end smallexample ++ ++can be better expressed as: ++ ++@smallexample ++@group ++(define_insn_and_split "" ++ [(set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "aarch64_mov_operand"))] ++ "" ++ @{@@ [cons: =0, 1; attrs: type, arch, length] ++ [r , r ; mov_reg , * , 4] mov\t%w0, %w1 ++ [k , r ; mov_reg , * , 4] ^ ++ [r , k ; mov_reg , * , 4] ^ ++ [r , M ; mov_imm , * , 4] mov\t%w0, %1 ++ [r , n ; mov_imm , * , *] # ++ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); ++ @} ++ "&& true" ++ [(const_int 0)] ++ @{ ++ aarch64_expand_mov_immediate (operands[0], operands[1]); ++ DONE; ++ @} ++) ++@end group ++@end smallexample ++ ++The syntax rules are as follows: ++@itemize @bullet ++@item ++Templates must start with @samp{@{@@} to use the new syntax.
++ ++@item ++@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:} ++followed by a comma-separated list of @code{match_operand}/@code{match_scratch} ++operand numbers, then a semicolon, followed by the same for attributes ++(@samp{attrs:}). Operand modifiers like @code{=} and @code{+} can be placed ++before an operand number. ++Both sections are optional (so you can use only @samp{cons}, or only ++@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if ++present. ++ ++@item ++Each alternative begins with any amount of whitespace. ++ ++@item ++Following the whitespace is a comma-separated list of "constraints" and/or ++"attributes" within brackets @code{[]}, with sections separated by a semicolon. ++ ++@item ++Should you want to copy the previous asm line, the symbol @code{^} can be used. ++This allows less copy pasting between alternatives and reduces the number of ++lines to update on changes. ++ ++@item ++When using C functions for output, the idiom @samp{* return @var{function};} ++can be replaced with the shorthand @samp{<< @var{function};}. ++ ++@item ++Following the closing @samp{]} is any amount of whitespace, and then the actual ++asm output. ++ ++@item ++Spaces are allowed in the list (they will simply be removed). ++ ++@item ++All constraint alternatives should be specified. For example, a list ++of three blank alternatives should be written @samp{[,,]} rather than ++@samp{[]}. ++ ++@item ++All attribute alternatives should be non-empty, with @samp{*} ++representing the default attribute value. For example, a list of three ++default attribute values should be written @samp{[*,*,*]} rather than ++@samp{[]}. ++ ++@item ++Within an @samp{@{@@} block both multiline and singleline C comments are ++allowed, but when used outside of a C block they must be the only non-whitespace ++blocks on the line. ++ ++@item ++Within an @samp{@{@@} block, any iterators that do not get expanded will result ++in an error. If for some reason it is required to have @code{<} or @code{>} in ++the output then these must be escaped using @backslashchar{}. ++ ++@item ++It is possible to use the @samp{attrs} list to specify some attributes and to ++use the normal @code{set_attr} syntax to specify other attributes. There must ++not be any overlap between the two lists. ++ ++In other words, the following is valid: ++@smallexample ++@group ++(define_insn_and_split "" ++ [(set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "aarch64_mov_operand"))] ++ "" ++ @{@@ [cons: 0, 1; attrs: type, arch, length]@} ++ @dots{} ++ [(set_attr "foo" "mov_imm")] ++) ++@end group ++@end smallexample ++ ++but this is not valid: ++@smallexample ++@group ++(define_insn_and_split "" ++ [(set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "aarch64_mov_operand"))] ++ "" ++ @{@@ [cons: 0, 1; attrs: type, arch, length]@} ++ @dots{} ++ [(set_attr "arch" "bar") ++ (set_attr "foo" "mov_imm")] ++) ++@end group ++@end smallexample ++ ++because it specifies @code{arch} twice. ++@end itemize ++ + @node Predicates + @section Predicates + @cindex predicates +diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc +index 6bb03e286..de5dafdbf 100644 +--- a/gcc/genoutput.cc ++++ b/gcc/genoutput.cc +@@ -157,6 +157,7 @@ public: + int n_alternatives; /* Number of alternatives in each constraint */ + int operand_number; /* Operand index in the big array. */ + int output_format; /* INSN_OUTPUT_FORMAT_*.
*/ ++ bool compact_syntax_p; + struct operand_data operand[MAX_MAX_OPERANDS]; + }; + +@@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code) + if (sp != ep) + message_at (d->loc, "trailing whitespace in output template"); + +- while (cp < sp) ++ /* Check for any unexpanded iterators. */ ++ if (bp[0] != '*' && d->compact_syntax_p) + { +- putchar (*cp); +- cp++; ++ const char *p = cp; ++ const char *last_bracket = nullptr; ++ while (p < sp) ++ { ++ if (*p == '\\' && p + 1 < sp) ++ { ++ putchar (*p); ++ putchar (*(p+1)); ++ p += 2; ++ continue; ++ } ++ ++ if (*p == '>' && last_bracket && *last_bracket == '<') ++ { ++ int len = p - last_bracket; ++ fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'", ++ len - 1, last_bracket + 1, cp); ++ } ++ else if (*p == '<' || *p == '>') ++ last_bracket = p; ++ ++ putchar (*p); ++ p += 1; ++ } ++ ++ if (last_bracket) ++ { ++ char *nl = strchr (const_cast<char*> (cp), '\n'); ++ if (nl) ++ *nl = '\0'; ++ fatal_at (d->loc, "unmatched angle brackets, likely an " ++ "error in iterator syntax in %s", cp); ++ } ++ } ++ else ++ { ++ while (cp < sp) ++ putchar (*(cp++)); + } + ++ cp = sp; + + if (!found_star) + puts ("\","); + else if (*bp != '*') +@@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info) + else + d->name = 0; + ++ d->compact_syntax_p = compact_syntax.contains (insn); ++ + /* Build up the list in the same order as the insns are seen + in the machine description. */ + d->next = 0; +diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc +index 42680499d..23c61dcdd 100644 +--- a/gcc/gensupport.cc ++++ b/gcc/gensupport.cc +@@ -18,6 +18,8 @@ + <http://www.gnu.org/licenses/>. */ + + #include "bconfig.h" ++#define INCLUDE_STRING ++#define INCLUDE_VECTOR + #include "system.h" + #include "coretypes.h" + #include "tm.h" +@@ -33,6 +35,8 @@ + static rtx operand_data[MAX_OPERANDS]; + static rtx match_operand_entries_in_pattern[MAX_OPERANDS]; + static char used_operands_numbers[MAX_OPERANDS]; ++/* List of entries which are part of the new syntax. */ ++hash_set<rtx> compact_syntax; + + + /* In case some macros used by files we include need it, define this here. */ +@@ -545,6 +549,497 @@ gen_rewrite_sequence (rtvec vec) + return new_vec; + } + ++/* The following is for handling the compact syntax for constraints and ++ attributes. ++ ++ The normal syntax looks like this: ++ ++ ... ++ (match_operand: 0 "s_register_operand" "r,I,k") ++ (match_operand: 2 "s_register_operand" "r,k,I") ++ ... ++ "@ ++ <asm> ++ <asm> ++ <asm>" ++ ... ++ (set_attr "length" "4,8,8") ++ ++ The compact syntax looks like this: ++ ++ ... ++ (match_operand: 0 "s_register_operand") ++ (match_operand: 2 "s_register_operand") ++ ... ++ {@ [cons: 0, 2; attrs: length] ++ [r,r; 4] <asm> ++ [I,k; 8] <asm> ++ [k,I; 8] <asm> ++ } ++ ... ++ <other attributes> ++ ++ This is the only place where this syntax needs to be handled. Relevant ++ patterns are transformed from compact to the normal syntax before they are ++ queued, so none of the gen* programs need to know about this syntax at all. ++ ++ Conversion process (convert_syntax): ++ ++ 0) Check that pattern actually uses new syntax (check for {@ ... }). ++ ++ 1) Get the "layout", i.e. the "[cons: 0 2; attrs: length]" from the above ++ example. cons must come first; both are optional. Set up two vecs, ++ convec and attrvec, for holding the results of the transformation. ++ ++ 2) For each alternative: parse the list of constraints and/or attributes, ++ and enqueue them in the relevant lists in convec and attrvec.
By the end ++ of this process, convec[N].con and attrvec[N].con should contain regular ++ syntax constraint/attribute lists like "r,I,k". Copy the asm to a string ++ as we go. ++ ++ 3) Search the rtx and write the constraint and attribute lists into the ++ correct places. Write the asm back into the template. */ ++ ++/* Helper class for shuffling constraints/attributes in convert_syntax and ++ add_constraints/add_attributes. This includes commas but not whitespace. */ ++ ++class conlist { ++private: ++ std::string con; ++ ++public: ++ std::string name; ++ int idx = -1; ++ ++ conlist () = default; ++ ++ /* [ns..ns + len) should be a string with the id of the rtx to match ++ i.e. if rtx is the relevant match_operand or match_scratch then ++ [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then ++ [ns..ns + len) should equal XSTR (rtx, 0). */ ++ conlist (const char *ns, unsigned int len, bool numeric) ++ { ++ /* Trim leading whitespaces. */ ++ while (ISBLANK (*ns)) ++ { ++ ns++; ++ len--; ++ } ++ ++ /* Trim trailing whitespace. */ ++ for (int i = len - 1; i >= 0; i--, len--) ++ if (!ISBLANK (ns[i])) ++ break; ++ ++ /* Parse off any modifiers. */ ++ while (!ISALNUM (*ns)) ++ { ++ con += *(ns++); ++ len--; ++ } ++ ++ name.assign (ns, len); ++ if (numeric) ++ idx = std::stoi (name); ++ } ++ ++ /* Adds a character to the end of the string. */ ++ void add (char c) ++ { ++ con += c; ++ } ++ ++ /* Output the string in the form of a brand-new char *, then effectively ++ clear the internal string by resetting len to 0. */ ++ char *out () ++ { ++ /* Final character is always a trailing comma, so strip it out. */ ++ char *q = xstrndup (con.c_str (), con.size () - 1); ++ con.clear (); ++ return q; ++ } ++}; ++ ++typedef std::vector<conlist> vec_conlist; ++ ++/* Add constraints to an rtx. This function is similar to remove_constraints. ++ Errors if adding the constraints would overwrite existing constraints. */ ++ ++static void ++add_constraints (rtx part, file_location loc, vec_conlist &cons) ++{ ++ const char *format_ptr; ++ ++ if (part == NULL_RTX) ++ return; ++ ++ /* If match_op or match_scr, check if we have the right one, and if so, copy ++ over the constraint list. */ ++ if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH) ++ { ++ int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1; ++ unsigned id = XINT (part, 0); ++ ++ if (id >= cons.size () || cons[id].idx == -1) ++ return; ++ ++ if (XSTR (part, field)[0] != '\0') ++ { ++ error_at (loc, "can't mix normal and compact constraint syntax"); ++ return; ++ } ++ XSTR (part, field) = cons[id].out (); ++ cons[id].idx = -1; ++ } ++ ++ format_ptr = GET_RTX_FORMAT (GET_CODE (part)); ++ ++ /* Recursively search the rtx. */ ++ for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++) ++ switch (*format_ptr++) ++ { ++ case 'e': ++ case 'u': ++ add_constraints (XEXP (part, i), loc, cons); ++ break; ++ case 'E': ++ if (XVEC (part, i) != NULL) ++ for (int j = 0; j < XVECLEN (part, i); j++) ++ add_constraints (XVECEXP (part, i, j), loc, cons); ++ break; ++ default: ++ continue; ++ } ++} ++ ++/* Add ATTRS to definition X's attribute list. */ ++ ++static void ++add_attributes (rtx x, vec_conlist &attrs) ++{ ++ unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3; ++ rtvec orig = XVEC (x, attr_index); ++ if (orig) ++ { ++ size_t n_curr = XVECLEN (x, attr_index); ++ rtvec copy = rtvec_alloc (n_curr + attrs.size ()); ++ ++ /* Create a shallow copy of existing entries.
*/ ++ memcpy (&copy->elem[attrs.size ()], &orig->elem[0], ++ sizeof (rtx) * n_curr); ++ XVEC (x, attr_index) = copy; ++ } ++ else ++ XVEC (x, attr_index) = rtvec_alloc (attrs.size ()); ++ ++ /* Create the new elements. */ ++ for (unsigned i = 0; i < attrs.size (); i++) ++ { ++ rtx attr = rtx_alloc (SET_ATTR); ++ XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ()); ++ XSTR (attr, 1) = attrs[i].out (); ++ XVECEXP (x, attr_index, i) = attr; ++ } ++} ++ ++/* Consumes spaces and tabs. */ ++ ++static inline void ++skip_spaces (const char **str) ++{ ++ while (ISBLANK (**str)) ++ (*str)++; ++} ++ ++/* Consumes the given character, if it's there. */ ++ ++static inline bool ++expect_char (const char **str, char c) ++{ ++ if (**str != c) ++ return false; ++ (*str)++; ++ return true; ++} ++ ++/* Parses the section layout that follows a "{@" if using new syntax. Builds ++ a vector for a single section. E.g. if we have "attrs: length, arch..." ++ then list will have two elements, the first for "length" and the second ++ for "arch". */ ++ ++static void ++parse_section_layout (file_location loc, const char **templ, const char *label, ++ vec_conlist &list, bool numeric) ++{ ++ const char *name_start; ++ size_t label_len = strlen (label); ++ if (strncmp (label, *templ, label_len) == 0) ++ { ++ *templ += label_len; ++ ++ /* Gather the names. */ ++ while (**templ != ';' && **templ != ']') ++ { ++ skip_spaces (templ); ++ name_start = *templ; ++ int len = 0; ++ char val = (*templ)[len]; ++ while (val != ',' && val != ';' && val != ']') ++ { ++ if (val == 0 || val == '\n') ++ fatal_at (loc, "missing ']'"); ++ val = (*templ)[++len]; ++ } ++ *templ += len; ++ if (val == ',') ++ (*templ)++; ++ list.push_back (conlist (name_start, len, numeric)); ++ } ++ } ++} ++ ++/* Parse a section, a section is defined as a named space separated list, e.g. ++ ++ foo: a, b, c ++ ++ is a section named "foo" with entries a, b and c. */ ++ ++static void ++parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no, ++ vec_conlist &list, file_location loc, const char *name) ++{ ++ unsigned int i; ++ ++ /* Go through the list, one character at a time, adding said character ++ to the correct string. */ ++ for (i = 0; **templ != ']' && **templ != ';'; (*templ)++) ++ if (!ISBLANK (**templ)) ++ { ++ if (**templ == 0 || **templ == '\n') ++ fatal_at (loc, "missing ']'"); ++ list[i].add (**templ); ++ if (**templ == ',') ++ { ++ ++i; ++ if (i == n_elems) ++ fatal_at (loc, "too many %ss in alternative %d: expected %d", ++ name, alt_no, n_elems); ++ } ++ } ++ ++ if (i + 1 < n_elems) ++ fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d", ++ name, alt_no, n_elems, i); ++ ++ list[i].add (','); ++} ++ ++/* The compact syntax has more convenience syntaxes. As such we post process ++ the lines to get them back to something the normal syntax understands. */ ++ ++static void ++preprocess_compact_syntax (file_location loc, int alt_no, std::string &line, ++ std::string &last_line) ++{ ++ /* Check if we're copying the last statement. */ ++ if (line.find ("^") == 0 && line.size () == 1) ++ { ++ if (last_line.empty ()) ++ fatal_at (loc, "found instruction to copy previous line (^) in " ++ "alternative %d but no previous line to copy", alt_no); ++ line = last_line; ++ return; ++ } ++ ++ std::string result; ++ std::string buffer; ++ /* Check if we have << which means return c statement.
*/ ++ if (line.find ("<<") == 0) ++ { ++ result.append ("* return "); ++ const char *chunk = line.c_str () + 2; ++ skip_spaces (&chunk); ++ result.append (chunk); ++ } ++ else ++ result.append (line); ++ ++ line = result; ++ return; ++} ++ ++/* Converts an rtx from compact syntax to normal syntax if possible. */ ++ ++static void ++convert_syntax (rtx x, file_location loc) ++{ ++ int alt_no; ++ unsigned int templ_index; ++ const char *templ; ++ vec_conlist tconvec, convec, attrvec; ++ ++ templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2; ++ ++ templ = XTMPL (x, templ_index); ++ ++ /* Templates with constraints start with "{@". */ ++ if (strncmp ("*{@", templ, 3)) ++ return; ++ ++ /* Get the layout for the template. */ ++ templ += 3; ++ skip_spaces (&templ); ++ ++ if (!expect_char (&templ, '')) ++ fatal_at (loc, "expecing `' to begin section list"); ++ ++ parse_section_layout (loc, &templ, "cons:", tconvec, true); ++ ++ /* Check for any duplicate cons entries and sort based on i. */ ++ for (auto e : tconvec) ++ { ++ unsigned idx = e.idx; ++ if (idx >= convec.size ()) ++ convec.resize (idx + 1); ++ ++ if (convecidx.idx >= 0) ++ fatal_at (loc, "duplicate cons number found: %d", idx); ++ convecidx = e; ++ } ++ tconvec.clear (); ++ ++ if (*templ != '') ++ { ++ if (*templ == ';') ++ skip_spaces (&(++templ)); ++ parse_section_layout (loc, &templ, "attrs:", attrvec, false); ++ } ++ ++ if (!expect_char (&templ, '')) ++ fatal_at (loc, "expecting `` to end section list - section list must have " ++ "cons first, attrs second"); ++ ++ /* We will write the un-constrainified template into new_templ. */ ++ std::string new_templ; ++ new_templ.append ("@"); ++ ++ /* Skip to the first proper line. */ ++ skip_spaces (&templ); ++ if (*templ == 0) ++ fatal_at (loc, "'{@...}' blocks must have at least one alternative"); ++ if (*templ != '\n') ++ fatal_at (loc, "unexpected character '%c' after ''", *templ); ++ templ++; ++ ++ alt_no = 0; ++ std::string last_line; ++ ++ /* Process the alternatives. */ ++ while (*(templ - 1) != '\0') ++ { ++ /* Skip leading whitespace. */ ++ std::string buffer; ++ skip_spaces (&templ); ++ ++ /* Check if we're at the end. */ ++ if (templ0 == '}' && templ1 == '\0') ++ break; ++ ++ if (expect_char (&templ, '')) ++ { ++ new_templ += '\n'; ++ new_templ.append (buffer); ++ /* Parse the constraint list, then the attribute list. */ ++ if (convec.size () > 0) ++ parse_section (&templ, convec.size (), alt_no, convec, loc, ++ "constraint"); ++ ++ if (attrvec.size () > 0) ++ { ++ if (convec.size () > 0 && !expect_char (&templ, ';')) ++ fatal_at (loc, "expected `;' to separate constraints " ++ "and attributes in alternative %d", alt_no); ++ ++ parse_section (&templ, attrvec.size (), alt_no, ++ attrvec, loc, "attribute"); ++ } ++ ++ if (!expect_char (&templ, '')) ++ fatal_at (loc, "expected end of constraint/attribute list but " ++ "missing an ending `' in alternative %d", alt_no); ++ } ++ else if (templ0 == '/' && templ1 == '/') ++ { ++ templ += 2; ++ /* Glob till newline or end of string. */ ++ while (*templ != '\n' || *templ != '\0') ++ templ++; ++ ++ /* Skip any newlines or whitespaces needed. */ ++ while (ISSPACE(*templ)) ++ templ++; ++ continue; ++ } ++ else if (templ0 == '/' && templ1 == '*') ++ { ++ templ += 2; ++ /* Glob till newline or end of multiline comment. 
*/ ++ while (templ[0] != 0 && templ[0] != '*' && templ[1] != '/') ++ templ++; ++ ++ while (templ[0] != '*' || templ[1] != '/') ++ { ++ if (templ[0] == 0) ++ fatal_at (loc, "unterminated '/*'"); ++ templ++; ++ } ++ templ += 2; ++ ++ /* Skip any newlines or whitespaces needed. */ ++ while (ISSPACE(*templ)) ++ templ++; ++ continue; ++ } ++ else ++ fatal_at (loc, "expected constraint/attribute list at beginning of " ++ "alternative %d but missing a starting `['", alt_no); ++ ++ /* Skip whitespace between list and asm. */ ++ skip_spaces (&templ); ++ ++ /* Copy asm to new template. */ ++ std::string line; ++ while (*templ != '\n' && *templ != '\0') ++ line += *templ++; ++ ++ /* Apply any pre-processing needed to the line. */ ++ preprocess_compact_syntax (loc, alt_no, line, last_line); ++ new_templ.append (line); ++ last_line = line; ++ ++ /* Normal "*..." syntax expects the closing quote to be on the final ++ line of asm, whereas we allow the closing "}" to be on its own line. ++ Postpone copying the '\n' until we know that there is another ++ alternative in the list. */ ++ while (ISSPACE (*templ)) ++ templ++; ++ ++alt_no; ++ } ++ ++ /* Write the constraints and attributes into their proper places. */ ++ if (convec.size () > 0) ++ add_constraints (x, loc, convec); ++ ++ if (attrvec.size () > 0) ++ add_attributes (x, attrvec); ++ ++ /* Copy over the new un-constrainified template. */ ++ XTMPL (x, templ_index) = xstrdup (new_templ.c_str ()); ++ ++ /* Register for later checks during iterator expansions. */ ++ compact_syntax.add (x); ++} ++ + /* Process a top level rtx in some way, queuing as appropriate. */ + + static void +@@ -553,10 +1048,12 @@ process_rtx (rtx desc, file_location loc) + switch (GET_CODE (desc)) + { + case DEFINE_INSN: ++ convert_syntax (desc, loc); + queue_pattern (desc, &define_insn_tail, loc); + break; + + case DEFINE_COND_EXEC: ++ convert_syntax (desc, loc); + queue_pattern (desc, &define_cond_exec_tail, loc); + break; + +@@ -631,6 +1128,7 @@ process_rtx (rtx desc, file_location loc) + attr = XVEC (desc, split_code + 1); + PUT_CODE (desc, DEFINE_INSN); + XVEC (desc, 4) = attr; ++ convert_syntax (desc, loc); + + /* Queue them. */ + insn_elem = queue_pattern (desc, &define_insn_tail, loc); +diff --git a/gcc/gensupport.h b/gcc/gensupport.h +index 9a0fd7393..a19fc1319 100644 +--- a/gcc/gensupport.h ++++ b/gcc/gensupport.h +@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see + #ifndef GCC_GENSUPPORT_H + #define GCC_GENSUPPORT_H + ++#include "hash-set.h" + #include "read-md.h" + + struct obstack; +@@ -218,6 +219,8 @@ struct pattern_stats + int num_operand_vars; + }; + ++extern hash_set<rtx> compact_syntax; ++ + extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec); + extern void compute_test_codes (rtx, file_location, char *); + extern file_location get_file_location (rtx); +-- +2.33.0 +
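Two of the shorthands handled above are easy to miss when reading the parser: a lone ^ copies the previous alternative's asm string, and a leading << turns the line into the old "* return ..." C-block form. The following standalone snippet models that rewriting logic (a simplified re-implementation for illustration; expand_shorthand is an invented name, and the real code lives in preprocess_compact_syntax):

  #include <cctype>
  #include <string>

  /* Resolve the "^" and "<<" shorthands into plain old-syntax lines.  */
  static std::string
  expand_shorthand (const std::string &line, const std::string &last_line)
  {
    if (line == "^")
      return last_line;  /* reuse the previous asm string */
    if (line.compare (0, 2, "<<") == 0)
      {
        std::string::size_type i = 2;
        while (i < line.size () && std::isblank ((unsigned char) line[i]))
          i++;
        /* "<< foo (x);" becomes "* return foo (x);".  */
        return "* return " + line.substr (i);
      }
    return line;
  }

So << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); expands to * return aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);.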
View file
_service:tar_scm:0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch
Added
@@ -0,0 +1,110 @@ +From d569e34b29faee3658014b3900e9553a4880dac0 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan <panchenghui@loongson.cn> +Date: Fri, 15 Mar 2024 09:30:26 +0800 +Subject: [PATCH 152/188] LoongArch: Change loongarch_expand_vec_cmp()'s return + type from bool to void. + +This function always returns true at the end of its implementation, +so the return value is useless. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_cmp<mode><mode256_i>): Remove checking + of loongarch_expand_vec_cmp()'s return value. + (vec_cmpu<ILASX:mode><mode256_i>): Ditto. + * config/loongarch/lsx.md (vec_cmp<mode><mode_i>): Ditto. + (vec_cmpu<ILSX:mode><mode_i>): Ditto. + * config/loongarch/loongarch-protos.h + (loongarch_expand_vec_cmp): Change loongarch_expand_vec_cmp()'s return + type from bool to void. + * config/loongarch/loongarch.cc (loongarch_expand_vec_cmp): Ditto. +--- + gcc/config/loongarch/lasx.md | 6 ++---- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 3 +-- + gcc/config/loongarch/lsx.md | 6 ++---- + 4 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index f3b5ea373..45a0a8cc8 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1378,8 +1378,7 @@ + (match_operand:LASX 3 "register_operand"))) + "ISA_HAS_LASX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +@@ -1390,8 +1389,7 @@ + (match_operand:ILASX 3 "register_operand"))) + "ISA_HAS_LASX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 3dac20279..8523da8d6 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -95,7 +95,7 @@ extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +-extern bool loongarch_expand_vec_cmp (rtx *); ++extern void loongarch_expand_vec_cmp (rtx *); + extern void loongarch_expand_conditional_branch (rtx *); + extern void loongarch_expand_conditional_move (rtx *); + extern void loongarch_expand_conditional_trap (rtx); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 903c0d4ef..dbb318660 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10803,13 +10803,12 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode, + } + + /* Expand integer vector comparison */ +-bool ++void + loongarch_expand_vec_cmp (rtx operands[]) + { + + rtx_code code = GET_CODE (operands[1]); + loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]); +- return true; + } + + /* Implement TARGET_PROMOTE_FUNCTION_MODE.
*/ +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index d5aa3f46f..dc81093e9 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -518,8 +518,7 @@ + (match_operand:LSX 3 "register_operand"))) + "ISA_HAS_LSX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +@@ -530,8 +529,7 @@ + (match_operand:ILSX 3 "register_operand"))) + "ISA_HAS_LSX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +-- +2.43.0 +
View file
_service:tar_scm:0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
Added
@@ -0,0 +1,104 @@ +From 35b64175c6fd622212d0bf936e7e98c635e1c618 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 13 Sep 2023 14:50:30 +0100 +Subject: [PATCH 054/157] [Backport][SME] recog: Improve parser for pattern new + compact syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd1091fe455c1ede5993b4cdf10d0f7c461b86d7 + +Hi all, + +this is to add support for the new compact pattern syntax for the case +where the constraints do appear unsorted like: + +(define_insn "*<optab>si3_insn_uxtw" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI (SHIFT_no_rotate:SI + (match_operand:SI 1 "register_operand") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_si"))))] + "" + {@ [cons: =0, 2, 1] + [r, Uss, r] <shift>\\t%w0, %w1, %2 + [r, r, r] <shift>\\t%w0, %w1, %w2 + } + [(set_attr "type" "bfx,shift_reg")] +) + +Best Regards + + Andrea + +gcc/Changelog + +2023-09-20 Richard Sandiford <richard.sandiford@arm.com> + + * gensupport.cc (convert_syntax): Updated to support unordered + constraints in compact syntax. +--- + gcc/gensupport.cc | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc +index 23c61dcdd..97c614850 100644 +--- a/gcc/gensupport.cc ++++ b/gcc/gensupport.cc +@@ -895,19 +895,6 @@ convert_syntax (rtx x, file_location loc) + + parse_section_layout (loc, &templ, "cons:", tconvec, true); + +- /* Check for any duplicate cons entries and sort based on i. */ +- for (auto e : tconvec) +- { +- unsigned idx = e.idx; +- if (idx >= convec.size ()) +- convec.resize (idx + 1); +- +- if (convec[idx].idx >= 0) +- fatal_at (loc, "duplicate cons number found: %d", idx); +- convec[idx] = e; +- } +- tconvec.clear (); +- + if (*templ != ']') + { + if (*templ == ';') + skip_spaces (&(++templ)); + parse_section_layout (loc, &templ, "attrs:", attrvec, false); + } +@@ -950,13 +937,13 @@ convert_syntax (rtx x, file_location loc) + new_templ += '\n'; + new_templ.append (buffer); + /* Parse the constraint list, then the attribute list. */ +- if (convec.size () > 0) +- parse_section (&templ, convec.size (), alt_no, convec, loc, ++ if (tconvec.size () > 0) ++ parse_section (&templ, tconvec.size (), alt_no, tconvec, loc, + "constraint"); + + if (attrvec.size () > 0) + { +- if (convec.size () > 0 && !expect_char (&templ, ';')) ++ if (tconvec.size () > 0 && !expect_char (&templ, ';')) + fatal_at (loc, "expected `;' to separate constraints " + "and attributes in alternative %d", alt_no); + + parse_section (&templ, attrvec.size (), alt_no, + attrvec, loc, "attribute"); + } +@@ -1026,6 +1013,19 @@ convert_syntax (rtx x, file_location loc) + ++alt_no; + } + ++ /* Check for any duplicate cons entries and sort based on i. */ ++ for (auto e : tconvec) ++ { ++ unsigned idx = e.idx; ++ if (idx >= convec.size ()) ++ convec.resize (idx + 1); ++ ++ if (convec[idx].idx >= 0) ++ fatal_at (loc, "duplicate cons number found: %d", idx); ++ convec[idx] = e; ++ } ++ tconvec.clear (); ++ + /* Write the constraints and attributes into their proper places. */ + if (convec.size () > 0) + add_constraints (x, loc, convec); +-- +2.33.0 +
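Worked through on the example above: parse_section_layout fills tconvec in declared order (operand 0 with its = modifier, then 2, then 1), and parse_section now fills those same tconvec slots positionally for each alternative, so the row [r, Uss, r] attaches "r" to operand 0, "Uss" to operand 2 and "r" to operand 1. Only after all alternatives are parsed does the relocated loop scatter each entry into convec[e.idx], doing the duplicate check and the sort by operand number in one pass. Moving that loop is the whole fix: sorting tconvec before parsing broke the positional correspondence between the columns and the declared operand IDs.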
View file
_service:tar_scm:0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch
Added
@@ -0,0 +1,104 @@
+From 6c4a2fbdabab053a2a0fb1041e3ffccc3d853c97 Mon Sep 17 00:00:00 2001
+From: Chenghui Pan <panchenghui@loongson.cn>
+Date: Fri, 15 Mar 2024 09:30:27 +0800
+Subject: [PATCH 153/188] LoongArch: Combine UNITS_PER_FP_REG and
+ UNITS_PER_FPREG macros.
+
+These macros are completely same in definition, so we can keep the previous one
+and eliminate later one.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_hard_regno_mode_ok_uncached): Combine UNITS_PER_FP_REG and
+	UNITS_PER_FPREG macros.
+	(loongarch_hard_regno_nregs): Ditto.
+	(loongarch_class_max_nregs): Ditto.
+	(loongarch_get_separate_components): Ditto.
+	(loongarch_process_components): Ditto.
+	* config/loongarch/loongarch.h (UNITS_PER_FPREG): Ditto.
+	(UNITS_PER_HWFPVALUE): Ditto.
+	(UNITS_PER_FPVALUE): Ditto.
+---
+ gcc/config/loongarch/loongarch.cc | 10 +++++-----
+ gcc/config/loongarch/loongarch.h  |  7 ++-----
+ 2 files changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index dbb318660..8d9cda165 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -6773,7 +6773,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
+	 and TRUNC.  There's no point allowing sizes smaller than a word,
+	 because the FPU has no appropriate load/store instructions.  */
+       if (mclass == MODE_INT)
+-	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
++	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FP_REG;
+     }
+ 
+   return false;
+@@ -6816,7 +6816,7 @@ loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
+       if (LASX_SUPPORTED_MODE_P (mode))
+	return 1;
+ 
+-      return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
++      return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
+     }
+ 
+   /* All other registers are word-sized.  */
+@@ -6851,7 +6851,7 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
+	  else if (LSX_SUPPORTED_MODE_P (mode))
+	    size = MIN (size, UNITS_PER_LSX_REG);
+	  else
+-	    size = MIN (size, UNITS_PER_FPREG);
++	    size = MIN (size, UNITS_PER_FP_REG);
+	}
+       left &= ~reg_class_contents[FP_REGS];
+     }
+@@ -8227,7 +8227,7 @@ loongarch_get_separate_components (void)
+	  if (IMM12_OPERAND (offset))
+	    bitmap_set_bit (components, regno);
+ 
+-	  offset -= UNITS_PER_FPREG;
++	  offset -= UNITS_PER_FP_REG;
+	}
+ 
+   /* Don't mess with the hard frame pointer.  */
+@@ -8306,7 +8306,7 @@ loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+	if (bitmap_bit_p (components, regno))
+	  loongarch_save_restore_reg (mode, regno, offset, fn);
+ 
+-	offset -= UNITS_PER_FPREG;
++	offset -= UNITS_PER_FP_REG;
+	}
+ }
+ 
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 8bcdb8729..698e42aec 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -138,19 +138,16 @@ along with GCC; see the file COPYING3.  If not see
+ /* Width of a LASX vector register in bits.  */
+ #define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT)
+ 
+-/* For LARCH, width of a floating point register.  */
+-#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4)
+-
+ /* The largest size of value that can be held in floating-point
+    registers and moved with a single instruction.  */
+ #define UNITS_PER_HWFPVALUE \
+-  (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FPREG)
++  (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG)
+ 
+ /* The largest size of value that can be held in floating-point
+    registers.  */
+ #define UNITS_PER_FPVALUE \
+   (TARGET_SOFT_FLOAT ? 0 \
+-   : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \
++   : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \
+    : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
+ 
+ /* The number of bytes in a double.  */
+-- 
+2.43.0
+
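For illustration: the surviving macro feeds the usual round-up idiom for register counts. A small self-contained C sketch of the computation in loongarch_hard_regno_nregs, with UNITS_PER_FP_REG hard-coded to 8 (i.e. assuming TARGET_DOUBLE_FLOAT):

#include <stdio.h>

/* Round a mode size up to whole FP registers, as
   loongarch_hard_regno_nregs does.  */
#define UNITS_PER_FP_REG 8

static unsigned int
fp_nregs (unsigned int mode_size)
{
  return (mode_size + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
}

int
main (void)
{
  printf ("%u %u %u\n", fp_nregs (4), fp_nregs (8), fp_nregs (16));
  /* prints "1 1 2": a 16-byte value needs two 8-byte FP registers.  */
  return 0;
}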
View file
_service:tar_scm:0154-Backport-SME-recog-Support-space-in-cons.patch
Added
@@ -0,0 +1,49 @@
+From e593ad216bd1f4f75d9875898f352e0e5f978159 Mon Sep 17 00:00:00 2001
+From: Andrea Corallo <andrea.corallo@arm.com>
+Date: Fri, 15 Sep 2023 10:23:02 +0200
+Subject: [PATCH 055/157] [Backport][SME] recog: Support space in "[ cons"
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d31045b21324166c3997d603961d99e3c4c357d
+
+Hi all,
+
+this is to allow for spaces before "cons:" in the definitions of
+patterns using the new compact syntax, ex:
+
+(define_insn "aarch64_simd_dup<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand")
+	(vec_duplicate:VDQ_I
+	  (match_operand:<VEL> 1 "register_operand")))]
+  "TARGET_SIMD"
+  {@ [ cons: =0 , 1  ; attrs: type ]
+     [ w        , w  ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
+     [ w        , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
+  }
+)
+
+gcc/Changelog
+
+2023-09-20  Andrea Corallo  <andrea.corallo@arm.com>
+
+	* gensupport.cc (convert_syntax): Skip spaces before "cons:"
+	in new compact pattern syntax.
+---
+ gcc/gensupport.cc | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
+index 97c614850..3d7a6d4fd 100644
+--- a/gcc/gensupport.cc
++++ b/gcc/gensupport.cc
+@@ -893,6 +893,8 @@ convert_syntax (rtx x, file_location loc)
+   if (!expect_char (&templ, '['))
+     fatal_at (loc, "expecing `[' to begin section list");
+ 
++  skip_spaces (&templ);
++
+   parse_section_layout (loc, &templ, "cons:", tconvec, true);
+ 
+   if (*templ != ']')
+-- 
+2.33.0
+
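For illustration: the change simply tolerates whitespace between '[' and "cons:". A tiny C sketch of what a skip_spaces-style helper does (a simplified stand-in, not the actual gensupport implementation):

#include <ctype.h>
#include <stdio.h>

/* Advance past leading whitespace before matching "cons:".  */
static const char *
skip_spaces (const char *s)
{
  while (*s && isspace ((unsigned char) *s))
    s++;
  return s;
}

int
main (void)
{
  printf ("%s\n", skip_spaces ("   cons: =0 , 1"));  /* prints "cons: =0 , 1" */
  return 0;
}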
View file
_service:tar_scm:0154-LoongArch-Fix-a-typo-PR-114407.patch
Added
@@ -0,0 +1,30 @@
+From 72f18deb0b8e59cc23f25cb99b59a25a0a1d99c7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Thu, 21 Mar 2024 04:01:17 +0800
+Subject: [PATCH 154/188] LoongArch: Fix a typo [PR 114407]
+
+gcc/ChangeLog:
+
+	PR target/114407
+	* config/loongarch/loongarch-opts.cc (loongarch_config_target):
+	Fix typo in diagnostic message, enabing -> enabling.
+---
+ gcc/config/loongarch/loongarch-opts.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index 2ea3972d1..bdecfaf49 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -362,7 +362,7 @@ config_target_isa:
+	  gcc_assert (constrained.simd);
+ 
+	  inform (UNKNOWN_LOCATION,
+-		  "enabing %qs promotes %<%s%s%> to %<%s%s%>",
++		  "enabling %qs promotes %<%s%s%> to %<%s%s%>",
+		  loongarch_isa_ext_strings[t.isa.simd],
+		  OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu],
+		  OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]);
+-- 
+2.43.0
+
View file
_service:tar_scm:0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch
Added
@@ -0,0 +1,164 @@
+From cb6d55f6bc7c490f72a43dd87543ab7a7ea582a8 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:18 +0000
+Subject: [PATCH 056/157] [Backport][SME] aarch64: Generalise
+ require_immediate_lane_index
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c0cf2c893d54420b0c19fee7bd41ae40017d0106
+
+require_immediate_lane_index previously hard-coded the assumption
+that the group size is determined by the argument immediately before
+the index.  However, for SME, there are cases where it should be
+determined by an earlier argument instead.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h:
+	(function_checker::require_immediate_lane_index): Add an argument
+	for the index of the indexed vector argument.
+	* config/aarch64/aarch64-sve-builtins.cc
+	(function_checker::require_immediate_lane_index): Likewise.
+	* config/aarch64/aarch64-sve-builtins-shapes.cc
+	(ternary_bfloat_lane_base::check): Update accordingly.
+	(ternary_qq_lane_base::check): Likewise.
+	(binary_lane_def::check): Likewise.
+	(binary_long_lane_def::check): Likewise.
+	(ternary_lane_def::check): Likewise.
+	(ternary_lane_rotate_def::check): Likewise.
+	(ternary_long_lane_def::check): Likewise.
+	(ternary_qq_lane_rotate_def::check): Likewise.
+---
+ .../aarch64/aarch64-sve-builtins-shapes.cc    | 16 ++++++++--------
+ gcc/config/aarch64/aarch64-sve-builtins.cc    | 18 ++++++++++++------
+ gcc/config/aarch64/aarch64-sve-builtins.h     |  3 ++-
+ 3 files changed, 22 insertions(+), 15 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+index f57f92698..4fa4181b9 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+@@ -941,7 +941,7 @@ struct ternary_bfloat_lane_base
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return c.require_immediate_lane_index (3, N);
++    return c.require_immediate_lane_index (3, 2, N);
+   }
+ };
+ 
+@@ -956,7 +956,7 @@ struct ternary_qq_lane_base
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return c.require_immediate_lane_index (3, 4);
++    return c.require_immediate_lane_index (3, 0);
+   }
+ };
+ 
+@@ -1123,7 +1123,7 @@ struct binary_lane_def : public overloaded_base<0>
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return c.require_immediate_lane_index (2);
++    return c.require_immediate_lane_index (2, 1);
+   }
+ };
+ SHAPE (binary_lane)
+@@ -1162,7 +1162,7 @@ struct binary_long_lane_def : public overloaded_base<0>
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return c.require_immediate_lane_index (2);
++    return c.require_immediate_lane_index (2, 1);
+   }
+ };
+ SHAPE (binary_long_lane)
+@@ -2817,7 +2817,7 @@ struct ternary_lane_def : public overloaded_base<0>
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return c.require_immediate_lane_index (3);
++    return c.require_immediate_lane_index (3, 2);
+   }
+ };
+ SHAPE (ternary_lane)
+@@ -2845,7 +2845,7 @@ struct ternary_lane_rotate_def : public overloaded_base<0>
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return (c.require_immediate_lane_index (3, 2)
++    return (c.require_immediate_lane_index (3, 2, 2)
+	    && c.require_immediate_one_of (4, 0, 90, 180, 270));
+   }
+ };
+@@ -2868,7 +2868,7 @@ struct ternary_long_lane_def
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return c.require_immediate_lane_index (3);
++    return c.require_immediate_lane_index (3, 2);
+   }
+ };
+ SHAPE (ternary_long_lane)
+@@ -2965,7 +2965,7 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0>
+   bool
+   check (function_checker &c) const OVERRIDE
+   {
+-    return (c.require_immediate_lane_index (3, 4)
++    return (c.require_immediate_lane_index (3, 0)
+	    && c.require_immediate_one_of (4, 0, 90, 180, 270));
+   }
+ };
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index 91af96687..7924cdf0f 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -2440,20 +2440,26 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type)
+   return false;
+ }
+ 
+-/* Check that argument REL_ARGNO is suitable for indexing argument
+-   REL_ARGNO - 1, in groups of GROUP_SIZE elements.  REL_ARGNO counts
+-   from the end of the predication arguments.  */
++/* The intrinsic conceptually divides vector argument REL_VEC_ARGNO into
++   groups of GROUP_SIZE elements.  Return true if argument REL_ARGNO is
++   a suitable constant index for selecting one of these groups.  The
++   selection happens within a 128-bit quadword, rather than the whole vector.
++
++   REL_ARGNO and REL_VEC_ARGNO count from the end of the predication
++   arguments.  */
+ bool
+ function_checker::require_immediate_lane_index (unsigned int rel_argno,
++						unsigned int rel_vec_argno,
+						unsigned int group_size)
+ {
+   unsigned int argno = m_base_arg + rel_argno;
+   if (!argument_exists_p (argno))
+     return true;
+ 
+-  /* Get the type of the previous argument.  tree_argument_type wants a
+-     1-based number, whereas ARGNO is 0-based.  */
+-  machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno));
++  /* Get the type of the vector argument.  tree_argument_type wants a
++     1-based number, whereas VEC_ARGNO is 0-based.  */
++  unsigned int vec_argno = m_base_arg + rel_vec_argno;
++  machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, vec_argno + 1));
+   gcc_assert (VECTOR_MODE_P (mode));
+   unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode));
+   return require_immediate_range (rel_argno, 0, nlanes - 1);
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 52994cde0..824c31cd7 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -463,7 +463,8 @@ public:
+   bool require_immediate_either_or (unsigned int, HOST_WIDE_INT,
+				    HOST_WIDE_INT);
+   bool require_immediate_enum (unsigned int, tree);
+-  bool require_immediate_lane_index (unsigned int, unsigned int = 1);
++  bool require_immediate_lane_index (unsigned int, unsigned int,
++				     unsigned int = 1);
+   bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT,
+				 HOST_WIDE_INT, HOST_WIDE_INT);
+   bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
+-- 
+2.33.0
+
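For illustration: with the extra argument, the checker derives the lane count from the indexed vector's element size rather than from the argument sitting next to the index. A small C sketch of the range computation at the end of require_immediate_lane_index (lane_index_ok is a hypothetical wrapper):

#include <stdio.h>

/* Mirror of the check above: the lane index selects a GROUP_SIZE-element
   group within a 128-bit quadword, so the valid range is [0, nlanes - 1].  */
static int
lane_index_ok (unsigned int elem_bits, unsigned int group_size,
               unsigned int index)
{
  unsigned int nlanes = 128 / (group_size * elem_bits);
  return index < nlanes;
}

int
main (void)
{
  /* 16-bit elements in groups of 2: four groups per quadword.  */
  printf ("%d %d\n", lane_index_ok (16, 2, 3), lane_index_ok (16, 2, 4));
  /* prints "1 0" */
  return 0;
}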
View file
_service:tar_scm:0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch
Added
@@ -0,0 +1,68 @@
+From e27123a020e7bf0845a9804a4b09fe4ce57992f0 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 6 Feb 2024 17:49:50 +0800
+Subject: [PATCH 155/188] testsuite: Add a test case for negating FP vectors
+ containing zeros
+
+Recently I've fixed two wrong FP vector negate implementation which
+caused wrong sign bits in zeros in targets (r14-8786 and r14-8801).  To
+prevent a similar issue from happening again, add a test case.
+
+Tested on x86_64 (with SSE2, AVX, AVX2, and AVX512F), AArch64, MIPS
+(with MSA), LoongArch (with LSX and LASX).
+
+gcc/testsuite:
+
+	* gcc.dg/vect/vect-neg-zero.c: New test.
+---
+ gcc/testsuite/gcc.dg/vect/vect-neg-zero.c | 38 +++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+new file mode 100644
+index 000000000..21fa00cfa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c
+@@ -0,0 +1,38 @@
++/* { dg-add-options ieee } */
++/* { dg-additional-options "-fno-associative-math -fsigned-zeros" } */
++
++double x[4] = {-0.0, 0.0, -0.0, 0.0};
++float y[8] = {-0.0, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0};
++
++static __attribute__ ((always_inline)) inline void
++test (int factor)
++{
++  double a[4];
++  float b[8];
++
++  asm ("" ::: "memory");
++
++  for (int i = 0; i < 2 * factor; i++)
++    a[i] = -x[i];
++
++  for (int i = 0; i < 4 * factor; i++)
++    b[i] = -y[i];
++
++#pragma GCC novector
++  for (int i = 0; i < 2 * factor; i++)
++    if (__builtin_signbit (a[i]) == __builtin_signbit (x[i]))
++      __builtin_abort ();
++
++#pragma GCC novector
++  for (int i = 0; i < 4 * factor; i++)
++    if (__builtin_signbit (b[i]) == __builtin_signbit (y[i]))
++      __builtin_abort ();
++}
++
++int
++main (void)
++{
++  test (1);
++  test (2);
++  return 0;
++}
+-- 
+2.43.0
+
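For illustration: the test compares sign bits rather than values because -0.0 == 0.0 under IEEE rules, so only signbit can observe a broken negation. A quick standalone C demonstration (compile without -ffast-math, which would discard signed zeros):

#include <stdio.h>

int
main (void)
{
  double z = 0.0, nz = -z;
  /* Equality cannot distinguish the two zeros; the sign bit can.  */
  printf ("equal: %d, signbit(z): %d, signbit(-z): %d\n",
          nz == z, !!__builtin_signbit (z), !!__builtin_signbit (nz));
  /* prints "equal: 1, signbit(z): 0, signbit(-z): 1" */
  return 0;
}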
View file
_service:tar_scm:0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
Added
@@ -0,0 +1,469 @@
+From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001
+From: Christophe Lyon <christophe.lyon@arm.com>
+Date: Fri, 11 Mar 2022 16:21:02 +0000
+Subject: [PATCH 057/157] [Backport][SME] aarch64: Add backend support for DFP
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22
+
+This patch updates the aarch64 backend as needed to support DFP modes
+(SD, DD and TD).
+
+Changes v1->v2:
+
+* Drop support for DFP modes in
+  aarch64_gen_{load||store}wb_pair as these are only used in
+  prologue/epilogue where DFP modes are not used.  Drop the
+  changes to the corresponding patterns in aarch64.md, and
+  useless GPF_PAIR iterator.
+
+* In aarch64_reinterpret_float_as_int, handle DDmode the same way
+  as DFmode (needed in case the representation of the
+  floating-point value can be loaded using mov/movk).
+
+* In aarch64_float_const_zero_rtx_p, reject constants with DFP
+  mode: when X is zero, the callers want to emit either '0' or
+  'zr' depending on the context, which is not the way 0.0 is
+  represented in DFP mode (in particular fmov d0, #0 is not right
+  for DFP).
+
+* In aarch64_legitimate_constant_p, accept DFP
+
+2022-03-31  Christophe Lyon  <christophe.lyon@arm.com>
+
+	gcc/
+	* config/aarch64/aarch64.cc
+	(aarch64_split_128bit_move): Handle DFP modes.
+	(aarch64_mode_valid_for_sched_fusion_p): Likewise.
+	(aarch64_classify_address): Likewise.
+	(aarch64_legitimize_address_displacement): Likewise.
+	(aarch64_reinterpret_float_as_int): Likewise.
+	(aarch64_float_const_zero_rtx_p): Likewise.
+	(aarch64_can_const_movi_rtx_p): Likewise.
+	(aarch64_anchor_offset): Likewise.
+	(aarch64_secondary_reload): Likewise.
+	(aarch64_rtx_costs): Likewise.
+	(aarch64_legitimate_constant_p): Likewise.
+	(aarch64_gimplify_va_arg_expr): Likewise.
+	(aapcs_vfp_sub_candidate): Likewise.
+	(aarch64_vfp_is_call_or_return_candidate): Likewise.
+	(aarch64_output_scalar_simd_mov_immediate): Likewise.
+	(aarch64_gen_adjusted_ldpstp): Likewise.
+	(aarch64_scalar_mode_supported_p): Accept DFP modes if enabled.
+	* config/aarch64/aarch64.md
+	(movsf_aarch64): Use SFD iterator and rename into
+	mov<mode>_aarch64.
+	(movdf_aarch64): Use DFD iterator and rename into
+	mov<mode>_aarch64.
+	(movtf_aarch64): Use TFD iterator and rename into
+	mov<mode>_aarch64.
+	(split pattern for move TF mode): Use TFD iterator.
+	* config/aarch64/iterators.md
+	(GPF_TF_F16_MOV): Add DFP modes.
+	(SFD, DFD, TFD): New iterators.
+	(GPF_TF): Add DFP modes.
+	(TX, DX, DX2): Likewise.
+--- + gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++----------- + gcc/config/aarch64/aarch64.md | 34 +++++++------- + gcc/config/aarch64/iterators.md | 24 +++++++--- + 3 files changed, 89 insertions(+), 51 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 055b436b1..02210ed13 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src) + + machine_mode mode = GET_MODE (dst); + +- gcc_assert (mode == TImode || mode == TFmode); ++ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode); + gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); + gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); + +@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode) + { + return mode == SImode || mode == DImode + || mode == SFmode || mode == DFmode ++ || mode == SDmode || mode == DDmode + || (aarch64_vector_mode_supported_p (mode) + && (known_eq (GET_MODE_SIZE (mode), 8) + || (known_eq (GET_MODE_SIZE (mode), 16) +@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info, + vec_flags &= ~VEC_PARTIAL; + + /* On BE, we use load/store pair for all large int mode load/stores. +- TI/TFmode may also use a load/store pair. */ ++ TI/TF/TDmode may also use a load/store pair. */ + bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); + bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP + || type == ADDR_QUERY_LDP_STP_N + || mode == TImode + || mode == TFmode ++ || mode == TDmode + || (BYTES_BIG_ENDIAN && advsimd_struct_p)); + /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode + corresponds to the actual size of the memory being loaded/stored and the +@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + info->offset = op1; + info->const_offset = offset; + +- /* TImode and TFmode values are allowed in both pairs of X ++ /* TImode, TFmode and TDmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset +@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + When performing the check for pairs of X registers i.e. LDP/STP + pass down DImode since that is the natural size of the LDP/STP + instruction memory accesses. */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) + && (aarch64_offset_9bit_signed_unscaled_p (mode, offset) + || offset_12bit_unsigned_scaled_p (mode, offset))); +@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info, + info->offset = XEXP (XEXP (x, 1), 1); + info->const_offset = offset; + +- /* TImode and TFmode values are allowed in both pairs of X ++ /* TImode, TFmode and TDmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset + Q: 9-bit signed offset + We conservatively require an offset representable in either mode. 
+ */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (aarch64_offset_7bit_signed_scaled_p (mode, offset) + && aarch64_offset_9bit_signed_unscaled_p (mode, offset)); + +@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2, + offset. Use 4KB range for 1- and 2-byte accesses and a 16KB + range otherwise to increase opportunities for sharing the base + address of different sizes. Unaligned accesses use the signed +- 9-bit range, TImode/TFmode use the intersection of signed ++ 9-bit range, TImode/TFmode/TDmode use the intersection of signed + scaled 7-bit and signed 9-bit offset. */ +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100; + else if ((const_offset & (size - 1)) != 0) + second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100; +@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) + CONST_DOUBLE_REAL_VALUE (value), + REAL_MODE_FORMAT (mode)); + +- if (mode == DFmode) ++ if (mode == DFmode || mode == DDmode) + { + int order = BYTES_BIG_ENDIAN ? 1 : 0; + ival = zext_hwi (resorder, 32); +@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x) + return false; + } + +-/* Return TRUE if rtx X is immediate constant 0.0 */ ++/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal ++ Floating Point). */ + bool + aarch64_float_const_zero_rtx_p (rtx x) + { +- if (GET_MODE (x) == VOIDmode) ++ /* 0.0 in Decimal Floating Point cannot be represented by #0 or ++ zr as our callers expect, so no need to check the actual ++ value if X is of Decimal Floating Point type. */ ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT) + return false; + + if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x))) +@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) + else + return false; + +- /* use a 64 bit mode for everything except for DI/DF mode, where we use ++ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64; + +@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size, + if (IN_RANGE (offset, -256, 0)) + return 0; + +- if (mode == TImode || mode == TFmode) ++ if (mode == TImode || mode == TFmode || mode == TDmode) + return (offset + 0x100) & ~0x1ff; + + /* Use 12-bit offset by access size. */ +@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + + /* Without the TARGET_SIMD instructions we cannot move a Q register + to a Q register directly. We need a scratch. */ +- if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x) ++ if (REG_P (x) ++ && (mode == TFmode || mode == TImode || mode == TDmode) ++ && mode == GET_MODE (x) + && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD + && reg_class_subset_p (rclass, FP_REGS)) + { +@@ -12745,14 +12753,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + +- /* A TFmode or TImode memory access should be handled via an FP_REGS ++ /* A TFmode, TImode or TDmode memory access should be handled via an FP_REGS + because AArch64 has richer addressing modes for LDR/STR instructions + than LDP/STP instructions. 
*/ + if (TARGET_FLOAT && rclass == GENERAL_REGS + && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x)) + return FP_REGS; + +- if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) ++ if (rclass == FP_REGS ++ && (mode == TImode || mode == TFmode || mode == TDmode) ++ && CONSTANT_P(x)) + return GENERAL_REGS; + + return NO_REGS; +@@ -13883,9 +13893,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, + *cost += extra_cost->ldst.storev; + else if (GET_MODE_CLASS (mode) == MODE_INT) + *cost += extra_cost->ldst.store; +- else if (mode == SFmode) ++ else if (mode == SFmode || mode == SDmode) + *cost += extra_cost->ldst.storef; +- else if (mode == DFmode) ++ else if (mode == DFmode || mode == DDmode) + *cost += extra_cost->ldst.stored; + + *cost += +@@ -14009,11 +14019,11 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, + /* movdf,sf_aarch64. */ + if (aarch64_float_const_representable_p (x)) + /* FMOV (scalar immediate). */ +- *cost += extra_cost->fpmode == DFmode.fpconst; ++ *cost += extra_cost->fpmode == DFmode || mode == DDmode.fpconst; + else if (!aarch64_float_const_zero_rtx_p (x)) + { + /* This will be a load from memory. */ +- if (mode == DFmode) ++ if (mode == DFmode || mode == DDmode) + *cost += extra_cost->ldst.loadd; + else + *cost += extra_cost->ldst.loadf; +@@ -14039,9 +14049,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, + *cost += extra_cost->ldst.loadv; + else if (GET_MODE_CLASS (mode) == MODE_INT) + *cost += extra_cost->ldst.load; +- else if (mode == SFmode) ++ else if (mode == SFmode || mode == SDmode) + *cost += extra_cost->ldst.loadf; +- else if (mode == DFmode) ++ else if (mode == DFmode || mode == DDmode) + *cost += extra_cost->ldst.loadd; + + *cost += +@@ -19623,7 +19633,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) + { + /* Support CSE and rematerialization of common constants. 
*/ + if (CONST_INT_P (x) +- || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)) ++ || CONST_DOUBLE_P (x)) + return true; + + /* Only accept variable-length vector constants if they can be +@@ -20064,6 +20074,18 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + field_t = long_double_type_node; + field_ptr_t = long_double_ptr_type_node; + break; ++ case SDmode: ++ field_t = dfloat32_type_node; ++ field_ptr_t = build_pointer_type (dfloat32_type_node); ++ break; ++ case DDmode: ++ field_t = dfloat64_type_node; ++ field_ptr_t = build_pointer_type (dfloat64_type_node); ++ break; ++ case TDmode: ++ field_t = dfloat128_type_node; ++ field_ptr_t = build_pointer_type (dfloat128_type_node); ++ break; + case E_HFmode: + field_t = aarch64_fp16_type_node; + field_ptr_t = aarch64_fp16_ptr_type_node; +@@ -20315,7 +20337,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep, + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode +- && mode != TFmode && mode != HFmode) ++ && mode != TFmode && mode != HFmode ++ && mode != SDmode && mode != DDmode && mode != TDmode) + return -1; + + if (*modep == VOIDmode) +@@ -20631,7 +20654,9 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, + machine_mode new_mode = VOIDmode; + bool composite_p = aarch64_composite_type_p (type, mode); + +- if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) ++ if ((!composite_p ++ && (GET_MODE_CLASS (mode) == MODE_FLOAT ++ || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT)) + || aarch64_short_vector_p (type, mode)) + { + *count = 1; +@@ -23565,7 +23590,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode) + } + + machine_mode vmode; +- /* use a 64 bit mode for everything except for DI/DF mode, where we use ++ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; + +@@ -26417,7 +26442,7 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, + base_off = (off_val_1 + off_val_3) / 2; + else + /* However, due to issues with negative LDP/STP offset generation for +- larger modes, for DF, DI and vector modes. we must not use negative ++ larger modes, for DF, DD, DI and vector modes. we must not use negative + addresses smaller than 9 signed unadjusted bits can store. This + provides the most range in this case. */ + base_off = off_val_1; +@@ -26695,6 +26720,9 @@ aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode) + static bool + aarch64_scalar_mode_supported_p (scalar_mode mode) + { ++ if (DECIMAL_FLOAT_MODE_P (mode)) ++ return default_decimal_float_supported_p (); ++ + return (mode == HFmode + ? 
true + : default_scalar_mode_supported_p (mode)); +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index a78476c8a..8757a962f 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1476,11 +1476,11 @@ + (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*") + ) + +-(define_insn "*movsf_aarch64" +- (set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") +- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M")) +- "TARGET_FLOAT && (register_operand (operands0, SFmode) +- || aarch64_reg_or_fp_zero (operands1, SFmode))" ++(define_insn "*mov<mode>_aarch64" ++ (set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") ++ (match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M")) ++ "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) ++ || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ + movi\\t%0.2s, #0 + fmov\\t%s0, %w1 +@@ -1500,11 +1500,11 @@ + (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*") + ) + +-(define_insn "*movdf_aarch64" +- (set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") +- (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N")) +- "TARGET_FLOAT && (register_operand (operands0, DFmode) +- || aarch64_reg_or_fp_zero (operands1, DFmode))" ++(define_insn "*mov<mode>_aarch64" ++ (set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") ++ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N")) ++ "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) ++ || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ + movi\\t%d0, #0 + fmov\\t%d0, %x1 +@@ -1545,13 +1545,13 @@ + } + ) + +-(define_insn "*movtf_aarch64" +- (set (match_operand:TF 0 ++(define_insn "*mov<mode>_aarch64" ++ (set (match_operand:TFD 0 + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") +- (match_operand:TF 1 ++ (match_operand:TFD 1 + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y")) +- "TARGET_FLOAT && (register_operand (operands0, TFmode) +- || aarch64_reg_or_fp_zero (operands1, TFmode))" ++ "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) ++ || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ + mov\\t%0.16b, %1.16b + # +@@ -1571,8 +1571,8 @@ + ) + + (define_split +- (set (match_operand:TF 0 "register_operand" "") +- (match_operand:TF 1 "nonmemory_operand" "")) ++ (set (match_operand:TFD 0 "register_operand" "") ++ (match_operand:TFD 1 "nonmemory_operand" "")) + "reload_completed && aarch64_split_128bit_move_p (operands0, operands1)" + (const_int 0) + { +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 967e6b0b1..d0cd1b788 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -67,14 +67,24 @@ + (define_mode_iterator GPF_TF_F16 HF SF DF TF) + + ;; Iterator for all scalar floating point modes suitable for moving, including +-;; special BF type (HF, SF, DF, TF and BF) +-(define_mode_iterator GPF_TF_F16_MOV HF BF SF DF TF) ++;; special BF type and decimal floating point types (HF, SF, DF, TF, BF, ++;; SD, DD and TD) ++(define_mode_iterator GPF_TF_F16_MOV HF BF SF DF TF SD DD TD) ++ ++;; Iterator for scalar 32bit fp modes (SF, SD) ++(define_mode_iterator SFD SD SF) ++ ++;; Iterator for scalar 64bit fp modes (DF, DD) ++(define_mode_iterator DFD DD DF) ++ ++;; Iterator for scalar 128bit fp modes (TF, TD) ++(define_mode_iterator TFD TD TF) + + ;; Double vector 
modes. + (define_mode_iterator VDF V2SF V4HF) + +-;; Iterator for all scalar floating point modes (SF, DF and TF) +-(define_mode_iterator GPF_TF SF DF TF) ++;; Iterator for all scalar floating point modes (SF, DF, TF, SD, DD, and TD) ++(define_mode_iterator GPF_TF SF DF TF SD DD TD) + + ;; Integer Advanced SIMD modes. + (define_mode_iterator VDQ_I V8QI V16QI V4HI V8HI V2SI V4SI V2DI) +@@ -301,7 +311,7 @@ + ;; 2 and 4 lane SI modes. + (define_mode_iterator VS V2SI V4SI) + +-(define_mode_iterator TX TI TF) ++(define_mode_iterator TX TI TF TD) + + ;; Advanced SIMD opaque structure modes. + (define_mode_iterator VSTRUCT OI CI XI) +@@ -403,10 +413,10 @@ + V4x8HF V4x4SF V4x2DF V4x8BF) + + ;; Double scalar modes +-(define_mode_iterator DX DI DF) ++(define_mode_iterator DX DI DF DD) + + ;; Duplicate of the above +-(define_mode_iterator DX2 DI DF) ++(define_mode_iterator DX2 DI DF DD) + + ;; Single scalar modes + (define_mode_iterator SX SI SF) +-- +2.33.0 +
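For illustration: once the backend handles the SD/DD/TD modes, ordinary decimal floating-point C code becomes usable on aarch64. A minimal example, assuming a GCC built with --enable-decimal-float (the DF/DD/DL literal suffixes are GCC extensions for _Decimal32/_Decimal64/_Decimal128):

int
main (void)
{
  _Decimal32 a = 0.10DF;        /* SDmode */
  _Decimal64 b = 0.20DD;        /* DDmode */
  _Decimal128 c = a + b;        /* TDmode */
  /* 0.1 + 0.2 == 0.3 holds exactly in decimal FP, unlike binary FP.  */
  return c == 0.30DL ? 0 : 1;
}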
View file
_service:tar_scm:0156-LoongArch-Add-descriptions-of-the-compilation-option.patch
Added
@@ -0,0 +1,83 @@
+From 899f1f351ddc0d76bc9d432cfe63b30cfb294860 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 25 Oct 2024 06:22:11 +0000
+Subject: [PATCH 156/188] LoongArch: Add descriptions of the compilation
+ options.
+
+Add descriptions for the compilation options '-mfrecipe' '-mdiv32'
+'-mlam-bh' '-mlamcas' and '-mld-seq-sa'.
+
+gcc/ChangeLog:
+
+	* doc/invoke.texi: Add descriptions for the compilation
+	options.
+---
+ gcc/doc/invoke.texi | 45 +++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 5c6515cb1..7f24fe1e2 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -1008,8 +1008,9 @@ Objective-C and Objective-C++ Dialects}.
+ -mmax-inline-memcpy-size=@var{n} @gol
+ -mexplicit-relocs -mno-explicit-relocs @gol
+ -mdirect-extern-access -mno-direct-extern-access @gol
+--mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as} @gol
+--mrecip -mrecip=@var{opt}
++-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol
++-mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol
++-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa}
+ 
+ @emph{M32R/D Options}
+ @gccoptlist{-m32r2 -m32rx -m32r @gol
+@@ -24686,6 +24687,46 @@ Enable the approximation for vectorized reciprocal square root.
+ So, for example, @option{-mrecip=all,!sqrt} enables
+ all of the reciprocal approximations, except for scalar square root.
+ 
++@opindex mfrecipe
++@opindex mno-frecipe
++@item -mfrecipe
++@itemx -mno-frecipe
++Use (do not use) @code{frecipe.@{s/d@}} and @code{frsqrte.@{s/d@}}
++instructions.  When build with @option{-march=la664}, it is enabled by default.
++The default is @option{-mno-frecipe}.
++
++@opindex mdiv32
++@opindex mno-div32
++@item -mdiv32
++@itemx -mno-div32
++Use (do not use) @code{div.wu} and @code{mod.wu} instructions with input
++not sign-extended.  When build with @option{-march=la664}, it is enabled by
++default.  The default is @option{-mno-div32}.
++
++@opindex mlam-bh
++@opindex mno-lam-bh
++@item -mlam-bh
++@itemx -mno-lam-bh
++Use (do not use) @code{am@{swap/add@}_db.@{b/h@}} instructions.  When build
++with @option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-lam-bh}.
++
++@opindex mlamcas
++@opindex mno-lamcas
++@item -mlamcas
++@itemx -mno-lamcas
++Use (do not use) @code{amcas_db.@{b/h/w/d@}} instructions.  When build with
++@option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-lamcas}.
++
++@opindex mld-seq-sa
++@opindex mno-ld-seq-sa
++@item -mld-seq-sa
++@itemx -mno-ld-seq-sa
++Whether a load-load barrier (@code{dbar 0x700}) is needed.  When build with
++@option{-march=la664}, it is enabled by default.  The default is
++@option{-mno-ld-seq-sa}, the load-load barrier is needed.
++
+ @item loongarch-vect-unroll-limit
+ The vectorizer will use available tuning information to determine whether it
+ would be beneficial to unroll the main vectorized loop and by how much.  This
+-- 
+2.43.0
+
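For illustration: of the documented options, -mlam-bh is the easiest to observe, since it changes how subword atomic read-modify-write operations are expanded. A small C example that such a build could compile with "gcc -O2 -mlam-bh"; the amadd_db.h mnemonic comes from the option description above, and the exact code generation is target- and version-dependent:

#include <stdatomic.h>

_Atomic short counter;

/* With -mlam-bh (or -march=la664) the LoongArch backend can emit a
   single amadd_db.h for this fetch-and-add instead of an LL/SC loop.  */
short
bump (void)
{
  return atomic_fetch_add (&counter, 1);
}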
View file
_service:tar_scm:0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
Added
@@ -0,0 +1,1824 @@
+From 737d2a5f1c5e725b7e5a20075270016ebf56b44c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 13 Sep 2022 09:28:49 +0100
+Subject: [PATCH 058/157] [Backport][SME] aarch64: Vector move fixes for
+ +nosimd
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=721c0fb3aca31d3bf8ad6e929eab32e29a427e60
+
+This patch fixes various issues around the handling of vectors
+and (particularly) vector structures with +nosimd.  Previously,
+passing and returning structures would trigger an ICE, since:
+
+* we didn't allow the structure modes to be stored in FPRs
+
+* we didn't provide +nosimd move patterns
+
+* splitting the moves into word-sized pieces (the default
+  strategy without move patterns) doesn't work because the
+  registers are doubleword sized.
+
+The patch is a bit of a hodge-podge since a lot of the handling of
+moves, register costs, and register legitimacy is so interconnected.
+It didn't seem feasible to split things further.
+
+Some notes:
+
+* The patch recognises vector and tuple modes based on TARGET_FLOAT
+  rather than TARGET_SIMD, and instead adds TARGET_SIMD to places
+  that really do need the vector ISA.  This is necessary for the
+  modes to be handled correctly in register arguments and returns.
+
+* The 64-bit (DREG) STP peephole required TARGET_SIMD but the
+  LDP peephole didn't.  I think the LDP one is right, since
+  DREG moves could involve GPRs as well as FPRs.
+
+* The patch keeps the existing choices of instructions for
+  TARGET_SIMD, just in case they happen to be better than FMOV
+  on some uarches.
+
+* Before the patch, +nosimd Q<->Q moves of 128-bit scalars went via
+  a GPR, thanks to a secondary reload pattern.  This approach might
+  not be ideal, but there's no reason that 128-bit vectors should
+  behave differently from 128-bit scalars.  The patch therefore
+  extends the current scalar approach to vectors.
+
+* Multi-vector LD1 and ST1 require TARGET_SIMD, so the TARGET_FLOAT
+  structure moves need to use LDP/STP and LDR/STR combinations
+  instead.  That's also what we do for big-endian even with
+  TARGET_SIMD, so most of the code was already there.  The patterns
+  for structures of 64-bit vectors are identical, but the patterns
+  for structures of 128-bit vectors need to cope with the lack of
+  128-bit Q<->Q moves.
+
+  It isn't feasible to move multi-vector tuples via GPRs, so the
+  patch moves them via memory instead.  This contaminates the port
+  with its first secondary memory reload.
+
+gcc/
+
+	* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Use
+	TARGET_FLOAT instead of TARGET_SIMD.
+	(aarch64_vectorize_related_mode): Restrict ADVSIMD handling to
+	TARGET_SIMD.
+	(aarch64_hard_regno_mode_ok): Don't allow tuples of 2 64-bit vectors
+	in GPRs.
+	(aarch64_classify_address): Treat little-endian structure moves
+	like big-endian for TARGET_FLOAT && !TARGET_SIMD.
+	(aarch64_secondary_memory_needed): New function.
+	(aarch64_secondary_reload): Handle 128-bit Advanced SIMD vectors
+	in the same way as TF, TI and TD.
+	(aarch64_rtx_mult_cost): Restrict ADVSIMD handling to TARGET_SIMD.
+	(aarch64_rtx_costs): Likewise.
+	(aarch64_register_move_cost): Treat a pair of 64-bit vectors
+	separately from a single 128-bit vector.  Handle the cost implied
+	by aarch64_secondary_memory_needed.
+	(aarch64_simd_valid_immediate): Restrict ADVSIMD handling to
+	TARGET_SIMD.
+	(aarch64_expand_vec_perm_const_1): Likewise.
+	(TARGET_SECONDARY_MEMORY_NEEDED): New macro.
+ * config/aarch64/iterators.md (VTX): New iterator. + * config/aarch64/aarch64.md (arches): Add fp_q as a synonym of simd. + (arch_enabled): Adjust accordingly. + (@aarch64_reload_mov<TX:mode>): Extend to... + (@aarch64_reload_mov<VTX:mode>): ...this. + * config/aarch64/aarch64-simd.md (mov<mode>): Require TARGET_FLOAT + rather than TARGET_SIMD. + (movmisalign<mode>): Likewise. + (load_pair<DREG:mode><DREG2:mode>): Likewise. + (vec_store_pair<DREG:mode><DREG2:mode>): Likewise. + (load_pair<VQ:mode><VQ2:mode>): Likewise. + (vec_store_pair<VQ:mode><VQ2:mode>): Likewise. + (@aarch64_split_simd_mov<mode>): Likewise. + (aarch64_get_low<mode>): Likewise. + (aarch64_get_high<mode>): Likewise. + (aarch64_get_half<mode>): Likewise. Canonicalize to a move for + lowpart extracts. + (*aarch64_simd_mov<VDMOV:mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. Use different w<-w and r<-w instructions for + !TARGET_SIMD. Disable immediate moves for !TARGET_SIMD but + add an alternative specifically for w<-Z. + (*aarch64_simd_mov<VQMOV:mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. Likewise for the associated define_splits. Disable + FPR moves and immediate moves for !TARGET_SIMD but add an alternative + specifically for w<-Z. + (aarch64_simd_mov_from_<mode>high): Require TARGET_FLOAT rather than + TARGET_SIMD. Restrict the existing alternatives to TARGET_SIMD + but add a new r<-w one for !TARGET_SIMD. + (*aarch64_get_high<mode>): New pattern. + (load_pair_lanes<mode>): Require TARGET_FLOAT rather than TARGET_SIMD. + (store_pair_lanes<mode>): Likewise. + (*aarch64_combine_internal<mode>): Likewise. Restrict existing + w<-w, w<-r and w<-m alternatives to TARGET_SIMD but add a new w<-r + alternative for !TARGET_SIMD. + (*aarch64_combine_internal_be<mode>): Likewise. + (aarch64_combinez<mode>): Require TARGET_FLOAT rather than TARGET_SIMD. + Remove bogus arch attribute. + (*aarch64_combinez_be<mode>): Likewise. + (@aarch64_vec_concat<mode>): Require TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_combine<mode>): Likewise. + (aarch64_rev_reglist<mode>): Likewise. + (mov<mode>): Likewise. + (*aarch64_be_mov<VSTRUCT_2D:mode>): Extend to TARGET_FLOAT && + !TARGET_SIMD, regardless of endianness. Extend associated + define_splits in the same way, both for this pattern and the + ones below. + (*aarch64_be_mov<VSTRUCT_2Qmode>): Likewise. Restrict w<-w + alternative to TARGET_SIMD. + (*aarch64_be_movoi): Likewise. + (*aarch64_be_movci): Likewise. + (*aarch64_be_movxi): Likewise. + (*aarch64_be_mov<VSTRUCT_4QD:mode>): Extend to TARGET_FLOAT + && !TARGET_SIMD, regardless of endianness. Restrict w<-w alternative + to TARGET_SIMD for tuples of 128-bit vectors. + (*aarch64_be_mov<VSTRUCT_4QD:mode>): Likewise. + * config/aarch64/aarch64-ldpstp.md: Remove TARGET_SIMD condition + from DREG STP peephole. Change TARGET_SIMD to TARGET_FLOAT in + the VQ and VP_2E LDP and STP peepholes. + +gcc/testsuite/ + * gcc.target/aarch64/ldp_stp_20.c: New test. + * gcc.target/aarch64/ldp_stp_21.c: Likewise. + * gcc.target/aarch64/ldp_stp_22.c: Likewise. + * gcc.target/aarch64/ldp_stp_23.c: Likewise. + * gcc.target/aarch64/ldp_stp_24.c: Likewise. + * gcc.target/aarch64/movv16qi_1.c (gpr_to_gpr): New function. + * gcc.target/aarch64/movv8qi_1.c (gpr_to_gpr): Likewise. + * gcc.target/aarch64/movv16qi_2.c: New test. + * gcc.target/aarch64/movv16qi_3.c: Likewise. + * gcc.target/aarch64/movv2di_1.c: Likewise. + * gcc.target/aarch64/movv2x16qi_1.c: Likewise. + * gcc.target/aarch64/movv2x8qi_1.c: Likewise. 
+ * gcc.target/aarch64/movv3x16qi_1.c: Likewise. + * gcc.target/aarch64/movv3x8qi_1.c: Likewise. + * gcc.target/aarch64/movv4x16qi_1.c: Likewise. + * gcc.target/aarch64/movv4x8qi_1.c: Likewise. + * gcc.target/aarch64/movv8qi_2.c: Likewise. + * gcc.target/aarch64/movv8qi_3.c: Likewise. + * gcc.target/aarch64/vect_unary_2.c: Likewise. +--- + gcc/config/aarch64/aarch64-ldpstp.md | 11 +- + gcc/config/aarch64/aarch64-simd.md | 199 +++++++++++------- + gcc/config/aarch64/aarch64.cc | 94 ++++++--- + gcc/config/aarch64/aarch64.md | 11 +- + gcc/config/aarch64/iterators.md | 2 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c | 7 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 7 + + gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c | 13 ++ + gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c | 16 ++ + gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c | 16 ++ + gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 21 ++ + gcc/testsuite/gcc.target/aarch64/movv16qi_2.c | 27 +++ + gcc/testsuite/gcc.target/aarch64/movv16qi_3.c | 30 +++ + gcc/testsuite/gcc.target/aarch64/movv2di_1.c | 103 +++++++++ + .../gcc.target/aarch64/movv2x16qi_1.c | 40 ++++ + .../gcc.target/aarch64/movv2x8qi_1.c | 38 ++++ + .../gcc.target/aarch64/movv3x16qi_1.c | 44 ++++ + .../gcc.target/aarch64/movv3x8qi_1.c | 41 ++++ + .../gcc.target/aarch64/movv4x16qi_1.c | 44 ++++ + .../gcc.target/aarch64/movv4x8qi_1.c | 42 ++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 15 ++ + gcc/testsuite/gcc.target/aarch64/movv8qi_2.c | 27 +++ + gcc/testsuite/gcc.target/aarch64/movv8qi_3.c | 30 +++ + .../gcc.target/aarch64/vect_unary_2.c | 5 + + 24 files changed, 774 insertions(+), 109 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2di_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_2.c + +diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md +index ba76a1b78..f8446e212 100644 +--- a/gcc/config/aarch64/aarch64-ldpstp.md ++++ b/gcc/config/aarch64/aarch64-ldpstp.md +@@ -83,8 +83,7 @@ + (match_operand:DREG 1 "register_operand" "")) + (set (match_operand:DREG2 2 "memory_operand" "") + (match_operand:DREG2 3 "register_operand" "")) +- "TARGET_SIMD +- && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)" ++ "aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)" + (parallel (set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))) + { +@@ -96,7 +95,7 @@ + (match_operand:VQ 1 "memory_operand" "")) + (set (match_operand:VQ2 2 "register_operand" "") + 
(match_operand:VQ2 3 "memory_operand" "")) +- "TARGET_SIMD ++ "TARGET_FLOAT + && aarch64_operands_ok_for_ldpstp (operands, true, <VQ:MODE>mode) + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" +@@ -111,7 +110,7 @@ + (match_operand:VQ 1 "register_operand" "")) + (set (match_operand:VQ2 2 "memory_operand" "") + (match_operand:VQ2 3 "register_operand" "")) +- "TARGET_SIMD ++ "TARGET_FLOAT + && aarch64_operands_ok_for_ldpstp (operands, false, <VQ:MODE>mode) + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" +@@ -306,7 +305,7 @@ + (set (match_operand:VP_2E 6 "memory_operand" "") + (match_operand:VP_2E 7 "aarch64_reg_or_zero" "")) + (match_dup 8) +- "TARGET_SIMD ++ "TARGET_FLOAT + && aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)" + (const_int 0) + { +@@ -327,7 +326,7 @@ + (set (match_operand:VP_2E 6 "register_operand" "") + (match_operand:VP_2E 7 "memory_operand" "")) + (match_dup 8) +- "TARGET_SIMD ++ "TARGET_FLOAT + && aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)" + (const_int 0) + { +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index a47b39281..ef7fc4ecb 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -21,7 +21,7 @@ + (define_expand "mov<mode>" + (set (match_operand:VALL_F16 0 "nonimmediate_operand") + (match_operand:VALL_F16 1 "general_operand")) +- "TARGET_SIMD" ++ "TARGET_FLOAT" + " + /* Force the operand into a register if it is not an + immediate whose use can be replaced with xzr. +@@ -52,7 +52,7 @@ + (define_expand "movmisalign<mode>" + (set (match_operand:VALL_F16 0 "nonimmediate_operand") + (match_operand:VALL_F16 1 "general_operand")) +- "TARGET_SIMD && !STRICT_ALIGNMENT" ++ "TARGET_FLOAT && !STRICT_ALIGNMENT" + { + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. 
memory := constant, which can be created by the +@@ -116,10 +116,10 @@ + + (define_insn "*aarch64_simd_mov<VDMOV:mode>" + (set (match_operand:VDMOV 0 "nonimmediate_operand" +- "=w, m, m, w, ?r, ?w, ?r, w") ++ "=w, m, m, w, ?r, ?w, ?r, w, w") + (match_operand:VDMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn")) +- "TARGET_SIMD ++ "m, Dz, w, w, w, r, r, Dn, Dz")) ++ "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" + { +@@ -128,26 +128,34 @@ + case 0: return "ldr\t%d0, %1"; + case 1: return "str\txzr, %0"; + case 2: return "str\t%d1, %0"; +- case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>"; +- case 4: return "umov\t%0, %1.d0"; ++ case 3: ++ if (TARGET_SIMD) ++ return "mov\t%0.<Vbtype>, %1.<Vbtype>"; ++ return "fmov\t%d0, %d1"; ++ case 4: ++ if (TARGET_SIMD) ++ return "umov\t%0, %1.d0"; ++ return "fmov\t%x0, %d1"; + case 5: return "fmov\t%d0, %1"; + case 6: return "mov\t%0, %1"; + case 7: + return aarch64_output_simd_mov_immediate (operands1, 64); ++ case 8: return "fmov\t%d0, xzr"; + default: gcc_unreachable (); + } + } + (set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ + neon_logic<q>, neon_to_gp<q>, f_mcr,\ +- mov_reg, neon_move<q>") ++ mov_reg, neon_move<q>, f_mcr") ++ (set_attr "arch" "*,*,*,*,*,*,*,simd,*") + ) + + (define_insn "*aarch64_simd_mov<VQMOV:mode>" + (set (match_operand:VQMOV 0 "nonimmediate_operand" +- "=w, Umn, m, w, ?r, ?w, ?r, w") ++ "=w, Umn, m, w, ?r, ?w, ?r, w, w") + (match_operand:VQMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn")) +- "TARGET_SIMD ++ "m, Dz, w, w, w, r, r, Dn, Dz")) ++ "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" + { +@@ -167,14 +175,17 @@ + return "#"; + case 7: + return aarch64_output_simd_mov_immediate (operands1, 128); ++ case 8: ++ return "fmov\t%d0, xzr"; + default: + gcc_unreachable (); + } + } + (set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ + neon_logic<q>, multiple, multiple,\ +- multiple, neon_move<q>") +- (set_attr "length" "4,4,4,4,8,8,8,4") ++ multiple, neon_move<q>, fmov") ++ (set_attr "length" "4,4,4,4,8,8,8,4,4") ++ (set_attr "arch" "*,*,*,simd,*,*,*,simd,*") + ) + + ;; When storing lane zero we can use the normal STR and its more permissive +@@ -195,7 +206,7 @@ + (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:DREG2 2 "register_operand" "=w") + (match_operand:DREG2 3 "memory_operand" "m")) +- "TARGET_SIMD ++ "TARGET_FLOAT + && rtx_equal_p (XEXP (operands3, 0), + plus_constant (Pmode, + XEXP (operands1, 0), +@@ -209,7 +220,7 @@ + (match_operand:DREG 1 "register_operand" "w")) + (set (match_operand:DREG2 2 "memory_operand" "=m") + (match_operand:DREG2 3 "register_operand" "w")) +- "TARGET_SIMD ++ "TARGET_FLOAT + && rtx_equal_p (XEXP (operands2, 0), + plus_constant (Pmode, + XEXP (operands0, 0), +@@ -223,7 +234,7 @@ + (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:VQ2 2 "register_operand" "=w") + (match_operand:VQ2 3 "memory_operand" "m")) +- "TARGET_SIMD ++ "TARGET_FLOAT + && rtx_equal_p (XEXP (operands3, 0), + plus_constant (Pmode, + XEXP (operands1, 0), +@@ -237,10 +248,11 @@ + (match_operand:VQ 1 "register_operand" "w")) + (set (match_operand:VQ2 2 "memory_operand" "=m") + (match_operand:VQ2 3 "register_operand" "w")) +- "TARGET_SIMD && rtx_equal_p (XEXP (operands2, 0), +- plus_constant (Pmode, +- XEXP (operands0, 0), +- GET_MODE_SIZE (<VQ:MODE>mode)))" ++ "TARGET_FLOAT ++ && 
rtx_equal_p (XEXP (operands[2], 0),
++		   plus_constant (Pmode,
++				  XEXP (operands[0], 0),
++				  GET_MODE_SIZE (<VQ:MODE>mode)))"
+   "stp\\t%q1, %q3, %z0"
+   [(set_attr "type" "neon_stp_q")]
+ )
+@@ -248,8 +260,9 @@
+ 
+ (define_split
+   [(set (match_operand:VQMOV 0 "register_operand" "")
+-	(match_operand:VQMOV 1 "register_operand" ""))]
+-  "TARGET_SIMD && reload_completed
++	(match_operand:VQMOV 1 "register_operand" ""))]
++  "TARGET_FLOAT
++   && reload_completed
+    && GP_REGNUM_P (REGNO (operands[0]))
+    && GP_REGNUM_P (REGNO (operands[1]))"
+   [(const_int 0)]
+@@ -261,7 +274,8 @@
+ (define_split
+   [(set (match_operand:VQMOV 0 "register_operand" "")
+ 	(match_operand:VQMOV 1 "register_operand" ""))]
+-  "TARGET_SIMD && reload_completed
++  "TARGET_FLOAT
++   && reload_completed
+    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
+        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
+   [(const_int 0)]
+@@ -273,7 +287,7 @@
+ (define_expand "@aarch64_split_simd_mov<mode>"
+   [(set (match_operand:VQMOV 0)
+ 	(match_operand:VQMOV 1))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+   {
+     rtx dst = operands[0];
+     rtx src = operands[1];
+@@ -306,13 +320,20 @@
+ 	(vec_select:<VHALF>
+ 	  (match_operand:VQMOV 1 "register_operand")
+ 	  (match_operand 2 "ascending_int_parallel")))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
++  {
++    if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
++      {
++	emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
++	DONE;
++      }
++  }
+ )
+ 
+ (define_expand "aarch64_get_low<mode>"
+   [(match_operand:<VHALF> 0 "register_operand")
+    (match_operand:VQMOV 1 "register_operand")]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+   {
+     rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+     emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
+@@ -323,7 +344,7 @@
+ (define_expand "aarch64_get_high<mode>"
+   [(match_operand:<VHALF> 0 "register_operand")
+    (match_operand:VQMOV 1 "register_operand")]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+   {
+     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+     emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
+@@ -350,15 +371,17 @@
+ )
+ 
+ (define_insn "aarch64_simd_mov_from_<mode>high"
+-  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
++  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
+ 	(vec_select:<VHALF>
+-	  (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
++	  (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
+ 	  (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+   "@
+-   dup\\t%d0, %1.d[1]
+-   umov\t%0, %1.d[1]"
+-  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
++   dup\t%d0, %1.d[1]
++   umov\t%0, %1.d[1]
++   fmov\t%0, %1.d[1]"
++  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
++   (set_attr "arch" "simd,simd,*")
+    (set_attr "length" "4")]
+ )
+ 
+@@ -4322,12 +4345,22 @@
+   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
+ )
+ 
++(define_insn "*aarch64_get_high<mode>"
++  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
++	(vec_select:<VEL>
++	  (match_operand:VQ_2E 1 "register_operand" "w")
++	  (parallel [(match_operand:SI 2 "immediate_operand")])))]
++  "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
++  "fmov\t%0, %1.d[1]"
++  [(set_attr "type" "f_mrc")]
++)
++
+ (define_insn "load_pair_lanes<mode>"
+   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+ 	(vec_concat:<VDBL>
+ 	  (match_operand:VDCSIF 1 "memory_operand" "Utq")
+ 	  (match_operand:VDCSIF 2 "memory_operand" "m")))]
+-  "TARGET_SIMD
++  "TARGET_FLOAT
+    && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
+   "ldr\\t%<single_dtype>0, %1"
+   [(set_attr "type" "neon_load1_1reg<dblq>")]
+@@ -4357,7 +4390,7 @@
+ 	(vec_concat:<VDBL>
+ 	  (match_operand:VDCSIF 1 "register_operand" "w, r")
+ 	  (match_operand:VDCSIF 2 "register_operand" "w, r")))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+   "@
+    stp\t%<single_type>1, %<single_type>2, %y0
+    stp\t%<single_wx>1, %<single_wx>2, %y0"
+@@ -4372,39 +4405,44 @@
+ ;; the register alternatives either don't accept or themselves disparage.
+ 
+ (define_insn "*aarch64_combine_internal<mode>"
+-  [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
++  [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
+ 	(vec_concat:<VDBL>
+-	  (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
+-	  (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
+-  "TARGET_SIMD
++	  (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")
++	  (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
++  "TARGET_FLOAT
+    && !BYTES_BIG_ENDIAN
+    && (register_operand (operands[0], <VDBL>mode)
+        || register_operand (operands[2], <MODE>mode))"
+   "@
+    ins\t%0.<single_type>[1], %2.<single_type>[0]
+    ins\t%0.<single_type>[1], %<single_wx>2
++   fmov\t%0.d[1], %2
+    ld1\t{%0.<single_type>}[1], %2
+    stp\t%<single_type>1, %<single_type>2, %y0
+    stp\t%<single_wx>1, %<single_wx>2, %y0"
+-  [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
++  [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
++		     neon_load1_one_lane<dblq>, neon_stp, store_16")
++   (set_attr "arch" "simd,simd,*,simd,*,*")]
+ )
+ 
+ (define_insn "*aarch64_combine_internal_be<mode>"
+-  [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
++  [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
+ 	(vec_concat:<VDBL>
+-	  (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
+-	  (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
+-  "TARGET_SIMD
++	  (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
++	  (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))]
++  "TARGET_FLOAT
+    && BYTES_BIG_ENDIAN
+    && (register_operand (operands[0], <VDBL>mode)
+        || register_operand (operands[2], <MODE>mode))"
+   "@
+    ins\t%0.<single_type>[1], %2.<single_type>[0]
+    ins\t%0.<single_type>[1], %<single_wx>2
++   fmov\t%0.d[1], %2
+    ld1\t{%0.<single_type>}[1], %2
+    stp\t%<single_type>2, %<single_type>1, %y0
+    stp\t%<single_wx>2, %<single_wx>1, %y0"
+-  [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
++  [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
++   (set_attr "arch" "simd,simd,*,simd,*,*")]
+ )
+ 
+ ;; In this insn, operand 1 should be low, and operand 2 the high part of the
+@@ -4415,13 +4453,12 @@
+ 	(vec_concat:<VDBL>
+ 	  (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
+ 	  (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
+-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
++  "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
+   "@
+    fmov\\t%<single_type>0, %<single_type>1
+    fmov\t%<single_type>0, %<single_wx>1
+    ldr\\t%<single_type>0, %1"
+-  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
+-   (set_attr "arch" "simd,fp,simd")]
++  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
+ )
+ 
+ (define_insn "*aarch64_combinez_be<mode>"
+@@ -4429,13 +4466,12 @@
+ 	(vec_concat:<VDBL>
+ 	  (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
+ 	  (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
++  "TARGET_FLOAT && BYTES_BIG_ENDIAN"
+   "@
+    fmov\\t%<single_type>0, %<single_type>1
+    fmov\t%<single_type>0, %<single_wx>1
+    ldr\\t%<single_type>0, %1"
+-  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
+-   (set_attr "arch" "simd,fp,simd")]
++  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
+ )
+ 
+ ;; Form a vector whose first half (in array order) comes from operand 1
+@@ -4446,7 +4482,7 @@
+ 	(vec_concat:<VDBL>
+ 	  (match_operand:VDCSIF 1 "general_operand")
+ 	  (match_operand:VDCSIF 2 "general_operand")))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+ {
+   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
+   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
+@@ -4464,7 +4500,7 @@
+     }
+   else
+     {
+-      /* Use *aarch64_combine_general<mode>.  */
++      /* Use *aarch64_combine_internal<mode>.  */
+       operands[lo] = force_reg (<MODE>mode, operands[lo]);
+       if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
+ 	{
+@@ -4486,7 +4522,7 @@
+   [(match_operand:<VDBL> 0 "register_operand")
+    (match_operand:VDC 1 "general_operand")
+    (match_operand:VDC 2 "general_operand")]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+ {
+   if (BYTES_BIG_ENDIAN)
+     std::swap (operands[1], operands[2]);
+@@ -7367,7 +7403,7 @@
+ (define_expand "mov<mode>"
+   [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
+ 	(match_operand:VSTRUCT_QD 1 "general_operand"))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+ {
+   if (can_create_pseudo_p ())
+     {
+@@ -7379,7 +7415,7 @@
+ (define_expand "mov<mode>"
+   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
+ 	(match_operand:VSTRUCT 1 "general_operand"))]
+-  "TARGET_SIMD"
++  "TARGET_FLOAT"
+ {
+   if (can_create_pseudo_p ())
+     {
+@@ -7559,7 +7595,8 @@
+ (define_insn "*aarch64_be_mov<mode>"
+   [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
+ 	(match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], <MODE>mode)
+        || register_operand (operands[1], <MODE>mode))"
+   "@
+@@ -7573,7 +7610,8 @@
+ (define_insn "*aarch64_be_mov<mode>"
+   [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
+ 	(match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], <MODE>mode)
+        || register_operand (operands[1], <MODE>mode))"
+   "@
+@@ -7581,13 +7619,15 @@
+    stp\\t%q1, %R1, %0
+    ldp\\t%q0, %R0, %1"
+   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
++   (set_attr "arch" "simd,*,*")
+    (set_attr "length" "8,4,4")]
+ )
+ 
+ (define_insn "*aarch64_be_movoi"
+   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
+ 	(match_operand:OI 1 "general_operand" " w,w,m"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], OImode)
+        || register_operand (operands[1], OImode))"
+   "@
+@@ -7595,57 +7635,66 @@
+    stp\\t%q1, %R1, %0
+    ldp\\t%q0, %R0, %1"
+   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
++   (set_attr "arch" "simd,*,*")
+    (set_attr "length" "8,4,4")]
+ )
+ 
+ (define_insn "*aarch64_be_mov<mode>"
+   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
+ 	(match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], <MODE>mode)
+        || register_operand (operands[1], <MODE>mode))"
+   "#"
+   [(set_attr "type" "multiple")
++   (set_attr "arch" "fp<q>,*,*")
+    (set_attr "length" "12,8,8")]
+ )
+ 
+ (define_insn "*aarch64_be_movci"
+   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
+ 	(match_operand:CI 1 "general_operand" " w,w,o"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], CImode)
+        || register_operand (operands[1], CImode))"
+   "#"
+   [(set_attr "type" "multiple")
+-   (set_attr "length" "12,4,4")]
++   (set_attr "arch" "simd,*,*")
++   (set_attr "length" "12,8,8")]
+ )
+ 
+ (define_insn "*aarch64_be_mov<mode>"
+   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
+ 	(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], <MODE>mode)
+        || register_operand (operands[1], <MODE>mode))"
+   "#"
+   [(set_attr "type" "multiple")
++   (set_attr "arch" "fp<q>,*,*")
+    (set_attr "length" "16,8,8")]
+ )
+ 
+ (define_insn "*aarch64_be_movxi"
+   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
+ 	(match_operand:XI 1 "general_operand" " w,w,o"))]
+-  "TARGET_SIMD && BYTES_BIG_ENDIAN
++  "TARGET_FLOAT
++   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    && (register_operand (operands[0], XImode)
+        || register_operand (operands[1], XImode))"
+   "#"
+   [(set_attr "type" "multiple")
+-   (set_attr "length" "16,4,4")]
++   (set_attr "arch" "simd,*,*")
++   (set_attr "length" "16,8,8")]
+ )
+ 
+ (define_split
+   [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
+ 	(match_operand:VSTRUCT_2QD 1 "register_operand"))]
+-  "TARGET_SIMD && reload_completed"
++  "TARGET_FLOAT && reload_completed"
+   [(const_int 0)]
+ {
+   aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
+@@ -7655,7 +7704,7 @@
+ (define_split
+   [(set (match_operand:OI 0 "register_operand")
+ 	(match_operand:OI 1 "register_operand"))]
+-  "TARGET_SIMD && reload_completed"
++  "TARGET_FLOAT && reload_completed"
+   [(const_int 0)]
+ {
+   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
+@@ -7665,7 +7714,7 @@
+ (define_split
+   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
+ 	(match_operand:VSTRUCT_3QD 1 "general_operand"))]
+-  "TARGET_SIMD && reload_completed"
++  "TARGET_FLOAT && reload_completed"
+   [(const_int 0)]
+ {
+   if (register_operand (operands[0], <MODE>mode)
+@@ -7674,7 +7723,7 @@
+       aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
+       DONE;
+     }
+-  else if (BYTES_BIG_ENDIAN)
++  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+     {
+       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
+       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
+@@ -7701,7 +7750,7 @@
+ (define_split
+   [(set (match_operand:CI 0 "nonimmediate_operand")
+ 	(match_operand:CI 1 "general_operand"))]
+-  "TARGET_SIMD && reload_completed"
++  "TARGET_FLOAT && reload_completed"
+   [(const_int 0)]
+ {
+   if (register_operand (operands[0], CImode)
+@@ -7710,7 +7759,7 @@
+       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
+       DONE;
+     }
+-  else if (BYTES_BIG_ENDIAN)
++  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+     {
+       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
+ 		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
+@@ -7729,7 +7778,7 @@
+ (define_split
+   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
+ 	(match_operand:VSTRUCT_4QD 1 "general_operand"))]
+-  "TARGET_SIMD && reload_completed"
++  "TARGET_FLOAT && reload_completed"
+   [(const_int 0)]
+ {
+   if (register_operand (operands[0], <MODE>mode)
+@@ -7738,7 +7787,7 @@
+       aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
+       DONE;
+     }
+-  else if (BYTES_BIG_ENDIAN)
++  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+     {
+       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
+       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
+@@ -7759,7 +7808,7 @@
+ (define_split
+   [(set (match_operand:XI 0 "nonimmediate_operand")
+ 	(match_operand:XI 1 "general_operand"))]
+-  "TARGET_SIMD && reload_completed"
++  "TARGET_FLOAT && reload_completed"
+   [(const_int 0)]
+ {
+   if (register_operand (operands[0], XImode)
+@@ -7768,7 +7817,7 @@
+       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
+       DONE;
+     }
+-  else if (BYTES_BIG_ENDIAN)
++  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+     {
+       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
+ 		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 02210ed13..b4b646fa0 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -3748,7 +3748,7 @@ aarch64_classify_vector_mode (machine_mode mode)
+     case E_OImode:
+     case E_CImode:
+     case E_XImode:
+-      return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
++      return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ 
+     /* Structures of 64-bit Advanced SIMD vectors.  */
+     case E_V2x8QImode:
+@@ -3775,7 +3775,7 @@ aarch64_classify_vector_mode (machine_mode mode)
+     case E_V4x4HFmode:
+     case E_V4x2SFmode:
+     case E_V4x1DFmode:
+-      return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
++      return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
+ 
+     /* Structures of 128-bit Advanced SIMD vectors.  */
+     case E_V2x16QImode:
+@@ -3802,7 +3802,7 @@ aarch64_classify_vector_mode (machine_mode mode)
+     case E_V4x8HFmode:
+     case E_V4x4SFmode:
+     case E_V4x2DFmode:
+-      return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
++      return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ 
+     /* 64-bit Advanced SIMD vectors.  */
+     case E_V8QImode:
+@@ -3822,7 +3822,7 @@ aarch64_classify_vector_mode (machine_mode mode)
+     case E_V8BFmode:
+     case E_V4SFmode:
+     case E_V2DFmode:
+-      return TARGET_SIMD ? VEC_ADVSIMD : 0;
++      return TARGET_FLOAT ? VEC_ADVSIMD : 0;
+ 
+     default:
+       return 0;
+@@ -4110,7 +4110,8 @@ aarch64_vectorize_related_mode (machine_mode vector_mode,
+     }
+ 
+   /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors.  */
+-  if ((vec_flags & VEC_ADVSIMD)
++  if (TARGET_SIMD
++      && (vec_flags & VEC_ADVSIMD)
+       && known_eq (nunits, 0U)
+       && known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
+       && maybe_ge (GET_MODE_BITSIZE (element_mode)
+@@ -4208,7 +4209,7 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
+ 
+   if (GP_REGNUM_P (regno))
+     {
+-      if (vec_flags & VEC_ANY_SVE)
++      if (vec_flags & (VEC_ANY_SVE | VEC_STRUCT))
+ 	return false;
+       if (known_le (GET_MODE_SIZE (mode), 8))
+ 	return true;
+@@ -10884,7 +10885,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 			    || mode == TImode
+ 			    || mode == TFmode
+ 			    || mode == TDmode
+-			    || (BYTES_BIG_ENDIAN && advsimd_struct_p));
++			    || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
++				&& advsimd_struct_p));
+   /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
+      corresponds to the actual size of the memory being loaded/stored and the
+      mode of the corresponding addressing mode is half of that.  */
+@@ -10914,6 +10916,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
+   /* On LE, for AdvSIMD, don't support anything other than POST_INC or
+      REG addressing.  */
+   if (advsimd_struct_p
++      && TARGET_SIMD
+       && !BYTES_BIG_ENDIAN
+       && (code != POST_INC && code != REG))
+     return false;
+@@ -10976,7 +10979,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 		&& aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48));
+ 
+       /* A 7bit offset check because OImode will emit a ldp/stp
+-	 instruction (only big endian will get here).
++	 instruction (only !TARGET_SIMD or big endian will get here).
+ 	 For ldp/stp instructions, the offset is scaled for the size of a
+ 	 single element of the pair.  */
+       if (aarch64_advsimd_partial_struct_mode_p (mode)
+@@ -10987,7 +10990,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 	return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
+ 
+       /* Three 9/12 bit offsets checks because CImode will emit three
+-	 ldr/str instructions (only big endian will get here).  */
++	 ldr/str instructions (only !TARGET_SIMD or big endian will
++	 get here).  */
+       if (aarch64_advsimd_partial_struct_mode_p (mode)
+ 	  && known_eq (GET_MODE_SIZE (mode), 24))
+ 	return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
+@@ -12716,18 +12720,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+   /* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
+      LDR and STR.  See the comment at the head of aarch64-sve.md for
+      more details about the big-endian handling.  */
++  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+   if (reg_class_subset_p (rclass, FP_REGS)
+       && !((REG_P (x) && HARD_REGISTER_P (x))
+ 	   || aarch64_simd_valid_immediate (x, NULL))
+-      && mode != VNx16QImode)
++      && mode != VNx16QImode
++      && (vec_flags & VEC_SVE_DATA)
++      && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
+     {
+-      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+-      if ((vec_flags & VEC_SVE_DATA)
+-	  && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
+-	{
+-	  sri->icode = CODE_FOR_aarch64_sve_reload_mem;
+-	  return NO_REGS;
+-	}
++      sri->icode = CODE_FOR_aarch64_sve_reload_mem;
++      return NO_REGS;
+     }
+ 
+   /* If we have to disable direct literal pool loads and stores because the
+@@ -12744,9 +12746,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+   /* Without the TARGET_SIMD instructions we cannot move a Q register
+      to a Q register directly.  We need a scratch.  */
+   if (REG_P (x)
+-      && (mode == TFmode || mode == TImode || mode == TDmode)
++      && (mode == TFmode
++	  || mode == TImode
++	  || mode == TDmode
++	  || (vec_flags == VEC_ADVSIMD && known_eq (GET_MODE_SIZE (mode), 16)))
+       && mode == GET_MODE (x)
+-      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
++      && !TARGET_SIMD
++      && FP_REGNUM_P (REGNO (x))
+       && reg_class_subset_p (rclass, FP_REGS))
+     {
+       sri->icode = code_for_aarch64_reload_mov (mode);
+@@ -12768,6 +12774,28 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+   return NO_REGS;
+ }
+ 
++/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
++
++static bool
++aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1,
++				 reg_class_t class2)
++{
++  if (!TARGET_SIMD
++      && reg_classes_intersect_p (class1, FP_REGS)
++      && reg_classes_intersect_p (class2, FP_REGS))
++    {
++      /* We can't do a 128-bit FPR-to-FPR move without TARGET_SIMD,
++	 so we can't easily split a move involving tuples of 128-bit
++	 vectors.  Force the copy through memory instead.
++
++	 (Tuples of 64-bit vectors are fine.)  */
++      unsigned int vec_flags = aarch64_classify_vector_mode (mode);
++      if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
++	return true;
++    }
++  return false;
++}
++
+ static bool
+ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+ {
+@@ -13311,7 +13339,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+   if (VECTOR_MODE_P (mode))
+     {
+       unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+-      if (vec_flags & VEC_ADVSIMD)
++      if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
+ 	{
+ 	  /* The select-operand-high-half versions of the instruction have the
+ 	     same cost as the three vector version - don't add the costs of the
+@@ -14257,7 +14285,7 @@ cost_minus:
+ 	{
+ 	  /* SUBL2 and SUBW2.  */
+ 	  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+-	  if (vec_flags & VEC_ADVSIMD)
++	  if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
+ 	    {
+ 	      /* The select-operand-high-half versions of the sub instruction
+ 		 have the same cost as the regular three vector version -
+@@ -14359,7 +14387,7 @@ cost_plus:
+ 	{
+ 	  /* ADDL2 and ADDW2.  */
+ 	  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+-	  if (vec_flags & VEC_ADVSIMD)
++	  if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
+ 	    {
+ 	      /* The select-operand-high-half versions of the add instruction
+ 		 have the same cost as the regular three vector version -
+@@ -15284,7 +15312,9 @@ aarch64_register_move_cost (machine_mode mode,
+     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
+ 	   + aarch64_register_move_cost (mode, GENERAL_REGS, to);
+ 
+-  if (known_eq (GET_MODE_SIZE (mode), 16))
++  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
++  if (vec_flags != (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL)
++      && known_eq (GET_MODE_SIZE (mode), 16))
+     {
+       /* 128-bit operations on general registers require 2 instructions.  */
+       if (from == GENERAL_REGS && to == GENERAL_REGS)
+@@ -15312,6 +15342,16 @@ aarch64_register_move_cost (machine_mode mode,
+   else if (to == GENERAL_REGS)
+     return regmove_cost->FP2GP;
+ 
++  if (!TARGET_SIMD && vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
++    {
++      /* Needs a round-trip through memory, which can use LDP/STP for pairs.
++	 The cost must be greater than 2 units to indicate that direct
++	 moves aren't possible.  */
++      auto per_vector = (aarch64_tune_params.memmov_cost.load_fp
++			 + aarch64_tune_params.memmov_cost.store_fp);
++      return MIN (CEIL (per_vector, 2), 4);
++    }
++
+   return regmove_cost->FP2FP;
+ }
+ 
+@@ -21504,6 +21544,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
+   if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+     return false;
+ 
++  if ((vec_flags & VEC_ADVSIMD) && !TARGET_SIMD)
++    return false;
++
+   if (vec_flags & VEC_SVE_PRED)
+     return aarch64_sve_pred_valid_immediate (op, info);
+ 
+@@ -24430,7 +24473,7 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
+       std::swap (d->op0, d->op1);
+     }
+ 
+-  if ((d->vec_flags == VEC_ADVSIMD
++  if (((d->vec_flags == VEC_ADVSIMD && TARGET_SIMD)
+        || d->vec_flags == VEC_SVE_DATA
+        || d->vec_flags == (VEC_SVE_DATA | VEC_PARTIAL)
+        || d->vec_flags == VEC_SVE_PRED)
+@@ -27977,6 +28020,9 @@ aarch64_libgcc_floating_mode_supported_p
+ #undef TARGET_SECONDARY_RELOAD
+ #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
+ 
++#undef TARGET_SECONDARY_MEMORY_NEEDED
++#define TARGET_SECONDARY_MEMORY_NEEDED aarch64_secondary_memory_needed
++
+ #undef TARGET_SHIFT_TRUNCATION_MASK
+ #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
+ 
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 8757a962f..c0cc91756 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -374,8 +374,11 @@
+ ;; Attributes of the architecture required to support the instruction (or
+ ;; alternative). This attribute is used to compute attribute "enabled", use type
+ ;; "any" to enable an alternative in all cases.
++;;
++;; As a convenience, "fp_q" means "fp" + the ability to move between
++;; Q registers and is equivalent to "simd".
+ 
+-(define_enum "arches" [any rcpc8_4 fp simd sve fp16])
++(define_enum "arches" [any rcpc8_4 fp fp_q simd sve fp16])
+ 
+ (define_enum_attr "arch" "arches" (const_string "any"))
+ 
+@@ -403,7 +406,7 @@
+ 	(and (eq_attr "arch" "fp")
+ 	     (match_test "TARGET_FLOAT"))
+ 
+-	(and (eq_attr "arch" "simd")
++	(and (eq_attr "arch" "fp_q, simd")
+ 	     (match_test "TARGET_SIMD"))
+ 
+ 	(and (eq_attr "arch" "fp16")
+@@ -6768,8 +6771,8 @@
+ )
+ 
+ (define_expand "@aarch64_reload_mov<mode>"
+-  [(set (match_operand:TX 0 "register_operand" "=w")
+-	(match_operand:TX 1 "register_operand" "w"))
++  [(set (match_operand:VTX 0 "register_operand" "=w")
++	(match_operand:VTX 1 "register_operand" "w"))
+    (clobber (match_operand:DI 2 "register_operand" "=&r"))
+   ]
+   "TARGET_FLOAT"
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index d0cd1b788..a8a39b65a 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -313,6 +313,8 @@
+ 
+ (define_mode_iterator TX [TI TF TD])
+ 
++(define_mode_iterator VTX [TI TF TD V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF])
++
+ ;; Advanced SIMD opaque structure modes.
+ (define_mode_iterator VSTRUCT [OI CI XI])
+ 
+diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c
+new file mode 100644
+index 000000000..7e705e119
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c
+@@ -0,0 +1,7 @@
++/* { dg-options "-O2" } */
++
++#pragma GCC target "+nosimd+fp"
++
++#include "ldp_stp_6.c"
++
++/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c
+new file mode 100644
+index 000000000..462e3c9aa
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c
+@@ -0,0 +1,7 @@
++/* { dg-options "-O2" } */
++
++#pragma GCC target "+nosimd+fp"
++
++#include "ldp_stp_8.c"
++
++/* { dg-final { scan-assembler-times "ldp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c
+new file mode 100644
+index 000000000..283c56dd2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c
+@@ -0,0 +1,13 @@
++/* { dg-options "-O2" } */
++
++#pragma GCC target "+nosimd+fp"
++
++void
++foo (__Float32x4_t *ptr)
++{
++  ptr[0] = ptr[2];
++  ptr[1] = ptr[3];
++}
++
++/* { dg-final { scan-assembler {\tldp\tq[0-9]+, q[0-9]+} } } */
++/* { dg-final { scan-assembler {\tstp\tq[0-9]+, q[0-9]+} } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c
+new file mode 100644
+index 000000000..b14976cfe
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c
+@@ -0,0 +1,16 @@
++/* { dg-options "-O2" } */
++
++#pragma GCC target "+nosimd+fp"
++
++void
++foo (char *char_ptr)
++{
++  __Float64x2_t *ptr = (__Float64x2_t *)(char_ptr + 1);
++  asm volatile ("" ::
++		"w" (ptr[1]),
++		"w" (ptr[2]),
++		"w" (ptr[3]),
++		"w" (ptr[4]));
++}
++
++/* { dg-final { scan-assembler-times {\tldp\tq[0-9]+, q[0-9]+} 2 } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c
+new file mode 100644
+index 000000000..a99426eb2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c
+@@ -0,0 +1,16 @@
++/* { dg-options "-O2" } */
++
++#pragma GCC target "+nosimd+fp"
++
++void
++foo (char *char_ptr)
++{
++  __Float64x2_t *ptr = (__Float64x2_t *)(char_ptr + 1);
++  asm volatile ("" :
++		"=w" (ptr[1]),
++		"=w" (ptr[2]),
++		"=w" (ptr[3]),
++		"=w" (ptr[4]));
++}
++
++/* { dg-final { scan-assembler-times {\tstp\tq[0-9]+, q[0-9]+} 2 } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
+index 8a6afb13b..cac4241b0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
+@@ -80,3 +80,24 @@ fpr_to_gpr (v16qi q0)
+   x0 = q0;
+   asm volatile ("" :: "r" (x0));
+ }
++
++/*
++** gpr_to_gpr:
++** (
++**	mov	x0, x2
++**	mov	x1, x3
++** |
++**	mov	x1, x3
++**	mov	x0, x2
++** )
++**	ret
++*/
++void
++gpr_to_gpr ()
++{
++  register v16qi x0 asm ("x0");
++  register v16qi x2 asm ("x2");
++  asm volatile ("" : "=r" (x2));
++  x0 = x2;
++  asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c
+new file mode 100644
+index 000000000..08a0a19b5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c
+@@ -0,0 +1,27 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_GENERAL(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE zero_##TYPE () { return (TYPE) {}; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_GENERAL (__Int8x16_t)
++TEST_GENERAL (__Int16x8_t)
++TEST_GENERAL (__Int32x4_t)
++TEST_GENERAL (__Int64x2_t)
++TEST_GENERAL (__Bfloat16x8_t)
++TEST_GENERAL (__Float16x8_t)
++TEST_GENERAL (__Float32x4_t)
++TEST_GENERAL (__Float64x2_t)
++
++__Int8x16_t const_s8x8 () { return (__Int8x16_t) { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; }
++__Int16x8_t const_s16x4 () { return (__Int16x8_t) { 1, 0, 1, 0, 1, 0, 1, 0 }; }
++__Int32x4_t const_s32x2 () { return (__Int32x4_t) { 1, 2, 3, 4 }; }
++__Int64x2_t const_s64x1 () { return (__Int64x2_t) { 100, 100 }; }
++__Float16x8_t const_f16x4 () { return (__Float16x8_t) { 2, 2, 2, 2, 2, 2, 2, 2 }; }
++__Float32x4_t const_f32x2 () { return (__Float32x4_t) { 1, 2, 1, 2 }; }
++__Float64x2_t const_f64x1 () { return (__Float64x2_t) { 32, 32 }; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c
+new file mode 100644
+index 000000000..d43b994c1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c
+@@ -0,0 +1,30 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE \
++  test_##TYPE (void) \
++  { \
++    typedef TYPE v __attribute__((aligned(1))); \
++    register v *ptr asm ("x0"); \
++    asm volatile ("" : "=r" (ptr)); \
++    return *ptr; \
++  }
++
++TEST_VECTOR (__Int8x16_t)
++TEST_VECTOR (__Int16x8_t)
++TEST_VECTOR (__Int32x4_t)
++TEST_VECTOR (__Int64x2_t)
++TEST_VECTOR (__Bfloat16x8_t)
++TEST_VECTOR (__Float16x8_t)
++TEST_VECTOR (__Float32x4_t)
++TEST_VECTOR (__Float64x2_t)
++
++/*
++** test___Int8x16_t:
++**	ldr	q0, \[x0\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv2di_1.c b/gcc/testsuite/gcc.target/aarch64/movv2di_1.c
+new file mode 100644
+index 000000000..e3b55fd52
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv2di_1.c
+@@ -0,0 +1,103 @@
++/* { dg-do assemble } */
++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nothing+nosimd+fp"
++
++typedef long long v2di __attribute__((vector_size(16)));
++
++/*
++** fpr_to_fpr:
++**	sub	sp, sp, #16
++**	str	q1, \[sp\]
++**	ldr	q0, \[sp\]
++**	add	sp, sp, #?16
++**	ret
++*/
++v2di
++fpr_to_fpr (v2di q0, v2di q1)
++{
++  return q1;
++}
++
++/*
++** gpr_to_fpr: { target aarch64_little_endian }
++**	fmov	d0, x0
++**	fmov	v0.d\[1\], x1
++**	ret
++*/
++/*
++** gpr_to_fpr: { target aarch64_big_endian }
++**	fmov	d0, x1
++**	fmov	v0.d\[1\], x0
++**	ret
++*/
++v2di
++gpr_to_fpr ()
++{
++  register v2di x0 asm ("x0");
++  asm volatile ("" : "=r" (x0));
++  return x0;
++}
++
++/*
++** zero_to_fpr:
++**	fmov	d0, xzr
++**	ret
++*/
++v2di
++zero_to_fpr ()
++{
++  return (v2di) {};
++}
++
++/*
++** fpr_to_gpr: { target aarch64_little_endian }
++** (
++**	fmov	x0, d0
++**	fmov	x1, v0.d\[1\]
++** |
++**	fmov	x1, v0.d\[1\]
++**	fmov	x0, d0
++** )
++**	ret
++*/
++/*
++** fpr_to_gpr: { target aarch64_big_endian }
++** (
++**	fmov	x1, d0
++**	fmov	x0, v0.d\[1\]
++** |
++**	fmov	x0, v0.d\[1\]
++**	fmov	x1, d0
++** )
++**	ret
++*/
++void
++fpr_to_gpr (v2di q0)
++{
++  register v2di x0 asm ("x0");
++  x0 = q0;
++  asm volatile ("" :: "r" (x0));
++}
++
++/*
++** gpr_to_gpr:
++** (
++**	mov	x0, x2
++**	mov	x1, x3
++** |
++**	mov	x1, x3
++**	mov	x0, x2
++** )
++**	ret
++*/
++void
++gpr_to_gpr ()
++{
++  register v2di x0 asm ("x0");
++  register v2di x2 asm ("x2");
++  asm volatile ("" : "=r" (x2));
++  x0 = x2;
++  asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c
+new file mode 100644
+index 000000000..90e3b426d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c
+@@ -0,0 +1,40 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC aarch64 "arm_neon.h"
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_VECTOR (int8x16x2_t)
++TEST_VECTOR (int16x8x2_t)
++TEST_VECTOR (int32x4x2_t)
++TEST_VECTOR (int64x2x2_t)
++TEST_VECTOR (float16x8x2_t)
++TEST_VECTOR (bfloat16x8x2_t)
++TEST_VECTOR (float32x4x2_t)
++TEST_VECTOR (float64x2x2_t)
++
++/*
++** mov_int8x16x2_t:
++**	sub	sp, sp, #32
++**	stp	q2, q3, \[sp\]
++**	ldp	q0, q1, \[sp\]
++**	add	sp, sp, #?32
++**	ret
++*/
++/*
++** load_int8x16x2_t:
++**	ldp	q0, q1, \[x0\]
++**	ret
++*/
++/*
++** store_int8x16x2_t: { xfail *-*-* }
++**	stp	q0, q1, \[x0\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c
+new file mode 100644
+index 000000000..883a0ea71
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c
+@@ -0,0 +1,38 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC aarch64 "arm_neon.h"
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_VECTOR (int8x8x2_t)
++TEST_VECTOR (int16x4x2_t)
++TEST_VECTOR (int32x2x2_t)
++TEST_VECTOR (int64x1x2_t)
++TEST_VECTOR (float16x4x2_t)
++TEST_VECTOR (bfloat16x4x2_t)
++TEST_VECTOR (float32x2x2_t)
++TEST_VECTOR (float64x1x2_t)
++
++/*
++** mov_int8x8x2_t:
++**	fmov	d0, d2
++**	fmov	d1, d3
++**	ret
++*/
++/*
++** load_int8x8x2_t:
++**	ldp	d0, d1, \[x0\]
++**	ret
++*/
++/*
++** store_int8x8x2_t:
++**	stp	d0, d1, \[x0\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c
+new file mode 100644
+index 000000000..070a596bf
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c
+@@ -0,0 +1,44 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC aarch64 "arm_neon.h"
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_VECTOR (int8x16x3_t)
++TEST_VECTOR (int16x8x3_t)
++TEST_VECTOR (int32x4x3_t)
++TEST_VECTOR (int64x2x3_t)
++TEST_VECTOR (float16x8x3_t)
++TEST_VECTOR (bfloat16x8x3_t)
++TEST_VECTOR (float32x4x3_t)
++TEST_VECTOR (float64x2x3_t)
++
++/*
++** mov_int8x16x3_t:
++**	sub	sp, sp, #48
++**	stp	q3, q4, \[sp\]
++**	str	q5, \[sp, #?32\]
++**	ldp	q0, q1, \[sp\]
++**	ldr	q2, \[sp, #?32\]
++**	add	sp, sp, #?48
++**	ret
++*/
++/*
++** load_int8x16x3_t:
++**	ldp	q0, q1, \[x0\]
++**	ldr	q2, \[x0, #?32\]
++**	ret
++*/
++/*
++** store_int8x16x3_t: { xfail *-*-* }
++**	stp	q0, q1, \[x0\]
++**	stp	q2, \[x0, #?32\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c
+new file mode 100644
+index 000000000..4b873d749
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c
+@@ -0,0 +1,41 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC aarch64 "arm_neon.h"
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_VECTOR (int8x8x3_t)
++TEST_VECTOR (int16x4x3_t)
++TEST_VECTOR (int32x2x3_t)
++TEST_VECTOR (int64x1x3_t)
++TEST_VECTOR (float16x4x3_t)
++TEST_VECTOR (bfloat16x4x3_t)
++TEST_VECTOR (float32x2x3_t)
++TEST_VECTOR (float64x1x3_t)
++
++/*
++** mov_int8x8x3_t:
++**	fmov	d0, d3
++**	fmov	d1, d4
++**	fmov	d2, d5
++**	ret
++*/
++/*
++** load_int8x8x3_t:
++**	ldp	d0, d1, \[x0\]
++**	ldr	d2, \[x0, #?16\]
++**	ret
++*/
++/*
++** store_int8x8x3_t:
++**	stp	d0, d1, \[x0\]
++**	str	d2, \[x0, #?16\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c
+new file mode 100644
+index 000000000..6a517b4fe
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c
+@@ -0,0 +1,44 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC aarch64 "arm_neon.h"
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_VECTOR (int8x16x4_t)
++TEST_VECTOR (int16x8x4_t)
++TEST_VECTOR (int32x4x4_t)
++TEST_VECTOR (int64x2x4_t)
++TEST_VECTOR (float16x8x4_t)
++TEST_VECTOR (bfloat16x8x4_t)
++TEST_VECTOR (float32x4x4_t)
++TEST_VECTOR (float64x2x4_t)
++
++/*
++** mov_int8x16x4_t:
++**	sub	sp, sp, #64
++**	stp	q4, q5, \[sp\]
++**	stp	q6, q7, \[sp, #?32\]
++**	ldp	q0, q1, \[sp\]
++**	ldp	q2, q3, \[sp, #?32\]
++**	add	sp, sp, #?64
++**	ret
++*/
++/*
++** load_int8x16x4_t:
++**	ldp	q0, q1, \[x0\]
++**	ldp	q2, q3, \[x0, #?32\]
++**	ret
++*/
++/*
++** store_int8x16x4_t: { xfail *-*-* }
++**	stp	q0, q1, \[x0\]
++**	stp	q2, q3, \[x0, #?32\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
+new file mode 100644
+index 000000000..f096be4a5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
+@@ -0,0 +1,42 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC aarch64 "arm_neon.h"
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_VECTOR (int8x8x4_t)
++TEST_VECTOR (int16x4x4_t)
++TEST_VECTOR (int32x2x4_t)
++TEST_VECTOR (int64x1x4_t)
++TEST_VECTOR (float16x4x4_t)
++TEST_VECTOR (bfloat16x4x4_t)
++TEST_VECTOR (float32x2x4_t)
++TEST_VECTOR (float64x1x4_t)
++
++/*
++** mov_int8x8x4_t:
++**	fmov	d0, d4
++**	fmov	d1, d5
++**	fmov	d2, d6
++**	fmov	d3, d7
++**	ret
++*/
++/*
++** load_int8x8x4_t:
++**	ldp	d0, d1, \[x0\]
++**	ldp	d2, d3, \[x0, #?16\]
++**	ret
++*/
++/*
++** store_int8x8x4_t:
++**	stp	d0, d1, \[x0\]
++**	stp	d2, d3, \[x0, #?16\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
+index 4c97e6fbc..d2b5d8025 100644
+--- a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
+@@ -53,3 +53,18 @@ fpr_to_gpr (v8qi q0)
+   x0 = q0;
+   asm volatile ("" :: "r" (x0));
+ }
++
++/*
++** gpr_to_gpr:
++**	mov	x0, x1
++**	ret
++*/
++void
++gpr_to_gpr ()
++{
++  register v8qi x0 asm ("x0");
++  register v8qi x1 asm ("x1");
++  asm volatile ("" : "=r" (x1));
++  x0 = x1;
++  asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c
+new file mode 100644
+index 000000000..0d8576ffe
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c
+@@ -0,0 +1,27 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_GENERAL(TYPE) \
++  TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
++  TYPE zero_##TYPE () { return (TYPE) {}; } \
++  TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
++  void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
++
++TEST_GENERAL (__Int8x8_t)
++TEST_GENERAL (__Int16x4_t)
++TEST_GENERAL (__Int32x2_t)
++TEST_GENERAL (__Int64x1_t)
++TEST_GENERAL (__Bfloat16x4_t)
++TEST_GENERAL (__Float16x4_t)
++TEST_GENERAL (__Float32x2_t)
++TEST_GENERAL (__Float64x1_t)
++
++__Int8x8_t const_s8x8 () { return (__Int8x8_t) { 1, 1, 1, 1, 1, 1, 1, 1 }; }
++__Int16x4_t const_s16x4 () { return (__Int16x4_t) { 1, 0, 1, 0 }; }
++__Int32x2_t const_s32x2 () { return (__Int32x2_t) { 1, 2 }; }
++__Int64x1_t const_s64x1 () { return (__Int64x1_t) { 100 }; }
++__Float16x4_t const_f16x4 () { return (__Float16x4_t) { 2, 2, 2, 2 }; }
++__Float32x2_t const_f32x2 () { return (__Float32x2_t) { 1, 2 }; }
++__Float64x1_t const_f64x1 () { return (__Float64x1_t) { 32 }; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c
+new file mode 100644
+index 000000000..1caa1a788
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c
+@@ -0,0 +1,30 @@
++/* { dg-do assemble } */
++/* { dg-options "-O --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nosimd+fp"
++
++#define TEST_VECTOR(TYPE) \
++  TYPE \
++  test_##TYPE (void) \
++  { \
++    typedef TYPE v __attribute__((aligned(1))); \
++    register v *ptr asm ("x0"); \
++    asm volatile ("" : "=r" (ptr)); \
++    return *ptr; \
++  }
++
++TEST_VECTOR (__Int8x8_t)
++TEST_VECTOR (__Int16x4_t)
++TEST_VECTOR (__Int32x2_t)
++TEST_VECTOR (__Int64x1_t)
++TEST_VECTOR (__Bfloat16x4_t)
++TEST_VECTOR (__Float16x4_t)
++TEST_VECTOR (__Float32x2_t)
++TEST_VECTOR (__Float64x1_t)
++
++/*
++** test___Int8x8_t:
++**	ldr	d0, \[x0\]
++**	ret
++*/
+diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c
+new file mode 100644
+index 000000000..454ac2771
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c
+@@ -0,0 +1,5 @@
++/* { dg-options "-O3 -fno-math-errno --save-temps" } */
++
++#pragma GCC target "+nosimd+fp"
++
++#include "vect_unary_1.c"
+--
+2.33.0
+
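Note: the hunks above relax many Advanced SIMD move, split, and struct-mode patterns from TARGET_SIMD to TARGET_FLOAT, so 128-bit vector data stays loadable, storable, and copyable through the FP registers even when SIMD is disabled. A minimal user-level sketch of the effect, written in the same style as the new tests (this snippet is illustrative and not part of the patch; the expected codegen is hedged, not guaranteed on every tuning):

#pragma GCC target "+nosimd+fp"

typedef long long v2di __attribute__((vector_size(16)));

/* With these patches applied, this copy is expected to compile to a plain
   "ldr q0, [x1]" / "str q0, [x0]" pair even though Advanced SIMD is off;
   previously the V2DI move patterns required TARGET_SIMD.  */
void
copy_v2di (v2di *dst, v2di *src)
{
  *dst = *src;
}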
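The new aarch64_register_move_cost branch above prices an FPR-to-FPR tuple move as a round trip through memory when SIMD is unavailable. A worked example of that arithmetic, with invented memmov costs (real values come from the active tuning table, e.g. -mtune=neoverse-v1):

#include <stdio.h>

#define CEIL(a, b) (((a) + (b) - 1) / (b))
#define MIN(a, b)  ((a) < (b) ? (a) : (b))

int
main (void)
{
  /* Hypothetical per-vector FP load/store costs for illustration only.  */
  int load_fp = 4, store_fp = 2;
  int per_vector = load_fp + store_fp;
  /* Mirrors "MIN (CEIL (per_vector, 2), 4)" from the patch: the result
     stays above 2, so the allocator knows a direct move is impossible,
     but is capped at 4 so the round trip is not over-penalized.  */
  printf ("%d\n", MIN (CEIL (per_vector, 2), 4));	/* prints 3 */
  return 0;
}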
View file
_service:tar_scm:0157-LoongArch-Split-loongarch_option_override_internal-i.patch
Added
@@ -0,0 +1,800 @@ +From 6dd3434f004dd1481a3d18fb416b3ddd4151b10f Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Sat, 30 Mar 2024 16:43:14 +0800 +Subject: PATCH 157/188 LoongArch: Split loongarch_option_override_internal + into smaller procedures + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark -mno-recip as + aliases to -mrecip={all,none}, respectively. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-def.h (ABI_FPU_64): Rename to... + (ABI_FPU64_P): ...this. + (ABI_FPU_32): Rename to... + (ABI_FPU32_P): ...this. + (ABI_FPU_NONE): Rename to... + (ABI_NOFPU_P): ...this. + (ABI_LP64_P): Define. + * config/loongarch/loongarch.cc (loongarch_init_print_operand_punct): + Merged into loongarch_global_init. + (loongarch_cpu_option_override): Renamed to + loongarch_target_option_override. + (loongarch_option_override_internal): Move the work after + loongarch_config_target into loongarch_target_option_override. + (loongarch_global_init): Define. + (INIT_TARGET_FLAG): Move to loongarch-opts.cc. + (loongarch_option_override): Call loongarch_global_init + separately. + * config/loongarch/loongarch-opts.cc (loongarch_parse_mrecip_scheme): + Split the parsing of -mrecip=<string> from + loongarch_option_override_internal. + (loongarch_generate_mrecip_scheme): Define. Split from + loongarch_option_override_internal. + (loongarch_target_option_override): Define. Renamed from + loongarch_cpu_option_override. + (loongarch_init_misc_options): Define. Split from + loongarch_option_override_internal. + (INIT_TARGET_FLAG): Move from loongarch.cc. + * config/loongarch/loongarch-opts.h (loongarch_target_option_override): + New prototype. + (loongarch_parse_mrecip_scheme): New prototype. + (loongarch_init_misc_options): New prototype. + (TARGET_ABI_LP64): Simplify with ABI_LP64_P. + * config/loongarch/loongarch.h (TARGET_RECIP_DIV): Simplify. + Do not reference specific CPU architecture (LA664). + (TARGET_RECIP_SQRT): Same. + (TARGET_RECIP_RSQRT): Same. + (TARGET_RECIP_VEC_DIV): Same. + (TARGET_RECIP_VEC_SQRT): Same. + (TARGET_RECIP_VEC_RSQRT): Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 8 +- + gcc/config/loongarch/loongarch-def.h | 11 +- + gcc/config/loongarch/loongarch-opts.cc | 253 ++++++++++++++++++ + gcc/config/loongarch/loongarch-opts.h | 27 +- + gcc/config/loongarch/loongarch.cc | 253 +++--------------- + gcc/config/loongarch/loongarch.h | 18 +- + gcc/config/loongarch/loongarch.opt | 8 +- + 7 files changed, 342 insertions(+), 236 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 4d6b1902d..9c6f59bb8 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -197,14 +197,14 @@ mexplicit-relocs + Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + +-mrecip +-Target RejectNegative Var(la_recip) Save +-Generate approximate reciprocal divide and square root for better throughput. +- + mrecip= + Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + ++mrecip ++Target Alias(mrecip=, all, none) ++Generate approximate reciprocal divide and square root for better throughput. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index fdcf43fc7..b1423bcfe 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -90,11 +90,16 @@ extern loongarch_def_array<const char *, N_ABI_BASE_TYPES> + + #define TO_LP64_ABI_BASE(C) (C) + +-#define ABI_FPU_64(abi_base) \ ++#define ABI_LP64_P(abi_base) \ ++ (abi_base == ABI_BASE_LP64D \ ++ || abi_base == ABI_BASE_LP64F \ ++ || abi_base == ABI_BASE_LP64S) ++ ++#define ABI_FPU64_P(abi_base) \ + (abi_base == ABI_BASE_LP64D) +-#define ABI_FPU_32(abi_base) \ ++#define ABI_FPU32_P(abi_base) \ + (abi_base == ABI_BASE_LP64F) +-#define ABI_FPU_NONE(abi_base) \ ++#define ABI_NOFPU_P(abi_base) \ + (abi_base == ABI_BASE_LP64S) + + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index bdecfaf49..404642a9e 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see + #include "coretypes.h" + #include "tm.h" + #include "obstack.h" ++#include "opts.h" + #include "diagnostic-core.h" + + #include "loongarch-cpu.h" +@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-str.h" + #include "loongarch-def.h" + ++/* Target configuration */ + struct loongarch_target la_target; + ++/* RTL cost information */ ++const struct loongarch_rtx_cost_data *loongarch_cost; ++ + /* ABI-related configuration. */ + #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi)) + static const struct loongarch_abi +@@ -795,3 +800,251 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } ++ ++/* -mrecip=<str> handling */ ++static struct ++ { ++ const char *string; /* option name. */ ++ unsigned int mask; /* mask bits to set. */ ++ } ++const recip_options = { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++}; ++ ++/* Parser for -mrecip=<recip_string>. */ ++unsigned int ++loongarch_parse_mrecip_scheme (const char *recip_string) ++{ ++ unsigned int result_mask = RECIP_MASK_NONE; ++ ++ if (recip_string) ++ { ++ char *p = ASTRDUP (recip_string); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_optionsi.string)) ++ { ++ mask = recip_optionsi.mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ result_mask &= ~mask; ++ else ++ result_mask |= mask; ++ } ++ } ++ return result_mask; ++} ++ ++/* Generate -mrecip= argument based on the mask. 
*/ ++const char* ++loongarch_generate_mrecip_scheme (unsigned int mask) ++{ ++ static char recip_scheme_str128; ++ int p = 0, tmp; ++ ++ switch (mask) ++ { ++ case RECIP_MASK_ALL: ++ return "all"; ++ ++ case RECIP_MASK_NONE: ++ return "none"; ++ } ++ ++ for (unsigned long i = 2; i < ARRAY_SIZE (recip_options); i++) ++ { ++ if (mask & recip_optionsi.mask) ++ { ++ if ((tmp = strlen (recip_optionsi.string) + 1) >= 127 - p) ++ gcc_unreachable (); ++ ++ recip_scheme_strp = ','; ++ strcpy (recip_scheme_str + p + 1, recip_optionsi.string); ++ p += tmp; ++ } ++ } ++ recip_scheme_strp = '\0'; ++ return recip_scheme_str + 1; ++} ++ ++ ++ ++/* Refresh the switches acccording to the resolved loongarch_target struct. */ ++void ++loongarch_target_option_override (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ loongarch_update_gcc_opt_status (target, opts, opts_set); ++ ++ /* alignments */ ++ if (opts->x_flag_align_functions && !opts->x_str_align_functions) ++ opts->x_str_align_functions ++ = loongarch_cpu_aligntarget->cpu_tune.function; ++ ++ if (opts->x_flag_align_labels && !opts->x_str_align_labels) ++ opts->x_str_align_labels = loongarch_cpu_aligntarget->cpu_tune.label; ++ ++ /* Set up parameters to be used in prefetching algorithm. */ ++ int simultaneous_prefetches ++ = loongarch_cpu_cachetarget->cpu_tune.simultaneous_prefetches; ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, ++ simultaneous_prefetches); ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, ++ loongarch_cpu_cachetarget->cpu_tune.l1d_line_size); ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, ++ loongarch_cpu_cachetarget->cpu_tune.l1d_size); ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, ++ loongarch_cpu_cachetarget->cpu_tune.l2d_size); ++ ++ /* Other arch-specific overrides. */ ++ switch (target->cpu_arch) ++ { ++ case CPU_LA664: ++ /* Enable -mrecipe=all for LA664 by default. */ ++ if (!opts_set->x_recip_mask) ++ { ++ opts->x_recip_mask = RECIP_MASK_ALL; ++ opts_set->x_recip_mask = 1; ++ } ++ } ++ ++ /* -mrecip= */ ++ opts->x_la_recip_name ++ = loongarch_generate_mrecip_scheme (opts->x_recip_mask); ++ ++ /* Decide which rtx_costs structure to use. */ ++ if (opts->x_optimize_size) ++ loongarch_cost = &loongarch_rtx_cost_optimize_size; ++ else ++ loongarch_cost = &loongarch_cpu_rtx_cost_datatarget->cpu_tune; ++ ++ /* If the user hasn't specified a branch cost, use the processor's ++ default. */ ++ if (!opts_set->x_la_branch_cost) ++ opts->x_la_branch_cost = loongarch_cost->branch_cost; ++ ++ /* other stuff */ ++ if (ABI_LP64_P (target->abi.base)) ++ opts->x_flag_pcc_struct_return = 0; ++ ++ switch (target->cmodel) ++ { ++ case CMODEL_EXTREME: ++ if (opts->x_flag_plt) ++ { ++ if (opts_set->x_flag_plt) ++ error ("code model %qs is not compatible with %s", ++ "extreme", "-fplt"); ++ opts->x_flag_plt = 0; ++ } ++ break; ++ ++ case CMODEL_TINY_STATIC: ++ case CMODEL_MEDIUM: ++ case CMODEL_NORMAL: ++ case CMODEL_TINY: ++ case CMODEL_LARGE: ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++ ++/* Resolve options that's not covered by la_target. */ ++void ++loongarch_init_misc_options (struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ if (opts->x_flag_pic) ++ opts->x_g_switch_value = 0; ++ ++ /* -mrecip options. 
*/ ++ opts->x_recip_mask = loongarch_parse_mrecip_scheme (opts->x_la_recip_name); ++ ++#define INIT_TARGET_FLAG(NAME, INIT) \ ++ { \ ++ if (!(opts_set->x_target_flags & MASK_##NAME)) \ ++ { \ ++ if (INIT) \ ++ opts->x_target_flags |= MASK_##NAME; \ ++ else \ ++ opts->x_target_flags &= ~MASK_##NAME; \ ++ } \ ++ } ++ ++ /* Enable conditional moves for int and float by default. */ ++ INIT_TARGET_FLAG (COND_MOVE_INT, 1) ++ INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1) ++ ++ /* Set mrelax default. */ ++ INIT_TARGET_FLAG (LINKER_RELAXATION, ++ HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++ ++#undef INIT_TARGET_FLAG ++ ++ /* Set mexplicit-relocs default. */ ++ if (opts->x_la_opt_explicit_relocs == M_OPT_UNSET) ++ opts->x_la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (TARGET_LINKER_RELAXATION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); ++ ++ /* Enable sw prefetching at -O3 and higher. */ ++ if (opts->x_flag_prefetch_loop_arrays < 0 ++ && (opts->x_optimize >= 3 || opts->x_flag_profile_use) ++ && !opts->x_optimize_size) ++ opts->x_flag_prefetch_loop_arrays = 1; ++ ++ if (TARGET_DIRECT_EXTERN_ACCESS_OPTS_P (opts) && opts->x_flag_shlib) ++ error ("%qs cannot be used for compiling a shared library", ++ "-mdirect-extern-access"); ++ ++ /* Enforce that interval is the same size as size so the mid-end does the ++ right thing. */ ++ SET_OPTION_IF_UNSET (opts, opts_set, ++ param_stack_clash_protection_probe_interval, ++ param_stack_clash_protection_guard_size); ++} +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 463812136..177d587da 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -30,6 +30,10 @@ along with GCC; see the file COPYING3. If not see + /* Target configuration */ + extern struct loongarch_target la_target; + ++/* RTL cost information */ ++extern const struct loongarch_rtx_cost_data *loongarch_cost; ++ ++ + /* Initialize loongarch_target from separate option variables. */ + void + loongarch_init_target (struct loongarch_target *target, +@@ -46,11 +50,30 @@ loongarch_config_target (struct loongarch_target *target, + struct loongarch_flags *flags, + int follow_multilib_list_p); + ++ ++/* Refresh the switches acccording to the resolved loongarch_target struct. */ ++void ++loongarch_target_option_override (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set); ++ ++ + /* option status feedback for "gcc --help=target -Q" */ + void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); ++ ++ ++/* Parser for -mrecip=<recip_string>. */ ++unsigned int ++loongarch_parse_mrecip_scheme (const char *recip_string); ++ ++ ++/* Resolve options that's not covered by la_target. 
*/ ++void ++loongarch_init_misc_options (struct gcc_options *opts, ++ struct gcc_options *opts_set); + #endif + + /* Flag status */ +@@ -80,9 +103,7 @@ struct loongarch_flags { + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + + #define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) +-#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ +- || la_target.abi.base == ABI_BASE_LP64F \ +- || la_target.abi.base == ABI_BASE_LP64S) ++#define TARGET_ABI_LP64 ABI_LP64_P(la_target.abi.base) + + #define ISA_HAS_LSX \ + (la_target.isa.simd == ISA_EXT_SIMD_LSX \ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8d9cda165..c2f3739d0 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -208,9 +208,6 @@ const enum reg_class loongarch_regno_to_classFIRST_PSEUDO_REGISTER = { + FRAME_REGS, FRAME_REGS + }; + +-/* Which cost information to use. */ +-static const struct loongarch_rtx_cost_data *loongarch_cost; +- + /* Information about a single argument. */ + struct loongarch_arg_info + { +@@ -5908,17 +5905,6 @@ loongarch_print_operand_punctuation (FILE *file, int ch) + } + } + +-/* Initialize loongarch_print_operand_punct. */ +- +-static void +-loongarch_init_print_operand_punct (void) +-{ +- const char *p; +- +- for (p = ".$"; *p; p++) +- loongarch_print_operand_punct(unsigned char) *p = true; +-} +- + /* PRINT_OPERAND prefix LETTER refers to the integer branch instruction + associated with condition CODE. Print the condition part of the + opcode to FILE. */ +@@ -7622,118 +7608,15 @@ loongarch_init_machine_status (void) + } + + static void +-loongarch_cpu_option_override (struct loongarch_target *target, +- struct gcc_options *opts, +- struct gcc_options *opts_set) +-{ +- /* alignments */ +- if (opts->x_flag_align_functions && !opts->x_str_align_functions) +- opts->x_str_align_functions +- = loongarch_cpu_aligntarget->cpu_tune.function; +- +- if (opts->x_flag_align_labels && !opts->x_str_align_labels) +- opts->x_str_align_labels = loongarch_cpu_aligntarget->cpu_tune.label; +- +- /* Set up parameters to be used in prefetching algorithm. */ +- int simultaneous_prefetches +- = loongarch_cpu_cachetarget->cpu_tune.simultaneous_prefetches; +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, +- simultaneous_prefetches); +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, +- loongarch_cpu_cachetarget->cpu_tune.l1d_line_size); +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, +- loongarch_cpu_cachetarget->cpu_tune.l1d_size); +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, +- loongarch_cpu_cachetarget->cpu_tune.l2d_size); +-} +- +-static void +-loongarch_option_override_internal (struct gcc_options *opts, +- struct gcc_options *opts_set) ++loongarch_global_init (void) + { +- int i, regno, mode; +- +- if (flag_pic) +- g_switch_value = 0; +- +- loongarch_init_target (&la_target, +- la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu, +- la_opt_simd, la_opt_abi_base, la_opt_abi_ext, +- la_opt_cmodel, opts->x_la_isa_evolution, +- opts_set->x_la_isa_evolution); +- +- /* Handle target-specific options: compute defaults/conflicts etc. */ +- loongarch_config_target (&la_target, NULL, 0); +- +- loongarch_update_gcc_opt_status (&la_target, opts, opts_set); +- loongarch_cpu_option_override (&la_target, opts, opts_set); +- +- if (TARGET_ABI_LP64) +- flag_pcc_struct_return = 0; +- +- /* Decide which rtx_costs structure to use. 
*/
+-  if (optimize_size)
+-    loongarch_cost = &loongarch_rtx_cost_optimize_size;
+-  else
+-    loongarch_cost = &loongarch_cpu_rtx_cost_data[la_target.cpu_tune];
+-
+-  /* If the user hasn't specified a branch cost, use the processor's
+-     default.  */
+-  if (la_branch_cost == 0)
+-    la_branch_cost = loongarch_cost->branch_cost;
+-
+-  /* Enable sw prefetching at -O3 and higher.  */
+-  if (opts->x_flag_prefetch_loop_arrays < 0
+-      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+-      && !opts->x_optimize_size)
+-    opts->x_flag_prefetch_loop_arrays = 1;
+-
+-  if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
+-    error ("%qs cannot be used for compiling a shared library",
+-           "-mdirect-extern-access");
+-
+-  switch (la_target.cmodel)
+-    {
+-      case CMODEL_EXTREME:
+-        if (opts->x_flag_plt)
+-          {
+-            if (global_options_set.x_flag_plt)
+-              error ("code model %qs is not compatible with %s",
+-                     "extreme", "-fplt");
+-            opts->x_flag_plt = 0;
+-          }
+-        break;
+-
+-      case CMODEL_TINY_STATIC:
+-      case CMODEL_MEDIUM:
+-      case CMODEL_NORMAL:
+-      case CMODEL_TINY:
+-      case CMODEL_LARGE:
+-        break;
+-
+-      default:
+-        gcc_unreachable ();
+-    }
+-
+-  /* Validate the guard size.  */
+-  int guard_size = param_stack_clash_protection_guard_size;
+-
+-  /* Enforce that interval is the same size as size so the mid-end does the
+-     right thing.  */
+-  SET_OPTION_IF_UNSET (opts, &global_options_set,
+-                       param_stack_clash_protection_probe_interval,
+-                       guard_size);
+-
+-  loongarch_init_print_operand_punct ();
++  /* Initialize loongarch_print_operand_punct.  */
++  for (const char *p = ".$"; *p; p++)
++    loongarch_print_operand_punct[(unsigned char) *p] = true;
+
+   /* Set up array to map GCC register number to debug register number.
+      Ignore the special purpose register numbers.  */
+-
+-  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
++  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+     {
+       if (GP_REG_P (i) || FP_REG_P (i))
+         loongarch_dwarf_regno[i] = i;
+@@ -7742,115 +7625,53 @@ loongarch_option_override_internal (struct gcc_options *opts,
+     }
+
+   /* Set up loongarch_hard_regno_mode_ok.  */
+-  for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
+-    for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
++  for (int mode = 0; mode < MAX_MACHINE_MODE; mode++)
++    for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+       loongarch_hard_regno_mode_ok_p[mode][regno]
+         = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
+
+   /* Function to allocate machine-dependent function status.  */
+   init_machine_status = &loongarch_init_machine_status;
++};
+
+-  /* -mrecip options.  */
+-  static struct
+-    {
+-      const char *string;   /* option name.  */
+-      unsigned int mask;    /* mask bits to set.  */
+-    }
+-  const recip_options[] = {
+-    { "all", RECIP_MASK_ALL },
+-    { "none", RECIP_MASK_NONE },
+-    { "div", RECIP_MASK_DIV },
+-    { "sqrt", RECIP_MASK_SQRT },
+-    { "rsqrt", RECIP_MASK_RSQRT },
+-    { "vec-div", RECIP_MASK_VEC_DIV },
+-    { "vec-sqrt", RECIP_MASK_VEC_SQRT },
+-    { "vec-rsqrt", RECIP_MASK_VEC_RSQRT },
+-  };
+-
+-  if (la_recip_name)
+-    {
+-      char *p = ASTRDUP (la_recip_name);
+-      char *q;
+-      unsigned int mask, i;
+-      bool invert;
+-
+-      while ((q = strtok (p, ",")) != NULL)
+-        {
+-          p = NULL;
+-          if (*q == '!')
+-            {
+-              invert = true;
+-              q++;
+-            }
+-          else
+-            invert = false;
+-
+-          if (!strcmp (q, "default"))
+-            mask = RECIP_MASK_ALL;
+-          else
+-            {
+-              for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+-                if (!strcmp (q, recip_options[i].string))
+-                  {
+-                    mask = recip_options[i].mask;
+-                    break;
+-                  }
+-
+-              if (i == ARRAY_SIZE (recip_options))
+-                {
+-                  error ("unknown option for %<-mrecip=%s%>", q);
+-                  invert = false;
+-                  mask = RECIP_MASK_NONE;
+-                }
+-            }
+-
+-          if (invert)
+-            recip_mask &= ~mask;
+-          else
+-            recip_mask |= mask;
+-        }
+-    }
+-  if (la_recip)
+-    recip_mask |= RECIP_MASK_ALL;
+-  if (!ISA_HAS_FRECIPE)
+-    recip_mask = RECIP_MASK_NONE;
+-
+-#define INIT_TARGET_FLAG(NAME, INIT) \
+-  { \
+-    if (!(target_flags_explicit & MASK_##NAME)) \
+-      { \
+-        if (INIT) \
+-          target_flags |= MASK_##NAME; \
+-        else \
+-          target_flags &= ~MASK_##NAME; \
+-      } \
+-  }
+-
+-  /* Enable conditional moves for int and float by default.  */
+-  INIT_TARGET_FLAG (COND_MOVE_INT, 1)
+-  INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1)
+-
+-  /* Set mrelax default.  */
+-  INIT_TARGET_FLAG (LINKER_RELAXATION,
+-                    HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
++static void
++loongarch_option_override_internal (struct loongarch_target *target,
++                                    struct gcc_options *opts,
++                                    struct gcc_options *opts_set)
++{
++  /* Handle options not covered by struct loongarch_target.  */
++  loongarch_init_misc_options (opts, opts_set);
++
++  /* Resolve the target struct.  */
++  loongarch_init_target (target,
++                         opts->x_la_opt_cpu_arch,
++                         opts->x_la_opt_cpu_tune,
++                         opts->x_la_opt_fpu,
++                         opts->x_la_opt_simd,
++                         opts->x_la_opt_abi_base,
++                         opts->x_la_opt_abi_ext,
++                         opts->x_la_opt_cmodel,
++                         opts->x_la_isa_evolution,
++                         opts_set->x_la_isa_evolution);
+
+-#undef INIT_TARGET_FLAG
++  loongarch_config_target (target, NULL, 0);
+
+-  if (la_opt_explicit_relocs == M_OPT_UNSET)
+-    la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
+-                              ? (TARGET_LINKER_RELAXATION
+-                                 ? EXPLICIT_RELOCS_AUTO
+-                                 : EXPLICIT_RELOCS_ALWAYS)
+-                              : EXPLICIT_RELOCS_NONE);
++  /* Override some options according to the resolved target.  */
++  loongarch_target_option_override (target, opts, opts_set);
+ }
+-
+ /* Implement TARGET_OPTION_OVERRIDE.  */
+
+ static void
+ loongarch_option_override (void)
+ {
+-  loongarch_option_override_internal (&global_options, &global_options_set);
++  /* Setting up the target configuration.  */
++  loongarch_option_override_internal (&la_target,
++                                      &global_options,
++                                      &global_options_set);
++
++  /* Global initializations.  */
++  loongarch_global_init ();
+ }
+
+ /* Implement TARGET_OPTION_SAVE.  */
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 698e42aec..221e8b286 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -710,12 +710,18 @@ enum reg_class
+                     | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \
+                     | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT)
+
+-#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664)
+-#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664)
+-#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664)
+-#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664)
+-#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664)
+-#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664)
++#define TARGET_RECIP_DIV \
++  ((recip_mask & RECIP_MASK_DIV) != 0 && ISA_HAS_FRECIPE)
++#define TARGET_RECIP_SQRT \
++  ((recip_mask & RECIP_MASK_SQRT) != 0 && ISA_HAS_FRECIPE)
++#define TARGET_RECIP_RSQRT \
++  ((recip_mask & RECIP_MASK_RSQRT) != 0 && ISA_HAS_FRECIPE)
++#define TARGET_RECIP_VEC_DIV \
++  ((recip_mask & RECIP_MASK_VEC_DIV) != 0 && ISA_HAS_FRECIPE)
++#define TARGET_RECIP_VEC_SQRT \
++  ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 && ISA_HAS_FRECIPE)
++#define TARGET_RECIP_VEC_RSQRT \
++  ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 && ISA_HAS_FRECIPE)
+
+ /* 1 if N is a possible register number for function argument passing.
+    We have no FP argument registers when soft-float.  */
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 75d230067..ea848cd76 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -205,14 +205,14 @@ mexplicit-relocs
+ Target Alias(mexplicit-relocs=, always, none)
+ Use %reloc() assembly operators (for backward compatibility).
+
+-mrecip
+-Target RejectNegative Var(la_recip) Save
+-Generate approximate reciprocal divide and square root for better throughput.
+-
+ mrecip=
+ Target RejectNegative Joined Var(la_recip_name) Save
+ Control generation of reciprocal estimates.
+
++mrecip
++Target Alias(mrecip=, all, none)
++Generate approximate reciprocal divide and square root for better throughput.
++
+ ; The code model option names for -mcmodel.
+ Enum
+ Name(cmodel) Type(int)
+--
+2.43.0
+
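The hunks above split loongarch_option_override_internal into smaller helpers (loongarch_init_misc_options, loongarch_init_target, loongarch_config_target, loongarch_target_option_override), gate the TARGET_RECIP_* macros on ISA_HAS_FRECIPE instead of an LA664 microarchitecture check, and turn plain -mrecip into an alias for -mrecip=all. For reference, here is a minimal standalone C sketch of the "-mrecip=" list parsing that the relocated block implements; this is illustrative only, not GCC source, and lookup() plus the mask values are assumptions standing in for the real recip_options table:

    #include <stdbool.h>
    #include <string.h>

    enum { MASK_DIV = 1, MASK_SQRT = 2, MASK_ALL = 3 };

    /* Hypothetical name->mask lookup replacing the recip_options table.  */
    static unsigned lookup (const char *name)
    {
      if (!strcmp (name, "div"))
        return MASK_DIV;
      if (!strcmp (name, "sqrt"))
        return MASK_SQRT;
      if (!strcmp (name, "all") || !strcmp (name, "default"))
        return MASK_ALL;
      return 0;  /* the real code reports an error here */
    }

    /* Parse a comma-separated option list where a leading '!' clears the
       named bits instead of setting them, e.g. "all,!sqrt".  */
    unsigned parse_recip (char *list, unsigned mask)
    {
      for (char *q = strtok (list, ","); q; q = strtok (NULL, ","))
        {
          bool invert = (*q == '!');
          if (invert)
            q++;
          unsigned m = lookup (q);
          mask = invert ? (mask & ~m) : (mask | m);
        }
      return mask;
    }

For example, parse_recip on a writable copy of "all,!sqrt" starting from 0 yields MASK_DIV: "all" sets both bits, then "!sqrt" clears the square-root bit again.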
_service:tar_scm:0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch
Added
@@ -0,0 +1,213 @@
+From b51d3b1af24758534e5a8f3a52a56106b935c485 Mon Sep 17 00:00:00 2001
+From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+Date: Wed, 31 May 2023 11:23:23 +0100
+Subject: [PATCH 059/157] [Backport][SME] aarch64: Simplify output template
+ emission code for a few patterns
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11bd9b1f8133fc07c267e6d1aee8b01e06c7a750
+
+If the output code for a define_insn just does a switch (which_alternative) with no other computation we can almost always
+replace it with more compact MD syntax for each alternative in a mult-alternative '@' block.
+This patch cleans up some such patterns in the aarch64 backend, making them shorter and more concise.
+No behavioural change intended.
+
+Bootstrapped and tested on aarch64-none-linux-gnu.
+
+gcc/ChangeLog:
+
+	* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): Rewrite
+	output template to avoid explicit switch on which_alternative.
+	(*aarch64_simd_mov<VQMOV:mode>): Likewise.
+	(and<mode>3): Likewise.
+	(ior<mode>3): Likewise.
+	* config/aarch64/aarch64.md (*mov<mode>_aarch64): Likewise.
+---
+ gcc/config/aarch64/aarch64-simd.md | 97 +++++++++---------------------
+ gcc/config/aarch64/aarch64.md | 42 ++++---------
+ 2 files changed, 40 insertions(+), 99 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index ef7fc4ecb..2d688edf5 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -122,28 +122,16 @@
+   "TARGET_FLOAT
+    && (register_operand (operands[0], <MODE>mode)
+        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
+-{
+-  switch (which_alternative)
+-    {
+-    case 0: return "ldr\t%d0, %1";
+-    case 1: return "str\txzr, %0";
+-    case 2: return "str\t%d1, %0";
+-    case 3:
+-      if (TARGET_SIMD)
+-        return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+-      return "fmov\t%d0, %d1";
+-    case 4:
+-      if (TARGET_SIMD)
+-        return "umov\t%0, %1.d[0]";
+-      return "fmov\t%x0, %d1";
+-    case 5: return "fmov\t%d0, %1";
+-    case 6: return "mov\t%0, %1";
+-    case 7:
+-      return aarch64_output_simd_mov_immediate (operands[1], 64);
+-    case 8: return "fmov\t%d0, xzr";
+-    default: gcc_unreachable ();
+-    }
+-}
++  "@
++   ldr\t%d0, %1
++   str\txzr, %0
++   str\t%d1, %0
++   * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
++   * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
++   fmov\t%d0, %1
++   mov\t%0, %1
++   * return aarch64_output_simd_mov_immediate (operands[1], 64);
++   fmov\t%d0, xzr"
+  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
+                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
+                     mov_reg, neon_move<q>, f_mcr")
+@@ -158,29 +146,16 @@
+   "TARGET_FLOAT
+    && (register_operand (operands[0], <MODE>mode)
+        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
+-{
+-  switch (which_alternative)
+-    {
+-    case 0:
+-      return "ldr\t%q0, %1";
+-    case 1:
+-      return "stp\txzr, xzr, %0";
+-    case 2:
+-      return "str\t%q1, %0";
+-    case 3:
+-      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+-    case 4:
+-    case 5:
+-    case 6:
+-      return "#";
+-    case 7:
+-      return aarch64_output_simd_mov_immediate (operands[1], 128);
+-    case 8:
+-      return "fmov\t%d0, xzr";
+-    default:
+-      gcc_unreachable ();
+-    }
+-}
++  "@
++   ldr\t%q0, %1
++   stp\txzr, xzr, %0
++   str\t%q1, %0
++   mov\t%0.<Vbtype>, %1.<Vbtype>
++   #
++   #
++   #
++   * return aarch64_output_simd_mov_immediate (operands[1], 128);
++   fmov\t%d0, xzr"
+  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
+                     neon_logic<q>, multiple, multiple,\
+                     multiple, neon_move<q>, fmov")
+@@ -1004,18 +979,10 @@
+        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))
+   "TARGET_SIMD"
+-  {
+-    switch (which_alternative)
+-      {
+-      case 0:
+-        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+-      case 1:
+-        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
+-                                                  AARCH64_CHECK_BIC);
+-      default:
+-        gcc_unreachable ();
+-      }
+-  }
++  "@
++   and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
++   * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
++                                               AARCH64_CHECK_BIC);"
+  [(set_attr "type" "neon_logic<q>")]
+ )
+
+@@ -1025,18 +992,10 @@
+        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))
+   "TARGET_SIMD"
+-  {
+-    switch (which_alternative)
+-      {
+-      case 0:
+-        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+-      case 1:
+-        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
+-                                                  AARCH64_CHECK_ORR);
+-      default:
+-        gcc_unreachable ();
+-      }
+-  }
++  "@
++   orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
++   * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
++                                               AARCH64_CHECK_ORR);"
+  [(set_attr "type" "neon_logic<q>")]
+ )
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index c0cc91756..7454a5c77 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1198,36 +1198,18 @@
+        (match_operand:SHORT 1 "aarch64_mov_operand"  " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))
+   "(register_operand (operands[0], <MODE>mode)
+     || aarch64_reg_or_zero (operands[1], <MODE>mode))"
+-{
+-  switch (which_alternative)
+-    {
+-    case 0:
+-      return "mov\t%w0, %w1";
+-    case 1:
+-      return "mov\t%w0, %1";
+-    case 2:
+-      return aarch64_output_scalar_simd_mov_immediate (operands[1],
+-                                                       <MODE>mode);
+-    case 3:
+-      return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
+-    case 4:
+-      return "ldr<size>\t%w0, %1";
+-    case 5:
+-      return "ldr\t%<size>0, %1";
+-    case 6:
+-      return "str<size>\t%w1, %0";
+-    case 7:
+-      return "str\t%<size>1, %0";
+-    case 8:
+-      return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
+-    case 9:
+-      return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
+-    case 10:
+-      return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
+-    default:
+-      gcc_unreachable ();
+-    }
+-}
++  "@
++   mov\t%w0, %w1
++   mov\t%w0, %1
++   * return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
++   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
++   ldr<size>\t%w0, %1
++   ldr\t%<size>0, %1
++   str<size>\t%w1, %0
++   str\t%<size>1, %0
++   * return TARGET_SIMD ? \"umov\t%w0, %1.<v>[0]\" : \"fmov\t%w0, %s1\";
++   * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\";
++   * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>[0]\" : \"fmov\t%s0, %s1\";"
+   ;; The "mov_imm" type for CNT is just a placeholder.
+  [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
+                     store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
+--
+2.33.0
+
_service:tar_scm:0158-LoongArch-Regenerate-loongarch.opt.urls.patch
Added
@@ -0,0 +1,117 @@
+From 90a0f195830a25e4179127c67e873c80f758f29d Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 25 Oct 2024 06:25:39 +0000
+Subject: [PATCH 158/188] LoongArch: Regenerate loongarch.opt.urls.
+
+Fixes: d28ea8e5a704 ("LoongArch: Split loongarch_option_override_internal
+ into smaller procedures")
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.opt.urls: Regenerate.
+---
+ gcc/config/loongarch/loongarch.opt.urls | 92 +++++++++++++++++++++++++
+ 1 file changed, 92 insertions(+)
+ create mode 100644 gcc/config/loongarch/loongarch.opt.urls
+
+diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
+new file mode 100644
+index 000000000..571c504e6
+--- /dev/null
++++ b/gcc/config/loongarch/loongarch.opt.urls
+@@ -0,0 +1,92 @@
++; Autogenerated by regenerate-opt-urls.py from gcc/config/loongarch/loongarch.opt and generated HTML
++
++mfpu=
++UrlSuffix(gcc/LoongArch-Options.html#index-mfpu-2)
++
++msoft-float
++UrlSuffix(gcc/LoongArch-Options.html#index-msoft-float-5)
++
++msingle-float
++UrlSuffix(gcc/LoongArch-Options.html#index-msingle-float)
++
++mdouble-float
++UrlSuffix(gcc/LoongArch-Options.html#index-mdouble-float-1)
++
++msimd=
++UrlSuffix(gcc/LoongArch-Options.html#index-msimd-1)
++
++march=
++UrlSuffix(gcc/LoongArch-Options.html#index-march-7)
++
++mtune=
++UrlSuffix(gcc/LoongArch-Options.html#index-mtune-8)
++
++mabi=
++UrlSuffix(gcc/LoongArch-Options.html#index-mabi-2)
++
++mbranch-cost=
++UrlSuffix(gcc/LoongArch-Options.html#index-mbranch-cost-2)
++
++mcheck-zero-division
++UrlSuffix(gcc/LoongArch-Options.html#index-mcheck-zero-division)
++
++mcond-move-int
++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-int)
++
++mcond-move-float
++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-float)
++
++mmemcpy
++UrlSuffix(gcc/LoongArch-Options.html#index-mmemcpy)
++
++mstrict-align
++UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1)
++
++mmax-inline-memcpy-size=
++UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size)
++
++mexplicit-relocs=
++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1)
++
++mexplicit-relocs
++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1)
++
++mrecip=
++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
++
++mrecip
++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
++
++mcmodel=
++UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel_003d-1)
++
++mdirect-extern-access
++UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access)
++
++mrelax
++UrlSuffix(gcc/LoongArch-Options.html#index-mrelax-2)
++
++mpass-mrelax-to-as
++UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as)
++
++mtls-dialect=
++UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1)
++
++mannotate-tablejump
++UrlSuffix(gcc/LoongArch-Options.html#index-mannotate-tablejump)
++
++mfrecipe
++UrlSuffix(gcc/LoongArch-Options.html#index-mfrecipe)
++
++mdiv32
++UrlSuffix(gcc/LoongArch-Options.html#index-mdiv32)
++
++mlam-bh
++UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh)
++
++mlamcas
++UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas)
++
++mld-seq-sa
++UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa)
++
+--
+2.43.0
+
_service:tar_scm:0159-Backport-SME-Improve-immediate-expansion-PR106583.patch
Added
@@ -0,0 +1,631 @@ +From d5293e2a8db54245553e01ad5d791b7492ad6101 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wdijkstr@arm.com> +Date: Mon, 24 Oct 2022 15:14:14 +0100 +Subject: PATCH 060/157 BackportSME Improve immediate expansion + PR106583 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a096036589d82175a0f729c2dab73c9a527d075d + +Improve immediate expansion of immediates which can be created from a +bitmask immediate and 2 MOVKs. Simplify, refactor and improve efficiency +of bitmask checks. Move various immediate handling functions together +to avoid forward declarations. + +This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%. + +gcc/ + + PR target/106583 + * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) + Add support for a bitmask immediate with 2 MOVKs. + (aarch64_check_bitmask): New function after refactorization. + (aarch64_bitmask_imm): Simplify replication of small modes. + Split function into 64-bit only version for efficiency. + (aarch64_move_imm): Move near other immediate functions. + (aarch64_uimm12_shift): Likewise. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movk_shift): Likewise. + (aarch64_replicate_bitmask_imm): Likewise. + (aarch64_and_split_imm1): Likewise. + (aarch64_and_split_imm2): Likewise. + (aarch64_and_bitmask_imm): Likewise. + (aarch64_movw_imm): Likewise. + +gcc/testsuite/ + PR target/106583 + * gcc.target/aarch64/pr106583.c: Add new test. +--- + gcc/config/aarch64/aarch64.cc | 485 +++++++++++--------- + gcc/testsuite/gcc.target/aarch64/pr106583.c | 41 ++ + 2 files changed, 301 insertions(+), 225 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106583.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b4b646fa0..cf7736994 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, + static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); + static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, + aarch64_addr_query_type); +-static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); + + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; +@@ -5756,6 +5755,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + factor, nelts_per_vq); + } + ++/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ ++ ++static const unsigned HOST_WIDE_INT bitmask_imm_mul = ++ { ++ 0x0000000100000001ull, ++ 0x0001000100010001ull, ++ 0x0101010101010101ull, ++ 0x1111111111111111ull, ++ 0x5555555555555555ull, ++ }; ++ ++ ++ ++/* Return true if 64-bit VAL is a valid bitmask immediate. */ ++static bool ++aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) ++{ ++ unsigned HOST_WIDE_INT tmp, mask, first_one, next_one; ++ int bits; ++ ++ /* Check for a single sequence of one bits and return quickly if so. ++ The special cases of all ones and all zeroes returns false. */ ++ tmp = val + (val & -val); ++ ++ if (tmp == (tmp & -tmp)) ++ return (val + 1) > 1; ++ ++ /* Invert if the immediate doesn't start with a zero bit - this means we ++ only need to search for sequences of one bits. */ ++ if (val & 1) ++ val = ~val; ++ ++ /* Find the first set bit and set tmp to val with the first sequence of one ++ bits removed. Return success if there is a single sequence of ones. 
*/ ++ first_one = val & -val; ++ tmp = val & (val + first_one); ++ ++ if (tmp == 0) ++ return true; ++ ++ /* Find the next set bit and compute the difference in bit position. */ ++ next_one = tmp & -tmp; ++ bits = clz_hwi (first_one) - clz_hwi (next_one); ++ mask = val ^ tmp; ++ ++ /* Check the bit position difference is a power of 2, and that the first ++ sequence of one bits fits within 'bits' bits. */ ++ if ((mask >> bits) != 0 || bits != (bits & -bits)) ++ return false; ++ ++ /* Check the sequence of one bits is repeated 64/bits times. */ ++ return val == mask * bitmask_imm_mul__builtin_clz (bits) - 26; ++} ++ ++ ++/* Return true if VAL is a valid bitmask immediate for MODE. */ ++bool ++aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) ++{ ++ if (mode == DImode) ++ return aarch64_bitmask_imm (val_in); ++ ++ unsigned HOST_WIDE_INT val = val_in; ++ ++ if (mode == SImode) ++ return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); ++ ++ /* Replicate small immediates to fit 64 bits. */ ++ int size = GET_MODE_UNIT_PRECISION (mode); ++ val &= (HOST_WIDE_INT_1U << size) - 1; ++ val *= bitmask_imm_mul__builtin_clz (size) - 26; ++ ++ return aarch64_bitmask_imm (val); ++} ++ ++ ++/* Return true if the immediate VAL can be a bitfield immediate ++ by changing the given MASK bits in VAL to zeroes, ones or bits ++ from the other half of VAL. Return the new immediate in VAL2. */ ++static inline bool ++aarch64_check_bitmask (unsigned HOST_WIDE_INT val, ++ unsigned HOST_WIDE_INT &val2, ++ unsigned HOST_WIDE_INT mask) ++{ ++ val2 = val & ~mask; ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val2 = val | mask; ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val = val & ~mask; ++ val2 = val | (((val >> 32) | (val << 32)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ val2 = val | (((val >> 16) | (val << 48)) & mask); ++ if (val2 != val && aarch64_bitmask_imm (val2)) ++ return true; ++ return false; ++} ++ ++ ++/* Return true if val is an immediate that can be loaded into a ++ register by a MOVZ instruction. */ ++static bool ++aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) ++{ ++ if (GET_MODE_SIZE (mode) > 4) ++ { ++ if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val ++ || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) ++ return 1; ++ } ++ else ++ { ++ /* Ignore sign extension. */ ++ val &= (HOST_WIDE_INT) 0xffffffff; ++ } ++ return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val ++ || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); ++} ++ ++ ++/* Return true if VAL is an immediate that can be loaded into a ++ register in a single instruction. */ ++bool ++aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) ++{ ++ scalar_int_mode int_mode; ++ if (!is_a <scalar_int_mode> (mode, &int_mode)) ++ return false; ++ ++ if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) ++ return 1; ++ return aarch64_bitmask_imm (val, int_mode); ++} ++ ++ + static int + aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + scalar_int_mode mode) +@@ -5786,7 +5922,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + + /* Check if we have to emit a second instruction by checking to see +- if any of the upper 32 bits of the original DI mode value is set. */ ++ if any of the upper 32 bits of the original DI mode value is set. 
*/ + if (val == val2) + return 1; + +@@ -5822,36 +5958,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + + ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); + +- if (zero_match != 2 && one_match != 2) ++ if (zero_match < 2 && one_match < 2) + { + /* Try emitting a bitmask immediate with a movk replacing 16 bits. + For a 64-bit bitmask try whether changing 16 bits to all ones or + zeroes creates a valid bitmask. To check any repeated bitmask, + try using 16 bits from the other 32-bit half of val. */ + +- for (i = 0; i < 64; i += 16, mask <<= 16) +- { +- val2 = val & ~mask; +- if (val2 != val && aarch64_bitmask_imm (val2, mode)) +- break; +- val2 = val | mask; +- if (val2 != val && aarch64_bitmask_imm (val2, mode)) +- break; +- val2 = val2 & ~mask; +- val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask); +- if (val2 != val && aarch64_bitmask_imm (val2, mode)) +- break; +- } +- if (i != 64) +- { +- if (generate) ++ for (i = 0; i < 64; i += 16) ++ if (aarch64_check_bitmask (val, val2, mask << i)) ++ { ++ if (generate) ++ { ++ emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); ++ emit_insn (gen_insv_immdi (dest, GEN_INT (i), ++ GEN_INT ((val >> i) & 0xffff))); ++ } ++ return 2; ++ } ++ } ++ ++ /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ ++ if (zero_match + one_match == 0) ++ { ++ for (i = 0; i < 48; i += 16) ++ for (int j = i + 16; j < 64; j += 16) ++ if (aarch64_check_bitmask (val, val2, (mask << i) | (mask << j))) + { +- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); +- emit_insn (gen_insv_immdi (dest, GEN_INT (i), +- GEN_INT ((val >> i) & 0xffff))); ++ if (generate) ++ { ++ emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); ++ emit_insn (gen_insv_immdi (dest, GEN_INT (i), ++ GEN_INT ((val >> i) & 0xffff))); ++ emit_insn (gen_insv_immdi (dest, GEN_INT (j), ++ GEN_INT ((val >> j) & 0xffff))); ++ } ++ return 3; + } +- return 2; +- } + } + + /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which +@@ -5898,6 +6041,99 @@ aarch64_mov128_immediate (rtx imm) + } + + ++/* Return true if val can be encoded as a 12-bit unsigned immediate with ++ a left shift of 0 or 12 bits. */ ++bool ++aarch64_uimm12_shift (HOST_WIDE_INT val) ++{ ++ return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val ++ || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val ++ ); ++} ++ ++/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate ++ that can be created with a left shift of 0 or 12. */ ++static HOST_WIDE_INT ++aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) ++{ ++ /* Check to see if the value fits in 24 bits, as that is the maximum we can ++ handle correctly. */ ++ gcc_assert ((val & 0xffffff) == val); ++ ++ if (((val & 0xfff) << 0) == val) ++ return val; ++ ++ return val & (0xfff << 12); ++} ++ ++ ++/* Test whether: ++ ++ X = (X & AND_VAL) | IOR_VAL; ++ ++ can be implemented using: ++ ++ MOVK X, #(IOR_VAL >> shift), LSL #shift ++ ++ Return the shift if so, otherwise return -1. */ ++int ++aarch64_movk_shift (const wide_int_ref &and_val, ++ const wide_int_ref &ior_val) ++{ ++ unsigned int precision = and_val.get_precision (); ++ unsigned HOST_WIDE_INT mask = 0xffff; ++ for (unsigned int shift = 0; shift < precision; shift += 16) ++ { ++ if (and_val == ~mask && (ior_val & mask) == ior_val) ++ return shift; ++ mask <<= 16; ++ } ++ return -1; ++} ++ ++/* Create mask of ones, covering the lowest to highest bits set in VAL_IN. 
++ Assumed precondition: VAL_IN Is not zero. */ ++ ++unsigned HOST_WIDE_INT ++aarch64_and_split_imm1 (HOST_WIDE_INT val_in) ++{ ++ int lowest_bit_set = ctz_hwi (val_in); ++ int highest_bit_set = floor_log2 (val_in); ++ gcc_assert (val_in != 0); ++ ++ return ((HOST_WIDE_INT_UC (2) << highest_bit_set) - ++ (HOST_WIDE_INT_1U << lowest_bit_set)); ++} ++ ++/* Create constant where bits outside of lowest bit set to highest bit set ++ are set to 1. */ ++ ++unsigned HOST_WIDE_INT ++aarch64_and_split_imm2 (HOST_WIDE_INT val_in) ++{ ++ return val_in | ~aarch64_and_split_imm1 (val_in); ++} ++ ++/* Return true if VAL_IN is a valid 'and' bitmask immediate. */ ++ ++bool ++aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode) ++{ ++ scalar_int_mode int_mode; ++ if (!is_a <scalar_int_mode> (mode, &int_mode)) ++ return false; ++ ++ if (aarch64_bitmask_imm (val_in, int_mode)) ++ return false; ++ ++ if (aarch64_move_imm (val_in, int_mode)) ++ return false; ++ ++ unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in); ++ ++ return aarch64_bitmask_imm (imm2, int_mode); ++} ++ + /* Return the number of temporary registers that aarch64_add_offset_1 + would need to add OFFSET to a register. */ + +@@ -10379,207 +10615,6 @@ aarch64_tls_referenced_p (rtx x) + } + + +-/* Return true if val can be encoded as a 12-bit unsigned immediate with +- a left shift of 0 or 12 bits. */ +-bool +-aarch64_uimm12_shift (HOST_WIDE_INT val) +-{ +- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val +- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val +- ); +-} +- +-/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate +- that can be created with a left shift of 0 or 12. */ +-static HOST_WIDE_INT +-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) +-{ +- /* Check to see if the value fits in 24 bits, as that is the maximum we can +- handle correctly. */ +- gcc_assert ((val & 0xffffff) == val); +- +- if (((val & 0xfff) << 0) == val) +- return val; +- +- return val & (0xfff << 12); +-} +- +-/* Return true if val is an immediate that can be loaded into a +- register by a MOVZ instruction. */ +-static bool +-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) +-{ +- if (GET_MODE_SIZE (mode) > 4) +- { +- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) +- return 1; +- } +- else +- { +- /* Ignore sign extension. */ +- val &= (HOST_WIDE_INT) 0xffffffff; +- } +- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); +-} +- +-/* Test whether: +- +- X = (X & AND_VAL) | IOR_VAL; +- +- can be implemented using: +- +- MOVK X, #(IOR_VAL >> shift), LSL #shift +- +- Return the shift if so, otherwise return -1. */ +-int +-aarch64_movk_shift (const wide_int_ref &and_val, +- const wide_int_ref &ior_val) +-{ +- unsigned int precision = and_val.get_precision (); +- unsigned HOST_WIDE_INT mask = 0xffff; +- for (unsigned int shift = 0; shift < precision; shift += 16) +- { +- if (and_val == ~mask && (ior_val & mask) == ior_val) +- return shift; +- mask <<= 16; +- } +- return -1; +-} +- +-/* VAL is a value with the inner mode of MODE. Replicate it to fill a +- 64-bit (DImode) integer. 
*/ +- +-static unsigned HOST_WIDE_INT +-aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) +-{ +- unsigned int size = GET_MODE_UNIT_PRECISION (mode); +- while (size < 64) +- { +- val &= (HOST_WIDE_INT_1U << size) - 1; +- val |= val << size; +- size *= 2; +- } +- return val; +-} +- +-/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ +- +-static const unsigned HOST_WIDE_INT bitmask_imm_mul = +- { +- 0x0000000100000001ull, +- 0x0001000100010001ull, +- 0x0101010101010101ull, +- 0x1111111111111111ull, +- 0x5555555555555555ull, +- }; +- +- +-/* Return true if val is a valid bitmask immediate. */ +- +-bool +-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) +-{ +- unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one; +- int bits; +- +- /* Check for a single sequence of one bits and return quickly if so. +- The special cases of all ones and all zeroes returns false. */ +- val = aarch64_replicate_bitmask_imm (val_in, mode); +- tmp = val + (val & -val); +- +- if (tmp == (tmp & -tmp)) +- return (val + 1) > 1; +- +- /* Replicate 32-bit immediates so we can treat them as 64-bit. */ +- if (mode == SImode) +- val = (val << 32) | (val & 0xffffffff); +- +- /* Invert if the immediate doesn't start with a zero bit - this means we +- only need to search for sequences of one bits. */ +- if (val & 1) +- val = ~val; +- +- /* Find the first set bit and set tmp to val with the first sequence of one +- bits removed. Return success if there is a single sequence of ones. */ +- first_one = val & -val; +- tmp = val & (val + first_one); +- +- if (tmp == 0) +- return true; +- +- /* Find the next set bit and compute the difference in bit position. */ +- next_one = tmp & -tmp; +- bits = clz_hwi (first_one) - clz_hwi (next_one); +- mask = val ^ tmp; +- +- /* Check the bit position difference is a power of 2, and that the first +- sequence of one bits fits within 'bits' bits. */ +- if ((mask >> bits) != 0 || bits != (bits & -bits)) +- return false; +- +- /* Check the sequence of one bits is repeated 64/bits times. */ +- return val == mask * bitmask_imm_mul__builtin_clz (bits) - 26; +-} +- +-/* Create mask of ones, covering the lowest to highest bits set in VAL_IN. +- Assumed precondition: VAL_IN Is not zero. */ +- +-unsigned HOST_WIDE_INT +-aarch64_and_split_imm1 (HOST_WIDE_INT val_in) +-{ +- int lowest_bit_set = ctz_hwi (val_in); +- int highest_bit_set = floor_log2 (val_in); +- gcc_assert (val_in != 0); +- +- return ((HOST_WIDE_INT_UC (2) << highest_bit_set) - +- (HOST_WIDE_INT_1U << lowest_bit_set)); +-} +- +-/* Create constant where bits outside of lowest bit set to highest bit set +- are set to 1. */ +- +-unsigned HOST_WIDE_INT +-aarch64_and_split_imm2 (HOST_WIDE_INT val_in) +-{ +- return val_in | ~aarch64_and_split_imm1 (val_in); +-} +- +-/* Return true if VAL_IN is a valid 'and' bitmask immediate. */ +- +-bool +-aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode) +-{ +- scalar_int_mode int_mode; +- if (!is_a <scalar_int_mode> (mode, &int_mode)) +- return false; +- +- if (aarch64_bitmask_imm (val_in, int_mode)) +- return false; +- +- if (aarch64_move_imm (val_in, int_mode)) +- return false; +- +- unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in); +- +- return aarch64_bitmask_imm (imm2, int_mode); +-} +- +-/* Return true if val is an immediate that can be loaded into a +- register in a single instruction. 
*/ +-bool +-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) +-{ +- scalar_int_mode int_mode; +- if (!is_a <scalar_int_mode> (mode, &int_mode)) +- return false; +- +- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) +- return 1; +- return aarch64_bitmask_imm (val, int_mode); +-} +- + static bool + aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + { +diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c +new file mode 100644 +index 000000000..0f9315808 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c +@@ -0,0 +1,41 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O2 --save-temps" } */ ++ ++long f1 (void) ++{ ++ return 0x7efefefefefefeff; ++} ++ ++long f2 (void) ++{ ++ return 0x12345678aaaaaaaa; ++} ++ ++long f3 (void) ++{ ++ return 0x1234cccccccc5678; ++} ++ ++long f4 (void) ++{ ++ return 0x7777123456787777; ++} ++ ++long f5 (void) ++{ ++ return 0x5555555512345678; ++} ++ ++long f6 (void) ++{ ++ return 0x1234bbbb5678bbbb; ++} ++ ++long f7 (void) ++{ ++ return 0x4444123444445678; ++} ++ ++ ++/* { dg-final { scan-assembler-times {\tmovk\t} 14 } } */ ++/* { dg-final { scan-assembler-times {\tmov\t} 7 } } */ +-- +2.33.0 +
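The core of the refactored check above is worth restating: a 64-bit bitmask immediate is a single contiguous run of ones, possibly replicated across the register at a power-of-two period, and the multiplier table replaces bit-by-bit replication. Below is a standalone C sketch mirroring the logic the patch adds; it is a restatement for illustration, not the GCC function itself:

    #include <stdbool.h>

    /* Multipliers that replicate a pattern of width 32, 16, 8, 4 or 2.  */
    static const unsigned long long bitmask_mul[] = {
      0x0000000100000001ull,
      0x0001000100010001ull,
      0x0101010101010101ull,
      0x1111111111111111ull,
      0x5555555555555555ull,
    };

    bool
    bitmask_imm64 (unsigned long long val)
    {
      /* Single run of ones: adding the lowest set bit must leave at most
         one bit set.  All-zeroes and all-ones are rejected.  */
      unsigned long long tmp = val + (val & -val);
      if (tmp == (tmp & -tmp))
        return (val + 1) > 1;

      /* Invert so we only ever search for runs of ones.  */
      if (val & 1)
        val = ~val;

      unsigned long long first_one = val & -val;
      tmp = val & (val + first_one);   /* val with the first run removed */
      if (tmp == 0)
        return true;

      unsigned long long next_one = tmp & -tmp;
      int bits = __builtin_clzll (first_one) - __builtin_clzll (next_one);
      unsigned long long mask = val ^ tmp;   /* the first run itself */

      /* The period must be a power of two and must contain the run.  */
      if ((mask >> bits) != 0 || bits != (bits & -bits))
        return false;

      /* The run must repeat every 'bits' bits across all 64 bits.  */
      return val == mask * bitmask_mul[__builtin_clz (bits) - 26];
    }

For instance, 0x00ff00ff00ff00ff passes (an 8-bit run repeated with a 16-bit period), while 0x1234cccccccc5678 fails and instead takes the new bitmask-plus-two-MOVK path the patch adds to aarch64_internal_mov_immediate, as exercised by f3 in the new pr106583.c test.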
_service:tar_scm:0159-LoongArch-Add-support-for-TLS-descriptors.patch
Added
@@ -0,0 +1,724 @@ +From 0d5ff38a94dbd655bc86e0be262458ac71726ea4 Mon Sep 17 00:00:00 2001 +From: mengqinggang <mengqinggang@loongson.cn> +Date: Tue, 2 Apr 2024 09:57:20 +0800 +Subject: PATCH 159/188 LoongArch: Add support for TLS descriptors. + +Add support for TLS descriptors on normal code model and extreme +code model. + +Normal code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r4,$r4,%desc_pc_lo12(s) + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +Extreme code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, $r12, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r12,$r0,%desc_pc_lo12(s) + lu32i.d $r12,%desc64_pc_lo20(s) + lu52i.d $r12,$r12,%desc64_pc_hi12(s) + add.d $r4,$r4,$r12 + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +The default is still traditional TLS model, but can be configured with +--with-tls={trad,desc}. The default can change to TLS descriptors once +libc and LLVM support this. + +gcc/ChangeLog: + + * config.gcc: Add --with-tls option to change TLS flavor. + * config/loongarch/genopts/loongarch.opt.in: Add -mtls-dialect to + configure TLS flavor. + * config/loongarch/loongarch-def.h (struct loongarch_target): Add + tls_dialect. + * config/loongarch/loongarch-driver.cc (la_driver_init): Add tls + flavor. + * config/loongarch/loongarch-opts.cc (loongarch_init_target): Add + tls_dialect. + (loongarch_config_target): Ditto. + (loongarch_update_gcc_opt_status): Ditto. + * config/loongarch/loongarch-opts.h (loongarch_init_target): Ditto. + (TARGET_TLS_DESC): New define. + * config/loongarch/loongarch.cc (loongarch_symbol_insns): Add TLS + DESC instructions sequence length. + (loongarch_legitimize_tls_address): New TLS DESC instruction sequence. + (loongarch_option_override_internal): Add la_opt_tls_dialect. + (loongarch_option_restore): Add la_target.tls_dialect. + * config/loongarch/loongarch.md (@got_load_tls_desc<mode>): Normal + code model for TLS DESC. + (got_load_tls_desc_off64): Extreme cmode model for TLS DESC. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Ditto. + * doc/invoke.texi: Add a description of the compilation option + '-mtls-dialect={trad,desc}'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/cmodel-extreme-1.c: Add -mtls-dialect=trad. + * gcc.target/loongarch/cmodel-extreme-2.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Ditto. + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: + Ditto. + * gcc.target/loongarch/func-call-medium-1.c: Ditto. + * gcc.target/loongarch/func-call-medium-2.c: Ditto. + * gcc.target/loongarch/func-call-medium-3.c: Ditto. + * gcc.target/loongarch/func-call-medium-4.c: Ditto. + * gcc.target/loongarch/tls-extreme-macro.c: Ditto. + * gcc.target/loongarch/tls-gd-noplt.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-auto-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: New test. 
+ +Co-authored-by: Lulu Cheng <chenglulu@loongson.cn> +Co-authored-by: Xi Ruoyao <xry111@xry111.site> +--- + gcc/config.gcc | 19 +++++- + gcc/config/loongarch/genopts/loongarch.opt.in | 14 ++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-driver.cc | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 12 +++- + gcc/config/loongarch/loongarch-opts.h | 3 + + gcc/config/loongarch/loongarch.cc | 45 ++++++++---- + gcc/config/loongarch/loongarch.md | 68 +++++++++++++++++++ + gcc/config/loongarch/loongarch.opt | 14 ++++ + gcc/doc/invoke.texi | 16 ++++- + .../gcc.target/loongarch/cmodel-extreme-1.c | 2 +- + .../gcc.target/loongarch/cmodel-extreme-2.c | 2 +- + .../explicit-relocs-auto-extreme-tls-desc.c | 10 +++ + .../loongarch/explicit-relocs-auto-tls-desc.c | 10 +++ + .../explicit-relocs-auto-tls-ld-gd.c | 2 +- + .../explicit-relocs-extreme-tls-desc.c | 16 +++++ + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 2 +- + .../loongarch/explicit-relocs-tls-desc.c | 13 ++++ + .../gcc.target/loongarch/func-call-medium-1.c | 2 +- + .../gcc.target/loongarch/func-call-medium-2.c | 2 +- + .../gcc.target/loongarch/func-call-medium-3.c | 2 +- + .../gcc.target/loongarch/func-call-medium-4.c | 2 +- + .../gcc.target/loongarch/tls-extreme-macro.c | 2 +- + .../gcc.target/loongarch/tls-gd-noplt.c | 2 +- + 24 files changed, 243 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 499b36b45..1db558d4c 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4982,7 +4982,7 @@ case "${target}" in + ;; + + loongarch*-*) +- supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib" ++ supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls" + + # Local variables + unset \ +@@ -5240,6 +5240,18 @@ case "${target}" in + with_multilib_list="${abi_base}/${abi_ext}" + fi + ++ # Handle --with-tls. ++ case "$with_tls" in ++ "" \ ++ | trad | desc) ++ # OK ++ ;; ++ *) ++ echo "Unknown TLS method used in --with-tls=$with_tls" 1>&2 ++ exit 1 ++ ;; ++ esac ++ + # Check if the configured default ABI combination is included in + # ${with_multilib_list}. + loongarch_multilib_list_sane=no +@@ -5875,6 +5887,11 @@ case ${target} in + lasx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LASX" ;; + esac + ++ case ${with_tls} in ++ "" | trad) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_TRADITIONAL" ;; ++ desc) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_DESCRIPTORS" ;; ++ esac ++ + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 9c6f59bb8..f3d53f03c 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -245,6 +245,20 @@ mpass-mrelax-to-as + Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. 
+ ++Enum ++Name(tls_type) Type(int) ++The possible TLS dialects: ++ ++EnumValue ++Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) ++ ++EnumValue ++Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) ++ ++mtls-dialect= ++Target RejectNegative Joined Enum(tls_type) Var(la_opt_tls_dialect) Init(M_OPT_UNSET) Save ++Specify TLS dialect. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index b1423bcfe..2fe44da5a 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -180,6 +180,7 @@ struct loongarch_target + int cpu_arch; /* CPU_ */ + int cpu_tune; /* same */ + int cmodel; /* CMODEL_ */ ++ int tls_dialect; /* TLS_ */ + }; + + /* CPU model */ +@@ -193,6 +194,12 @@ enum { + N_TUNE_TYPES = 5 + }; + ++/* TLS types. */ ++enum { ++ TLS_TRADITIONAL = 0, ++ TLS_DESCRIPTORS = 1 ++}; ++ + /* CPU model properties */ + extern loongarch_def_array<const char *, N_ARCH_TYPES> + loongarch_cpu_strings; +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index b84a6eaf7..8551cf94d 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -45,7 +45,7 @@ la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) + /* Initialize all fields of la_target. */ + loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, + M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, +- 0, 0); ++ M_OPT_UNSET, 0, 0); + return ""; + } + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 404642a9e..062d430c2 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -144,6 +144,7 @@ void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, + int abi_base, int abi_ext, int cmodel, ++ int tls_dialect, + HOST_WIDE_INT isa_evolution, + HOST_WIDE_INT isa_evolution_set) + { +@@ -158,6 +159,7 @@ loongarch_init_target (struct loongarch_target *target, + target->abi.base = abi_base; + target->abi.ext = abi_ext; + target->cmodel = cmodel; ++ target->tls_dialect = tls_dialect; + } + + +@@ -179,7 +181,8 @@ loongarch_config_target (struct loongarch_target *target, + obstack_init (&msg_obstack); + + struct { +- int arch, tune, fpu, simd, abi_base, abi_ext, cmodel, abi_flt; ++ int arch, tune, fpu, simd, abi_base, abi_ext, cmodel, ++ tls_dialect, abi_flt; + } constrained = { + M_OPT_ABSENT (target->cpu_arch) ? 0 : 1, + M_OPT_ABSENT (target->cpu_tune) ? 0 : 1, +@@ -188,6 +191,7 @@ loongarch_config_target (struct loongarch_target *target, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + M_OPT_ABSENT (target->abi.ext) ? 0 : 1, + M_OPT_ABSENT (target->cmodel) ? 0 : 1, ++ M_OPT_ABSENT (target->tls_dialect) ? 0 : 1, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + }; + +@@ -556,6 +560,9 @@ fallback: + gcc_unreachable (); + } + ++ t.tls_dialect = constrained.tls_dialect ? target->tls_dialect ++ : DEFAULT_TLS_TYPE; ++ + /* Cleanup and return. 
*/ + obstack_free (&msg_obstack, NULL); + *target = t; +@@ -791,6 +798,9 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* status of -mcmodel */ + opts->x_la_opt_cmodel = target->cmodel; + ++ /* status of -mtls-dialect */ ++ opts->x_la_opt_tls_dialect = target->tls_dialect; ++ + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 177d587da..a3b467f4c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -39,6 +39,7 @@ void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, + int abi_base, int abi_ext, int cmodel, ++ int tls_dialect, + HOST_WIDE_INT isa_evolutions, + HOST_WIDE_INT isa_evolutions_set); + +@@ -105,6 +106,8 @@ struct loongarch_flags { + #define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) + #define TARGET_ABI_LP64 ABI_LP64_P(la_target.abi.base) + ++#define TARGET_TLS_DESC (la_target.tls_dialect == TLS_DESCRIPTORS) ++ + #define ISA_HAS_LSX \ + (la_target.isa.simd == ISA_EXT_SIMD_LSX \ + || la_target.isa.simd == ISA_EXT_SIMD_LASX) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c2f3739d0..e27335b3c 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2029,7 +2029,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +- return 3; ++ return TARGET_TLS_DESC ? 4 : 3; + + case SYMBOL_PCREL64: + return 5; +@@ -2930,24 +2930,43 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + static rtx + loongarch_legitimize_tls_address (rtx loc) + { +- rtx dest, tp, tmp, tmp1, tmp2, tmp3; ++ rtx dest, tp, tmp, tmp1, tmp2, tmp3, a0; + enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); + rtx_insn *insn; + + switch (model) + { + case TLS_MODEL_LOCAL_DYNAMIC: +- tmp = gen_rtx_REG (Pmode, GP_RETURN); +- dest = gen_reg_rtx (Pmode); +- insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); +- emit_libcall_block (insn, dest, tmp, loc); +- break; +- ++ if (!TARGET_TLS_DESC) ++ { ++ tmp = gen_rtx_REG (Pmode, GP_RETURN); ++ dest = gen_reg_rtx (Pmode); ++ insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); ++ emit_libcall_block (insn, dest, tmp, loc); ++ break; ++ } ++ /* Fall through. 
*/ + case TLS_MODEL_GLOBAL_DYNAMIC: +- tmp = gen_rtx_REG (Pmode, GP_RETURN); +- dest = gen_reg_rtx (Pmode); +- insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); +- emit_libcall_block (insn, dest, tmp, loc); ++ if (TARGET_TLS_DESC) ++ { ++ a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST); ++ dest = gen_reg_rtx (Pmode); ++ tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); ++ ++ if (TARGET_CMODEL_EXTREME) ++ emit_insn (gen_got_load_tls_desc_off64 (loc, gen_reg_rtx (DImode))); ++ else ++ emit_insn (gen_got_load_tls_desc (Pmode, loc)); ++ ++ emit_insn (gen_add3_insn (dest, a0, tp)); ++ } ++ else ++ { ++ tmp = gen_rtx_REG (Pmode, GP_RETURN); ++ dest = gen_reg_rtx (Pmode); ++ insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); ++ emit_libcall_block (insn, dest, tmp, loc); ++ } + break; + + case TLS_MODEL_INITIAL_EXEC: +@@ -7651,6 +7670,7 @@ loongarch_option_override_internal (struct loongarch_target *target, + opts->x_la_opt_abi_base, + opts->x_la_opt_abi_ext, + opts->x_la_opt_cmodel, ++ opts->x_la_opt_tls_dialect, + opts->x_la_isa_evolution, + opts_set->x_la_isa_evolution); + +@@ -7697,6 +7717,7 @@ loongarch_option_restore (struct gcc_options *, + la_target.isa.evolution = ptr->x_la_isa_evolution; + + la_target.cmodel = ptr->x_la_opt_cmodel; ++ la_target.tls_dialect = ptr->x_la_opt_tls_dialect; + } + + /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 1b3525dde..95beb88fe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -52,6 +52,8 @@ + + ;; TLS + UNSPEC_TLS ++ UNSPEC_TLS_DESC ++ UNSPEC_TLS_DESC_OFF64 + + ;; Stack tie + UNSPEC_TIE +@@ -127,6 +129,15 @@ + (T1_REGNUM 13) + (S0_REGNUM 23) + ++ (FCC0_REGNUM 64) ++ (FCC1_REGNUM 65) ++ (FCC2_REGNUM 66) ++ (FCC3_REGNUM 67) ++ (FCC4_REGNUM 68) ++ (FCC5_REGNUM 69) ++ (FCC6_REGNUM 70) ++ (FCC7_REGNUM 71) ++ + ;; Return path styles + (NORMAL_RETURN 0) + (SIBCALL_RETURN 1) +@@ -2759,6 +2770,63 @@ + + ;; Thread-Local Storage + ++(define_insn "@got_load_tls_desc<mode>" ++ (set (reg:P 4) ++ (unspec:P ++ (match_operand:P 0 "symbolic_operand" "") ++ UNSPEC_TLS_DESC)) ++ (clobber (reg:SI FCC0_REGNUM)) ++ (clobber (reg:SI FCC1_REGNUM)) ++ (clobber (reg:SI FCC2_REGNUM)) ++ (clobber (reg:SI FCC3_REGNUM)) ++ (clobber (reg:SI FCC4_REGNUM)) ++ (clobber (reg:SI FCC5_REGNUM)) ++ (clobber (reg:SI FCC6_REGNUM)) ++ (clobber (reg:SI FCC7_REGNUM)) ++ (clobber (reg:SI RETURN_ADDR_REGNUM)) ++ "TARGET_TLS_DESC" ++{ ++ return TARGET_EXPLICIT_RELOCS ++ ? "pcalau12i\t$r4,%%desc_pc_hi20(%0)\n\t" ++ "addi.d\t$r4,$r4,%%desc_pc_lo12(%0)\n\t" ++ "ld.d\t$r1,$r4,%%desc_ld(%0)\n\t" ++ "jirl\t$r1,$r1,%%desc_call(%0)" ++ : "la.tls.desc\t$r4,%0"; ++} ++ (set_attr "got" "load") ++ (set_attr "mode" "<MODE>") ++ (set_attr "length" "16")) ++ ++(define_insn "got_load_tls_desc_off64" ++ (set (reg:DI 4) ++ (unspec:DI ++ (match_operand:DI 0 "symbolic_operand" "") ++ UNSPEC_TLS_DESC_OFF64)) ++ (clobber (reg:SI FCC0_REGNUM)) ++ (clobber (reg:SI FCC1_REGNUM)) ++ (clobber (reg:SI FCC2_REGNUM)) ++ (clobber (reg:SI FCC3_REGNUM)) ++ (clobber (reg:SI FCC4_REGNUM)) ++ (clobber (reg:SI FCC5_REGNUM)) ++ (clobber (reg:SI FCC6_REGNUM)) ++ (clobber (reg:SI FCC7_REGNUM)) ++ (clobber (reg:SI RETURN_ADDR_REGNUM)) ++ (clobber (match_operand:DI 1 "register_operand" "=&r")) ++ "TARGET_TLS_DESC && TARGET_CMODEL_EXTREME" ++{ ++ return TARGET_EXPLICIT_RELOCS ++ ? 
"pcalau12i\t$r4,%%desc_pc_hi20(%0)\n\t" ++ "addi.d\t%1,$r0,%%desc_pc_lo12(%0)\n\t" ++ "lu32i.d\t%1,%%desc64_pc_lo20(%0)\n\t" ++ "lu52i.d\t%1,%1,%%desc64_pc_hi12(%0)\n\t" ++ "add.d\t$r4,$r4,%1\n\t" ++ "ld.d\t$r1,$r4,%%desc_ld(%0)\n\t" ++ "jirl\t$r1,$r1,%%desc_call(%0)" ++ : "la.tls.desc\t$r4,%1,%0"; ++} ++ (set_attr "got" "load") ++ (set_attr "length" "28")) ++ + (define_insn "@load_tls<mode>" + (set (match_operand:P 0 "register_operand" "=r") + (unspec:P +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index ea848cd76..6f730d886 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -253,6 +253,20 @@ mpass-mrelax-to-as + Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + ++Enum ++Name(tls_type) Type(int) ++The possible TLS dialects: ++ ++EnumValue ++Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) ++ ++EnumValue ++Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) ++ ++mtls-dialect= ++Target RejectNegative Joined Enum(tls_type) Var(la_opt_tls_dialect) Init(M_OPT_UNSET) Save ++Specify TLS dialect. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7f24fe1e2..c9a1969ad 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1010,7 +1010,8 @@ Objective-C and Objective-C++ Dialects}. + -mdirect-extern-access -mno-direct-extern-access @gol + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol +--mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa} ++-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol ++-mtls-dialect=@var{opt}} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24727,6 +24728,19 @@ Whether a load-load barrier (@code{dbar 0x700}) is needed. When build with + @option{-march=la664}, it is enabled by default. The default is + @option{-mno-ld-seq-sa}, the load-load barrier is needed. + ++@opindex mtls-dialect ++@item -mtls-dialect=@var{opt} ++This option controls which tls dialect may be used for general dynamic and ++local dynamic TLS models. ++ ++@table @samp ++@item trad ++Use traditional TLS. This is the default. ++ ++@item desc ++Use TLS descriptors. ++@end table ++ + @item loongarch-vect-unroll-limit + The vectorizer will use available tuning information to determine whether it + would be beneficial to unroll the main vectorized loop and by how much. 
This +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +index 564ee4017..6269607e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -mtls-dialect=trad -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ + + int a; + extern int b; +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +index ce834805f..35f6ee0bb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -mtls-dialect=trad -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ + + #include "cmodel-extreme-1.c" + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c +new file mode 100644 +index 000000000..0fc7a1a51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mcmodel=extreme -mexplicit-relocs=auto -mtls-dialect=desc" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\\$r12,\\.LANCHOR0" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\\$r12,\\.LANCHOR0" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c +new file mode 100644 +index 000000000..37947ecfd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mtls-dialect=desc" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\.LANCHOR0" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\.LANCHOR0" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +index ca55fcfc5..b47e37c82 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mtls-dialect=trad" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +new file mode 100644 +index 000000000..3797556e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "pcalau12i\t\\\$r4,%desc_pc_hi20\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "addi.d\t\\\$r12,\\\$r0,%desc_pc_lo12\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "lu32i.d\t\\\$r12,%desc64_pc_lo20\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "lu52i.d\t\\\$r12,\\\$r12,%desc64_pc_hi12\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "add.d\t\\\$r4,\\\$r4,\\\$r12" { target tls_native } } } */ ++/* { dg-final { scan-assembler "ld.d\t\\\$r1,\\\$r4,%desc_ld\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "jirl\t\\\$r1,\\\$r1,%desc_call\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "add.d\t\\\$r12,\\\$r4,\\\$r2" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +index d1a482083..cfb855323 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mtls-dialect=trad -mcmodel=medium -fplt" } */ + /* { dg-final { scan-assembler "pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && loongarch_call36_support } } } } */ + + #include "./explicit-relocs-auto-tls-ld-gd.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +new file mode 100644 +index 000000000..f66903091 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "pcalau12i\t\\\$r4,%desc_pc_hi20\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "addi.d\t\\\$r4,\\\$r4,%desc_pc_lo12\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "ld.d\t\\\$r1,\\\$r4,%desc_ld\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "jirl\t\\\$r1,\\\$r1,%desc_call\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "add.d\t\\\$r12,\\\$r4,\\\$r2" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c +index 6339e832f..5e81df552 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c +index a53e75e0b..d73df2dd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.local\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c +index 0da7bf98e..88a667450 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c +index 0219688ae..f9dc12fea 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.local\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c +index 4341f8212..4adda4202 100644 +--- a/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c ++++ b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=none" } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -mtls-dialect=trad -fno-plt -mexplicit-relocs=none" } */ + /* { dg-final { scan-assembler "test_le:.*la.tls.le\t\\\$r\[0-9\]\+,\\\.L" { target tls_native } } } */ + /* { dg-final {
scan-assembler "test_ie:.*la.tls.ie\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,\\\.L" { target tls_native } } } */ + /* { dg-final { scan-assembler "test_ld:.*la.tls.ld\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,\\\.L.*la.global\t\\\$r\[0-9\]\+,\\\$r\[0-9\]\+,__tls_get_addr" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c +index 9432c477e..dfa1bf53c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c ++++ b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O0 -fno-plt -mcmodel=normal -mexplicit-relocs" } */ ++/* { dg-options "-O0 -fno-plt -mcmodel=normal -mtls-dialect=trad -mexplicit-relocs" } */ + /* { dg-final { scan-assembler "pcalau12i\t.*%got_pc_hi20\\(__tls_get_addr\\)\n\tld\.d.*%got_pc_lo12\\(__tls_get_addr\\)" { target tls_native } } } */ + + __attribute__ ((tls_model ("global-dynamic"))) __thread int a; +-- +2.43.0 +
_service:tar_scm:0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
Added
@@ -0,0 +1,410 @@ +From d76be4acadc0641cc8e795cd6b8a1c3c83b4fdb2 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Mon, 5 Dec 2022 10:49:25 +0000 +Subject: PATCH 061/157 BackportSME AArch64: Cleanup move immediate code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ba1536dac780f3f92c5eab999fda6931f6247fc1 + +Simplify, refactor and improve various move immediate functions. +Allow 32-bit MOVN/I as a valid 64-bit immediate which removes special +cases in aarch64_internal_mov_immediate. Add new constraint so the movdi +pattern only needs a single alternative for move immediate. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type. + (aarch64_is_mov_xn_imm): New function. + (aarch64_move_imm): Refactor, assert mode is SImode or DImode. + (aarch64_internal_mov_immediate): Assert mode is SImode or DImode. + Simplify special cases. + (aarch64_uimm12_shift): Simplify code. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movw_imm): Rename to aarch64_is_movz. + (aarch64_float_const_rtx_p): Pass either SImode or DImode to + aarch64_internal_mov_immediate. + (aarch64_rtx_costs): Likewise. + * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M' + constraints into single 'O'. + (mov<mode>_aarch64): Likewise. + * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned. + (aarch64_bitmask_imm): Likewise. + (aarch64_uimm12_shift): Likewise. + (aarch64_is_mov_xn_imm): New prototype. + * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates, + limit 'N' to 64-bit only moves. +--- + gcc/config/aarch64/aarch64-protos.h | 7 +- + gcc/config/aarch64/aarch64.cc | 158 ++++++++++++---------------- + gcc/config/aarch64/aarch64.md | 17 ++- + gcc/config/aarch64/constraints.md | 5 + + 4 files changed, 85 insertions(+), 102 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 97984f3ab..3ff1a0163 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void); + poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); + int aarch64_get_condition_code (rtx); + bool aarch64_address_valid_for_prefetch_p (rtx, bool); +-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); ++bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode); + unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in); + unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); + bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); +@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); + bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); +-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); ++bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode); + machine_mode aarch64_sve_int_mode (machine_mode); + opt_machine_mode aarch64_sve_pred_mode (unsigned int); + machine_mode aarch64_sve_pred_mode (machine_mode); +@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool); + bool aarch64_sve_float_mul_immediate_p (rtx); + bool aarch64_split_dimode_const_store (rtx, rtx); + bool aarch64_symbolic_address_p (rtx); +-bool aarch64_uimm12_shift (HOST_WIDE_INT); ++bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT); + int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); ++bool 
aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); + bool aarch64_use_return_insn_p (void); + const char *aarch64_output_casesi (rtx *); + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index cf7736994..acb659f53 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5812,12 +5812,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) + + /* Return true if VAL is a valid bitmask immediate for MODE. */ + bool +-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) ++aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) + { + if (mode == DImode) +- return aarch64_bitmask_imm (val_in); +- +- unsigned HOST_WIDE_INT val = val_in; ++ return aarch64_bitmask_imm (val); + + if (mode == SImode) + return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); +@@ -5856,51 +5854,55 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val, + } + + +-/* Return true if val is an immediate that can be loaded into a +- register by a MOVZ instruction. */ +-static bool +-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) ++/* Return true if VAL is a valid MOVZ immediate. */ ++static inline bool ++aarch64_is_movz (unsigned HOST_WIDE_INT val) + { +- if (GET_MODE_SIZE (mode) > 4) +- { +- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) +- return 1; +- } +- else +- { +- /* Ignore sign extension. */ +- val &= (HOST_WIDE_INT) 0xffffffff; +- } +- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val +- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); ++ return (val >> (ctz_hwi (val) & 48)) < 65536; + } + + +-/* Return true if VAL is an immediate that can be loaded into a +- register in a single instruction. */ ++/* Return true if immediate VAL can be created by a 64-bit MOVI/MOVN/MOVZ. */ + bool +-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) ++aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT val) + { +- scalar_int_mode int_mode; +- if (!is_a <scalar_int_mode> (mode, &int_mode)) +- return false; ++ return aarch64_is_movz (val) || aarch64_is_movz (~val) ++ || aarch64_bitmask_imm (val); ++} + +- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) +- return 1; +- return aarch64_bitmask_imm (val, int_mode); ++ ++/* Return true if VAL is an immediate that can be created by a single ++ MOV instruction. */ ++bool ++aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode) ++{ ++ gcc_assert (mode == SImode || mode == DImode); ++ ++ if (val < 65536) ++ return true; ++ ++ unsigned HOST_WIDE_INT mask = ++ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U; ++ ++ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask)) ++ return true; ++ ++ val = (val & mask) | ((val << 32) & ~mask); ++ return aarch64_bitmask_imm (val); + } + + + static int + aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, +- scalar_int_mode mode) ++ machine_mode mode) + { + int i; + unsigned HOST_WIDE_INT val, val2, mask; + int one_match, zero_match; + int num_insns; + ++ gcc_assert (mode == SImode || mode == DImode); ++ + val = INTVAL (imm); + + if (aarch64_move_imm (val, mode)) +@@ -5910,31 +5912,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + return 1; + } + +- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff +- (with XXXX non-zero). In that case check to see if the move can be done in +- a smaller mode. 
*/ +- val2 = val & 0xffffffff; +- if (mode == DImode +- && aarch64_move_imm (val2, SImode) +- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) +- { +- if (generate) +- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); +- +- /* Check if we have to emit a second instruction by checking to see +- if any of the upper 32 bits of the original DI mode value is set. */ +- if (val == val2) +- return 1; +- +- i = (val >> 48) ? 48 : 32; +- +- if (generate) +- emit_insn (gen_insv_immdi (dest, GEN_INT (i), +- GEN_INT ((val >> i) & 0xffff))); +- +- return 2; +- } +- + if ((val >> 32) == 0 || mode == SImode) + { + if (generate) +@@ -5958,24 +5935,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, + one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + + ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); + ++ /* Try a bitmask immediate and a movk to generate the immediate ++ in 2 instructions. */ ++ + if (zero_match < 2 && one_match < 2) + { +- /* Try emitting a bitmask immediate with a movk replacing 16 bits. +- For a 64-bit bitmask try whether changing 16 bits to all ones or +- zeroes creates a valid bitmask. To check any repeated bitmask, +- try using 16 bits from the other 32-bit half of val. */ +- + for (i = 0; i < 64; i += 16) +- if (aarch64_check_bitmask (val, val2, mask << i)) +- { +- if (generate) +- { +- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); +- emit_insn (gen_insv_immdi (dest, GEN_INT (i), +- GEN_INT ((val >> i) & 0xffff))); +- } +- return 2; +- } ++ { ++ if (aarch64_check_bitmask (val, val2, mask << i)) ++ break; ++ ++ val2 = val & ~(mask << i); ++ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode)) ++ break; ++ } ++ ++ if (i != 64) ++ { ++ if (generate) ++ { ++ emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); ++ emit_insn (gen_insv_immdi (dest, GEN_INT (i), ++ GEN_INT ((val >> i) & 0xffff))); ++ } ++ return 2; ++ } + } + + /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ +@@ -6044,26 +6028,24 @@ aarch64_mov128_immediate (rtx imm) + /* Return true if val can be encoded as a 12-bit unsigned immediate with + a left shift of 0 or 12 bits. */ + bool +-aarch64_uimm12_shift (HOST_WIDE_INT val) ++aarch64_uimm12_shift (unsigned HOST_WIDE_INT val) + { +- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val +- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val +- ); ++ return val < 4096 || (val & 0xfff000) == val; + } + + /* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate + that can be created with a left shift of 0 or 12. */ + static HOST_WIDE_INT +-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) ++aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val) + { + /* Check to see if the value fits in 24 bits, as that is the maximum we can + handle correctly. */ +- gcc_assert ((val & 0xffffff) == val); ++ gcc_assert (val < 0x1000000); + +- if (((val & 0xfff) << 0) == val) ++ if (val < 4096) + return val; + +- return val & (0xfff << 12); ++ return val & 0xfff000; + } + + +@@ -7211,8 +7193,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) + return; + } + +- aarch64_internal_mov_immediate (dest, imm, true, +- as_a <scalar_int_mode> (mode)); ++ aarch64_internal_mov_immediate (dest, imm, true, mode); + } + + /* Return the MEM rtx that provides the canary value that should be used +@@ -11410,9 +11391,7 @@ aarch64_float_const_rtx_p (rtx x) + && SCALAR_FLOAT_MODE_P (mode) + && aarch64_reinterpret_float_as_int (x, &ival)) + { +- scalar_int_mode imode = (mode == HFmode +- ? 
SImode +- : int_mode_for_mode (mode).require ()); ++ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ? DImode : SImode; + int num_instr = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + return num_instr < 3; +@@ -14049,10 +14028,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, + proportionally expensive to the number of instructions + required to build that constant. This is true whether we + are compiling for SPEED or otherwise. */ +- if (!is_a <scalar_int_mode> (mode, &int_mode)) +- int_mode = word_mode; ++ machine_mode imode = known_le (GET_MODE_SIZE (mode), 4) ++ ? SImode : DImode; + *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate +- (NULL_RTX, x, false, int_mode)); ++ (NULL_RTX, x, false, imode)); + } + return true; + +@@ -14068,9 +14047,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, + bool succeed = aarch64_reinterpret_float_as_int (x, &ival); + gcc_assert (succeed); + +- scalar_int_mode imode = (mode == HFmode +- ? SImode +- : int_mode_for_mode (mode).require ()); ++ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ++ ? DImode : SImode; + int ncost = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + *cost += COSTS_N_INSNS (ncost); +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 7454a5c77..ea94152bf 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1288,16 +1288,15 @@ + ) + + (define_insn_and_split "*movdi_aarch64" +- (set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") +- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd")) ++ (set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") ++ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd")) + "(register_operand (operands0, DImode) + || aarch64_reg_or_zero (operands1, DImode))" + "@ + mov\\t%x0, %x1 + mov\\t%0, %x1 + mov\\t%x0, %1 +- mov\\t%x0, %1 +- mov\\t%w0, %1 ++ * return aarch64_is_mov_xn_imm (INTVAL (operands1)) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\"; + # + * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); + ldr\\t%x0, %1 +@@ -1319,11 +1318,11 @@ + DONE; + }" + ;; The "mov_imm" type for CNTD is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm, ++ (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm, + load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc, + fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") ++ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") ++ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") + ) + + (define_insn "insv_imm<mode>" +@@ -1487,7 +1486,7 @@ + + (define_insn "*mov<mode>_aarch64" + (set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") +- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N")) ++ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O")) + "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) + || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ +@@ -1502,7 +1501,7 @@ + ldr\\t%x0, %1 + str\\t%x1, %0 + mov\\t%x0, %x1 +- mov\\t%x0, %1" ++ * return aarch64_is_mov_xn_imm (INTVAL (operands1)) ? 
\"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";" + (set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ + f_loadd,f_stored,load_8,store_8,mov_reg,\ + fconstd") +diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md +index ee7587cca..750a42fb1 100644 +--- a/gcc/config/aarch64/constraints.md ++++ b/gcc/config/aarch64/constraints.md +@@ -106,6 +106,11 @@ + + (define_constraint "N" + "A constant that can be used with a 64-bit MOV immediate operation." ++ (and (match_code "const_int") ++ (match_test "aarch64_is_mov_xn_imm (ival)"))) ++ ++(define_constraint "O" ++ "A constant that can be used with a 32 or 64-bit MOV immediate operation." + (and (match_code "const_int") + (match_test "aarch64_move_imm (ival, DImode)"))) + +-- +2.33.0 +
_service:tar_scm:0160-LoongArch-Fix-missing-plugin-header.patch
Added
@@ -0,0 +1,32 @@ +From bec97638d68c760f6ee4b0a86ce4f9ffe9a691b3 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 2 Apr 2024 09:20:32 +0800 +Subject: [PATCH 160/188] LoongArch: Fix missing plugin header + +gcc/ChangeLog: + + * config/loongarch/t-loongarch: Add loongarch-def-array.h + to OPTIONS_H_EXTRA. +--- + gcc/config/loongarch/t-loongarch | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 994f4d19c..488e8cff3 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -18,8 +18,9 @@ + + + GTM_H += loongarch-multilib.h +-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ +- $(srcdir)/config/loongarch/loongarch-tune.h \ ++OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ ++ $(srcdir)/config/loongarch/loongarch-def-array.h \ ++ $(srcdir)/config/loongarch/loongarch-tune.h \ + $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + + # Canonical target triplet from config.gcc +-- +2.43.0 +
_service:tar_scm:0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
Added
@@ -0,0 +1,229 @@ +From 5db3e7b68d5a443e908011b8d53de625ae462f82 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 19 Jun 2023 15:55:28 +0100 +Subject: PATCH 062/157 BackportSME AArch64: convert some patterns to + compact MD syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c72a7b849853716d94e8d313be5dce3c22850113 + +Hi All, + +This converts some patterns in the AArch64 backend to use the new +compact syntax. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (arches): Add nosimd. + (*mov<mode>_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to + compact syntax. +--- + gcc/config/aarch64/aarch64.md | 161 ++++++++++++++++------------------ + 1 file changed, 78 insertions(+), 83 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index ea94152bf..5d02da42f 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -378,7 +378,7 @@ + ;; As a convenience, "fp_q" means "fp" + the ability to move between + ;; Q registers and is equivalent to "simd". + +-(define_enum "arches" any rcpc8_4 fp fp_q simd sve fp16) ++(define_enum "arches" any rcpc8_4 fp fp_q simd nosimd sve fp16) + + (define_enum_attr "arch" "arches" (const_string "any")) + +@@ -409,6 +409,9 @@ + (and (eq_attr "arch" "fp_q, simd") + (match_test "TARGET_SIMD")) + ++ (and (eq_attr "arch" "nosimd") ++ (match_test "!TARGET_SIMD")) ++ + (and (eq_attr "arch" "fp16") + (match_test "TARGET_FP_F16INST")) + +@@ -1194,26 +1197,27 @@ + ) + + (define_insn "*mov<mode>_aarch64" +- (set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") +- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w")) ++ (set (match_operand:SHORT 0 "nonimmediate_operand") ++ (match_operand:SHORT 1 "aarch64_mov_operand")) + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" +- "@ +- mov\t%w0, %w1 +- mov\t%w0, %1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr<size>\t%w0, %1 +- ldr\t%<size>0, %1 +- str<size>\t%w1, %0 +- str\t%<size>1, %0 +- * return TARGET_SIMD ? \"umov\t%w0, %1.<v>0\" : \"fmov\t%w0, %s1\"; +- * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\"; +- * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>0\" : \"fmov\t%s0, %s1\";" +- ;; The "mov_imm" type for CNT is just a placeholder. +- (set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, +- store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup") +- (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*") ++ {@ cons: =0, 1; attrs: type, arch ++ r, r ; mov_reg , * mov\t%w0, %w1 ++ r, M ; mov_imm , * mov\t%w0, %1 ++ w, D<hq>; neon_move , simd << aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); ++ /* The "mov_imm" type for CNT is just a placeholder. 
*/ ++ r, Usv ; mov_imm , sve << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, m ; load_4 , * ldr<size>\t%w0, %1 ++ w, m ; load_4 , * ldr\t%<size>0, %1 ++ m, r Z ; store_4 , * str<size>\\t%w1, %0 ++ m, w ; store_4 , * str\t%<size>1, %0 ++ r, w ; neon_to_gp<q> , simd umov\t%w0, %1.<v>0 ++ r, w ; neon_to_gp<q> , nosimd fmov\t%w0, %s1 /*foo */ ++ w, r Z ; neon_from_gp<q>, simd dup\t%0.<Vallxd>, %w1 ++ w, r Z ; neon_from_gp<q>, nosimd fmov\t%s0, %w1 ++ w, w ; neon_dup , simd dup\t%<Vetype>0, %1.<v>0 ++ w, w ; neon_dup , nosimd fmov\t%s0, %s1 ++ } + ) + + (define_expand "mov<mode>" +@@ -1250,79 +1254,70 @@ + ) + + (define_insn_and_split "*movsi_aarch64" +- (set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") +- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds")) ++ (set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "aarch64_mov_operand")) + "(register_operand (operands0, SImode) + || aarch64_reg_or_zero (operands1, SImode))" +- "@ +- mov\\t%w0, %w1 +- mov\\t%w0, %w1 +- mov\\t%w0, %w1 +- mov\\t%w0, %1 +- # +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr\\t%w0, %1 +- ldr\\t%s0, %1 +- str\\t%w1, %0 +- str\\t%s1, %0 +- adrp\\t%x0, %A1\;ldr\\t%w0, %x0, %L1 +- adr\\t%x0, %c1 +- adrp\\t%x0, %A1 +- fmov\\t%s0, %w1 +- fmov\\t%w0, %s1 +- fmov\\t%s0, %s1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, SImode);" ++ {@ cons: =0, 1; attrs: type, arch, length ++ r k, r ; mov_reg , * , 4 mov\t%w0, %w1 ++ r , k ; mov_reg , * , 4 ^ ++ r , M ; mov_imm , * , 4 mov\t%w0, %1 ++ r , n ; mov_imm , * ,16 # ++ /* The "mov_imm" type for CNT is just a placeholder. */ ++ r , Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r , m ; load_4 , * , 4 ldr\t%w0, %1 ++ w , m ; load_4 , fp , 4 ldr\t%s0, %1 ++ m , r Z; store_4 , * , 4 str\t%w1, %0 ++ m , w ; store_4 , fp , 4 str\t%s1, %0 ++ r , Usw; load_4 , * , 8 adrp\t%x0, %A1;ldr\t%w0, %x0, %L1 ++ r , Usa; adr , * , 4 adr\t%x0, %c1 ++ r , Ush; adr , * , 4 adrp\t%x0, %A1 ++ w , r Z; f_mcr , fp , 4 fmov\t%s0, %w1 ++ r , w ; f_mrc , fp , 4 fmov\t%w0, %s1 ++ w , w ; fmov , fp , 4 fmov\t%s0, %s1 ++ w , Ds ; neon_move, simd, 4 << aarch64_output_scalar_simd_mov_immediate (operands1, SImode); ++ } + "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), SImode) + && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" +- (const_int 0) +- "{ +- aarch64_expand_mov_immediate (operands0, operands1); +- DONE; +- }" +- ;; The "mov_imm" type for CNT is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, +- load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") +- ++ (const_int 0) ++ { ++ aarch64_expand_mov_immediate (operands0, operands1); ++ DONE; ++ } + ) + + (define_insn_and_split "*movdi_aarch64" +- (set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") +- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd")) ++ (set (match_operand:DI 0 "nonimmediate_operand") ++ (match_operand:DI 1 "aarch64_mov_operand")) + "(register_operand (operands0, DImode) + || aarch64_reg_or_zero (operands1, DImode))" +- "@ +- mov\\t%x0, %x1 +- mov\\t%0, %x1 +- mov\\t%x0, %1 +- * return aarch64_is_mov_xn_imm (INTVAL (operands1)) ? 
\"mov\\t%x0, %1\" : \"mov\\t%w0, %1\"; +- # +- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands1); +- ldr\\t%x0, %1 +- ldr\\t%d0, %1 +- str\\t%x1, %0 +- str\\t%d1, %0 +- * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, %0, %L1\" : \"adrp\\t%0, %A1\;ldr\\t%0, %0, %L1\"; +- adr\\t%x0, %c1 +- adrp\\t%x0, %A1 +- fmov\\t%d0, %x1 +- fmov\\t%x0, %d1 +- fmov\\t%d0, %d1 +- * return aarch64_output_scalar_simd_mov_immediate (operands1, DImode);" +- "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), DImode) +- && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" +- (const_int 0) +- "{ +- aarch64_expand_mov_immediate (operands0, operands1); +- DONE; +- }" +- ;; The "mov_imm" type for CNTD is just a placeholder. +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm, +- load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc, +- fmov,neon_move") +- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") +- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") ++ {@ cons: =0, 1; attrs: type, arch, length ++ r, r ; mov_reg , * , 4 mov\t%x0, %x1 ++ k, r ; mov_reg , * , 4 mov\t%0, %x1 ++ r, k ; mov_reg , * , 4 mov\t%x0, %1 ++ r, O ; mov_imm , * , 4 << aarch64_is_mov_xn_imm (INTVAL (operands1)) ? "mov\t%x0, %1" : "mov\t%w0, %1"; ++ r, n ; mov_imm , * ,16 # ++ /* The "mov_imm" type for CNT is just a placeholder. */ ++ r, Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, m ; load_8 , * , 4 ldr\t%x0, %1 ++ w, m ; load_8 , fp , 4 ldr\t%d0, %1 ++ m, r Z; store_8 , * , 4 str\t%x1, %0 ++ m, w ; store_8 , fp , 4 str\t%d1, %0 ++ r, Usw; load_8 , * , 8 << TARGET_ILP32 ? "adrp\t%0, %A1;ldr\t%w0, %0, %L1" : "adrp\t%0, %A1;ldr\t%0, %0, %L1"; ++ r, Usa; adr , * , 4 adr\t%x0, %c1 ++ r, Ush; adr , * , 4 adrp\t%x0, %A1 ++ w, r Z; f_mcr , fp , 4 fmov\t%d0, %x1 ++ r, w ; f_mrc , fp , 4 fmov\t%x0, %d1 ++ w, w ; fmov , fp , 4 fmov\t%d0, %d1 ++ w, Dd ; neon_move, simd, 4 << aarch64_output_scalar_simd_mov_immediate (operands1, DImode); ++ } ++ "CONST_INT_P (operands1) && !aarch64_move_imm (INTVAL (operands1), DImode) ++ && REG_P (operands0) && GP_REGNUM_P (REGNO (operands0))" ++ (const_int 0) ++ { ++ aarch64_expand_mov_immediate (operands0, operands1); ++ DONE; ++ } + ) + + (define_insn "insv_imm<mode>" +-- +2.33.0 +
_service:tar_scm:0161-LoongArch-Remove-unused-code.patch
Added
@@ -0,0 +1,344 @@ +From 47581dd6da960172bc768435400010748b3f97eb Mon Sep 17 00:00:00 2001 +From: Jiahao Xu <xujiahao@loongson.cn> +Date: Wed, 3 Apr 2024 09:38:23 +0800 +Subject: PATCH 161/188 LoongArch: Remove unused code. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Remove unused code. + * config/loongarch/loongarch-protos.h + (loongarch_split_lsx_copy_d): Remove. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/loongarch.cc + (loongarch_split_lsx_copy_d): Ditto. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/lsx.md (lsx_vpickve2gr_du): Remove splitter. + (lsx_vpickve2gr_<lsxfmt_f>): Ditto. + (abs<mode>2): Remove expander. + (vabs<mode>2): Rename 2 abs<mode>2. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-abs.c: New test. +--- + gcc/config/loongarch/lasx.md | 12 +-- + gcc/config/loongarch/loongarch-protos.h | 3 - + gcc/config/loongarch/loongarch.cc | 76 ---------------- + gcc/config/loongarch/lsx.md | 89 ++----------------- + .../gcc.target/loongarch/vector/lsx/lsx-abs.c | 26 ++++++ + 5 files changed, 35 insertions(+), 171 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 45a0a8cc8..44a7d58ff 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -572,12 +572,7 @@ + (match_operand 3 "const_<bitmask256>_operand" ""))) + "ISA_HAS_LASX" + { +-#if 0 +- if (!TARGET_64BIT && (<MODE>mode == V4DImode || <MODE>mode == V4DFmode)) +- return "#"; +- else +-#endif +- return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3"; ++ return "xvinsgr2vr.<lasxfmt>\t%u0,%z1,%y3"; + } + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) +@@ -1446,10 +1441,7 @@ + if (which_alternative == 1) + return "xvldi.b\t%u0,0" ; + +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "xvreplgr2vr.<lasxfmt>\t%u0,%z1"; ++ return "xvreplgr2vr.<lasxfmt>\t%u0,%z1"; + } + (set_attr "type" "simd_fill") + (set_attr "mode" "<MODE>") +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 8523da8d6..0c31a74b7 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -89,9 +89,6 @@ extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); + extern bool loongarch_split_256bit_move_p (rtx, rtx); +-extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); +-extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); +-extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e27335b3c..8d8a50b70 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4772,82 +4772,6 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + } + +- +-/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN is a function +- used to generate subregs. 
*/ +- +-void +-loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index, +- rtx (*gen_fn)(rtx, rtx, rtx)) +-{ +- gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode) +- || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (dest, false); +- rtx high = loongarch_subword (dest, true); +- rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- +- emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2))); +- emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1))); +-} +- +-/* Split a INSERT.D with operand DEST, SRC1.INDEX and SRC2. */ +- +-void +-loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2) +-{ +- int i; +- gcc_assert (GET_MODE (dest) == GET_MODE (src1)); +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src2) == DImode || src2 == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (src2, false); +- rtx high = loongarch_subword (src2, true); +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0); +- i = exact_log2 (INTVAL (index)); +- gcc_assert (i != -1); +- +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1, +- GEN_INT (1 << (i * 2)))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, +- GEN_INT (1 << (i * 2 + 1)))); +-} +- +-/* Split FILL.D. */ +- +-void +-loongarch_split_lsx_fill_d (rtx dest, rtx src) +-{ +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src) == DImode || src == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low, high; +- if (src == const0_rtx) +- { +- low = src; +- high = src; +- } +- else +- { +- low = loongarch_subword (src, false); +- high = loongarch_subword (src, true); +- } +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- emit_insn (gen_lsx_vreplgr2vr_w (new_dest, low)); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3))); +-} +- +- + /* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. 
*/ + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index dc81093e9..2eac11473 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -582,28 +582,11 @@ + (match_operand 3 "const_<bitmask>_operand" ""))) + "ISA_HAS_LSX" + { +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "vinsgr2vr.<lsxfmt>\t%w0,%z1,%y3"; ++ return "vinsgr2vr.<lsxfmt>\t%w0,%z1,%y3"; + } + (set_attr "type" "simd_insert") + (set_attr "mode" "<MODE>")) + +-(define_split +- (set (match_operand:LSX_D 0 "register_operand") +- (vec_merge:LSX_D +- (vec_duplicate:LSX_D +- (match_operand:<UNITMODE> 1 "<LSX_D:lsx_d>_operand")) +- (match_operand:LSX_D 2 "register_operand") +- (match_operand 3 "const_<bitmask>_operand"))) +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- (const_int 0) +-{ +- loongarch_split_lsx_insert_d (operands0, operands2, operands3, operands1); +- DONE; +-}) +- + (define_insn "lsx_vextrins_<lsxfmt_f>_internal" + (set (match_operand:LSX 0 "register_operand" "=f") + (vec_merge:LSX +@@ -653,70 +636,26 @@ + (set_attr "type" "simd_copy") + (set_attr "mode" "<MODE>")) + +-(define_insn_and_split "lsx_vpickve2gr_du" ++(define_insn "lsx_vpickve2gr_du" + (set (match_operand:DI 0 "register_operand" "=r") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "f") + (parallel (match_operand 2 "const_0_or_1_operand" "")))) + "ISA_HAS_LSX" +-{ +- if (TARGET_64BIT) +- return "vpickve2gr.du\t%0,%w1,%2"; +- else +- return "#"; +-} +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- (const_int 0) +-{ +- loongarch_split_lsx_copy_d (operands0, operands1, operands2, +- gen_lsx_vpickve2gr_wu); +- DONE; +-} ++ "vpickve2gr.du\t%0,%w1,%2" + (set_attr "type" "simd_copy") + (set_attr "mode" "V2DI")) + +-(define_insn_and_split "lsx_vpickve2gr_<lsxfmt_f>" ++(define_insn "lsx_vpickve2gr_<lsxfmt_f>" + (set (match_operand:<UNITMODE> 0 "register_operand" "=r") + (vec_select:<UNITMODE> + (match_operand:LSX_D 1 "register_operand" "f") + (parallel (match_operand 2 "const_<indeximm>_operand" "")))) + "ISA_HAS_LSX" +-{ +- if (TARGET_64BIT) +- return "vpickve2gr.<lsxfmt>\t%0,%w1,%2"; +- else +- return "#"; +-} +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- (const_int 0) +-{ +- loongarch_split_lsx_copy_d (operands0, operands1, operands2, +- gen_lsx_vpickve2gr_w); +- DONE; +-} ++ "vpickve2gr.<lsxfmt>\t%0,%w1,%2" + (set_attr "type" "simd_copy") + (set_attr "mode" "<MODE>")) + +- +-(define_expand "abs<mode>2" +- (match_operand:ILSX 0 "register_operand" "=f") +- (abs:ILSX (match_operand:ILSX 1 "register_operand" "f")) +- "ISA_HAS_LSX" +-{ +- if (ISA_HAS_LSX) +- { +- emit_insn (gen_vabs<mode>2 (operands0, operands1)); +- DONE; +- } +- else +- { +- rtx reg = gen_reg_rtx (<MODE>mode); +- emit_move_insn (reg, CONST0_RTX (<MODE>mode)); +- emit_insn (gen_lsx_vadda_<lsxfmt> (operands0, operands1, reg)); +- DONE; +- } +-}) +- + (define_expand "neg<mode>2" + (set (match_operand:ILSX 0 "register_operand") + (neg:ILSX (match_operand:ILSX 1 "register_operand"))) +@@ -1369,25 +1308,11 @@ + if (which_alternative == 1) + return "vldi.<lsxfmt>\t%w0,0"; + +- if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode)) +- return "#"; +- else +- return "vreplgr2vr.<lsxfmt>\t%w0,%z1"; ++ return "vreplgr2vr.<lsxfmt>\t%w0,%z1"; + } + (set_attr "type" "simd_fill") + (set_attr "mode" "<MODE>")) + +-(define_split +- (set (match_operand:LSX_D 0 "register_operand") +- (vec_duplicate:LSX_D +- (match_operand:<UNITMODE> 1 
"register_operand"))) +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- (const_int 0) +-{ +- loongarch_split_lsx_fill_d (operands0, operands1); +- DONE; +-}) +- + (define_insn "logb<mode>2" + (set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX (match_operand:FLSX 1 "register_operand" "f") +@@ -2428,7 +2353,7 @@ + (set_attr "type" "simd_logic") + (set_attr "mode" "<MODE>")) + +-(define_insn "vabs<mode>2" ++(define_insn "abs<mode>2" + (set (match_operand:ILSX 0 "register_operand" "=f") + (abs:ILSX (match_operand:ILSX 1 "register_operand" "f"))) + "ISA_HAS_LSX" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c +new file mode 100644 +index 000000000..cf971badb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler-times "vsigncov.w" 1 } } */ ++/* { dg-final { scan-assembler-times "vsigncov.d" 1 } } */ ++ ++int a4, b4; ++ ++extern int abs (int); ++ ++void ++foo1 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ ai = abs (bi); ++} ++ ++long la2, lb2; ++ ++extern long labs (long); ++ ++void ++foo2 (void) ++{ ++ for (int i = 0; i < 2; i++) ++ lai = labs (lbi); ++} +-- +2.43.0 +
_service:tar_scm:0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
Added
@@ -0,0 +1,792 @@ +From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:18 +0000 +Subject: PATCH 063/157 BackportSME aarch64: Use SVE's RDVL instruction + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0 + +We didn't previously use SVE's RDVL instruction, since the CNT* +forms are preferred and provide most of the range. However, +there are some cases that RDVL can handle and CNT* can't, +and using RDVL-like instructions becomes important for SME. + +gcc/ + * config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p) + (aarch64_output_sve_rdvl): Declare. + * config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New + function, split out from... + (aarch64_sve_cnt_immediate_p): ...here. + (aarch64_sve_rdvl_factor_p): New function. + (aarch64_sve_rdvl_immediate_p): Likewise. + (aarch64_output_sve_rdvl): Likewise. + (aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL + for some cases. + (aarch64_expand_mov_immediate): Handle RDVL immediates. + (aarch64_mov_operand_p): Likewise. + * config/aarch64/constraints.md (Usr): New constraint. + * config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Add an RDVL + alternative. + (*movsi_aarch64, *movdi_aarch64): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output. + * gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise. + * gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used + to calculate the -17 and 17 factors. + * gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor. 
+--- + gcc/config/aarch64/aarch64-protos.h | 2 + + gcc/config/aarch64/aarch64.cc | 191 ++++++++++++------ + gcc/config/aarch64/aarch64.md | 3 + + gcc/config/aarch64/constraints.md | 6 + + .../gcc.target/aarch64/sve/acle/asm/cntb.c | 71 +++++-- + .../gcc.target/aarch64/sve/acle/asm/cntd.c | 12 +- + .../gcc.target/aarch64/sve/acle/asm/cnth.c | 20 +- + .../gcc.target/aarch64/sve/acle/asm/cntw.c | 16 +- + .../gcc.target/aarch64/sve/acle/asm/prfb.c | 6 +- + .../gcc.target/aarch64/sve/acle/asm/prfd.c | 4 +- + .../gcc.target/aarch64/sve/acle/asm/prfh.c | 4 +- + .../gcc.target/aarch64/sve/acle/asm/prfw.c | 4 +- + .../gcc.target/aarch64/sve/loop_add_4.c | 6 +- + .../aarch64/sve/pcs/stack_clash_1.c | 3 +- + 14 files changed, 225 insertions(+), 123 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 3ff1a0163..14a568140 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode); + HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); + bool aarch64_sve_cnt_immediate_p (rtx); + bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); ++bool aarch64_sve_rdvl_immediate_p (rtx); + bool aarch64_sve_addvl_addpl_immediate_p (rtx); + bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); +@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *); + char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); + char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); + char *aarch64_output_sve_scalar_inc_dec (rtx); ++char *aarch64_output_sve_rdvl (rtx); + char *aarch64_output_sve_addvl_addpl (rtx); + char *aarch64_output_sve_vector_inc_dec (const char *, rtx); + char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index acb659f53..4194dfc70 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) + return -1; + } + ++/* Return true if a single CNTBHWD instruction can multiply FACTOR ++ by the number of 128-bit quadwords in an SVE vector. */ ++ ++static bool ++aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor) ++{ ++ /* The coefficient must be 1, 16 * {2, 4, 8, 16}. */ ++ return (IN_RANGE (factor, 2, 16 * 16) ++ && (factor & 1) == 0 ++ && factor <= 16 * (factor & -factor)); ++} ++ + /* Return true if we can move VALUE into a register using a single + CNTBHWD instruction. */ + +@@ -5527,11 +5539,7 @@ static bool + aarch64_sve_cnt_immediate_p (poly_int64 value) + { + HOST_WIDE_INT factor = value.coeffs0; +- /* The coefficient must be 1, 16 * {2, 4, 8, 16}. */ +- return (value.coeffs1 == factor +- && IN_RANGE (factor, 2, 16 * 16) +- && (factor & 1) == 0 +- && factor <= 16 * (factor & -factor)); ++ return value.coeffs1 == factor && aarch64_sve_cnt_factor_p (factor); + } + + /* Likewise for rtx X. */ +@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset) + -offset_value.coeffs1, 0); + } + ++/* Return true if a single RDVL instruction can multiply FACTOR by the ++ number of 128-bit quadwords in an SVE vector. 
*/ ++ ++static bool ++aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor) ++{ ++ return (multiple_p (factor, 16) ++ && IN_RANGE (factor, -32 * 16, 31 * 16)); ++} ++ ++/* Return true if we can move VALUE into a register using a single ++ RDVL instruction. */ ++ ++static bool ++aarch64_sve_rdvl_immediate_p (poly_int64 value) ++{ ++ HOST_WIDE_INT factor = value.coeffs0; ++ return value.coeffs1 == factor && aarch64_sve_rdvl_factor_p (factor); ++} ++ ++/* Likewise for rtx X. */ ++ ++bool ++aarch64_sve_rdvl_immediate_p (rtx x) ++{ ++ poly_int64 value; ++ return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value); ++} ++ ++/* Return the asm string for moving RDVL immediate OFFSET into register ++ operand 0. */ ++ ++char * ++aarch64_output_sve_rdvl (rtx offset) ++{ ++ static char buffersizeof ("rdvl\t%x0, #-") + 3 * sizeof (int); ++ poly_int64 offset_value = rtx_to_poly_int64 (offset); ++ gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value)); ++ ++ int factor = offset_value.coeffs1; ++ snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16); ++ return buffer; ++} ++ + /* Return true if we can add VALUE to a register using a single ADDVL + or ADDPL instruction. */ + +@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset) + count += 1; + else if (factor != 0) + { +- factor = abs (factor); +- if (factor > 16 * (factor & -factor)) +- /* Need one register for the CNT result and one for the multiplication +- factor. If necessary, the second temporary can be reused for the +- constant part of the offset. */ ++ factor /= (HOST_WIDE_INT) least_bit_hwi (factor); ++ if (!IN_RANGE (factor, -32, 31)) ++ /* Need one register for the CNT or RDVL result and one for the ++ multiplication factor. If necessary, the second temporary ++ can be reused for the constant part of the offset. */ + return 2; +- /* Need one register for the CNT result (which might then ++ /* Need one register for the CNT or RDVL result (which might then + be shifted). */ + count += 1; + } +@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + /* Otherwise use a CNT-based sequence. */ + else if (factor != 0) + { +- /* Use a subtraction if we have a negative factor. */ +- rtx_code code = PLUS; +- if (factor < 0) +- { +- factor = -factor; +- code = MINUS; +- } ++ /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT, ++ with negative shifts indicating a shift right. */ ++ HOST_WIDE_INT low_bit = least_bit_hwi (factor); ++ HOST_WIDE_INT rel_factor = factor / low_bit; ++ int shift = exact_log2 (low_bit) - 4; ++ gcc_assert (shift >= -4 && (rel_factor & 1) != 0); ++ ++ /* Set CODE, VAL and SHIFT so that +- VAL * 2**SHIFT is ++ equal to CNTB * FACTOR / 16, with CODE being the +-. + +- /* Calculate CNTD * FACTOR / 2. First try to fold the division +- into the multiplication. */ ++ We can avoid a multiplication if REL_FACTOR is in the range ++ of RDVL, although there are then various optimizations that ++ we can try on top. */ ++ rtx_code code = PLUS; + rtx val; +- int shift = 0; +- if (factor & 1) +- /* Use a right shift by 1. */ +- shift = -1; +- else +- factor /= 2; +- HOST_WIDE_INT low_bit = factor & -factor; +- if (factor <= 16 * low_bit) ++ if (IN_RANGE (rel_factor, -32, 31)) + { +- if (factor > 16 * 8) ++ /* Try to use an unshifted CNTBHWD or RDVL. 
*/ ++ if (aarch64_sve_cnt_factor_p (factor) ++ || aarch64_sve_rdvl_factor_p (factor)) ++ { ++ val = gen_int_mode (poly_int64 (factor, factor), mode); ++ shift = 0; ++ } ++ /* Try to subtract an unshifted CNTBHWD. */ ++ else if (aarch64_sve_cnt_factor_p (-factor)) + { +- /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate +- the value with the minimum multiplier and shift it into +- position. */ +- int extra_shift = exact_log2 (low_bit); +- shift += extra_shift; +- factor >>= extra_shift; ++ code = MINUS; ++ val = gen_int_mode (poly_int64 (-factor, -factor), mode); ++ shift = 0; + } +- val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode); ++ /* If subtraction is free, prefer to load a positive constant. ++ In the best case this will fit a shifted CNTB. */ ++ else if (src != const0_rtx && rel_factor < 0) ++ { ++ code = MINUS; ++ val = gen_int_mode (-rel_factor * BYTES_PER_SVE_VECTOR, mode); ++ } ++ /* Otherwise use a shifted RDVL or CNTBHWD. */ ++ else ++ val = gen_int_mode (rel_factor * BYTES_PER_SVE_VECTOR, mode); + } + else + { +- /* Base the factor on LOW_BIT if we can calculate LOW_BIT +- directly, since that should increase the chances of being +- able to use a shift and add sequence. If LOW_BIT itself +- is out of range, just use CNTD. */ +- if (low_bit <= 16 * 8) +- factor /= low_bit; ++ /* If we can calculate CNTB << SHIFT directly, prefer to do that, ++ since it should increase the chances of being able to use ++ a shift and add sequence for the multiplication. ++ If CNTB << SHIFT is out of range, stick with the current ++ shift factor. */ ++ if (IN_RANGE (low_bit, 2, 16 * 16)) ++ { ++ val = gen_int_mode (poly_int64 (low_bit, low_bit), mode); ++ shift = 0; ++ } + else +- low_bit = 1; ++ val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode); + +- val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode); + val = aarch64_force_temporary (mode, temp1, val); + ++ /* Prefer to multiply by a positive factor and subtract rather ++ than multiply by a negative factor and add, since positive ++ values are usually easier to move. */ ++ if (rel_factor < 0 && src != const0_rtx) ++ { ++ rel_factor = -rel_factor; ++ code = MINUS; ++ } ++ + if (can_create_pseudo_p ()) + { +- rtx coeff1 = gen_int_mode (factor, mode); ++ rtx coeff1 = gen_int_mode (rel_factor, mode); + val = expand_mult (mode, val, coeff1, NULL_RTX, true, true); + } + else + { +- /* Go back to using a negative multiplication factor if we have +- no register from which to subtract. */ +- if (code == MINUS && src == const0_rtx) +- { +- factor = -factor; +- code = PLUS; +- } +- rtx coeff1 = gen_int_mode (factor, mode); ++ rtx coeff1 = gen_int_mode (rel_factor, mode); + coeff1 = aarch64_force_temporary (mode, temp2, coeff1); + val = gen_rtx_MULT (mode, val, coeff1); + } + } + ++ /* Multiply by 2 ** SHIFT. */ + if (shift > 0) + { +- /* Multiply by 1 << SHIFT. */ + val = aarch64_force_temporary (mode, temp1, val); + val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift)); + } +- else if (shift == -1) ++ else if (shift < 0) + { +- /* Divide by 2. */ + val = aarch64_force_temporary (mode, temp1, val); +- val = gen_rtx_ASHIFTRT (mode, val, const1_rtx); ++ val = gen_rtx_ASHIFTRT (mode, val, GEN_INT (-shift)); + } + +- /* Calculate SRC +/- CNTD * FACTOR / 2. */ ++ /* Add the result to SRC or subtract the result from SRC. 
*/ + if (src != const0_rtx) + { + val = aarch64_force_temporary (mode, temp1, val); +@@ -7045,7 +7112,9 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) + aarch64_report_sve_required (); + return; + } +- if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset)) ++ if (base == const0_rtx ++ && (aarch64_sve_cnt_immediate_p (offset) ++ || aarch64_sve_rdvl_immediate_p (offset))) + emit_insn (gen_rtx_SET (dest, imm)); + else + { +@@ -21751,7 +21820,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) + if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; + +- if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x)) ++ if (TARGET_SVE ++ && (aarch64_sve_cnt_immediate_p (x) ++ || aarch64_sve_rdvl_immediate_p (x))) + return true; + + return aarch64_classify_symbolic_expression (x) +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 5d02da42f..c0977a3da 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1207,6 +1207,7 @@ + w, D<hq>; neon_move , simd << aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); + /* The "mov_imm" type for CNT is just a placeholder. */ + r, Usv ; mov_imm , sve << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, Usr ; mov_imm , sve << aarch64_output_sve_rdvl (operands1); + r, m ; load_4 , * ldr<size>\t%w0, %1 + w, m ; load_4 , * ldr\t%<size>0, %1 + m, r Z ; store_4 , * str<size>\\t%w1, %0 +@@ -1265,6 +1266,7 @@ + r , n ; mov_imm , * ,16 # + /* The "mov_imm" type for CNT is just a placeholder. */ + r , Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r , Usr; mov_imm , sve, 4 << aarch64_output_sve_rdvl (operands1); + r , m ; load_4 , * , 4 ldr\t%w0, %1 + w , m ; load_4 , fp , 4 ldr\t%s0, %1 + m , r Z; store_4 , * , 4 str\t%w1, %0 +@@ -1299,6 +1301,7 @@ + r, n ; mov_imm , * ,16 # + /* The "mov_imm" type for CNT is just a placeholder. */ + r, Usv; mov_imm , sve , 4 << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, Usr; mov_imm , sve, 4 << aarch64_output_sve_rdvl (operands1); + r, m ; load_8 , * , 4 ldr\t%x0, %1 + w, m ; load_8 , fp , 4 ldr\t%d0, %1 + m, r Z; store_8 , * , 4 str\t%x1, %0 +diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md +index 750a42fb1..212a73416 100644 +--- a/gcc/config/aarch64/constraints.md ++++ b/gcc/config/aarch64/constraints.md +@@ -214,6 +214,12 @@ + (and (match_code "const_int") + (match_test "aarch64_high_bits_all_ones_p (ival)"))) + ++(define_constraint "Usr" ++ "@internal ++ A constraint that matches a value produced by RDVL." ++ (and (match_code "const_poly_int") ++ (match_test "aarch64_sve_rdvl_immediate_p (op)"))) ++ + (define_constraint "Usv" + "@internal + A constraint that matches a VG-based constant that can be loaded by +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c +index 8b8fe8e4f..a22d8a28d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c +@@ -51,19 +51,24 @@ PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; } + */ + PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; } + +-/* Other sequences would be OK. 
*/
+ /*
+ ** cntb_17:
+-** cntb x0, all, mul #16
+-** incb x0
++** rdvl x0, #17
+ ** ret
+ */
+ PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; }
+
++/*
++** cntb_31:
++** rdvl x0, #31
++** ret
++*/
++PROTO (cntb_31, uint64_t, ()) { return svcntb () * 31; }
++
+ /*
+ ** cntb_32:
+-** cntd (x[0-9]+)
+-** lsl x0, \1, 8
++** cntb (x[0-9]+)
++** lsl x0, \1, 5
+ ** ret
+ */
+ PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; }
+@@ -80,16 +85,16 @@ PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; }
+
+ /*
+ ** cntb_64:
+-** cntd (x[0-9]+)
+-** lsl x0, \1, 9
++** cntb (x[0-9]+)
++** lsl x0, \1, 6
+ ** ret
+ */
+ PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; }
+
+ /*
+ ** cntb_128:
+-** cntd (x[0-9]+)
+-** lsl x0, \1, 10
++** cntb (x[0-9]+)
++** lsl x0, \1, 7
+ ** ret
+ */
+ PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; }
+@@ -106,46 +111,70 @@ PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; }
+
+ /*
+ ** cntb_m1:
+-** cntb (x[0-9]+)
+-** neg x0, \1
++** rdvl x0, #-1
+ ** ret
+ */
+ PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); }
+
+ /*
+ ** cntb_m13:
+-** cntb (x[0-9]+), all, mul #13
+-** neg x0, \1
++** rdvl x0, #-13
+ ** ret
+ */
+ PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; }
+
+ /*
+ ** cntb_m15:
+-** cntb (x[0-9]+), all, mul #15
+-** neg x0, \1
++** rdvl x0, #-15
+ ** ret
+ */
+ PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; }
+
+ /*
+ ** cntb_m16:
+-** cntb (x[0-9]+), all, mul #16
+-** neg x0, \1
++** rdvl x0, #-16
+ ** ret
+ */
+ PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; }
+
+-/* Other sequences would be OK. */
+ /*
+ ** cntb_m17:
+-** cntb x0, all, mul #16
+-** incb x0
+-** neg x0, x0
++** rdvl x0, #-17
+ ** ret
+ */
+ PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; }
+
++/*
++** cntb_m32:
++** rdvl x0, #-32
++** ret
++*/
++PROTO (cntb_m32, uint64_t, ()) { return -svcntb () * 32; }
++
++/*
++** cntb_m33:
++** rdvl x0, #-32
++** decb x0
++** ret
++*/
++PROTO (cntb_m33, uint64_t, ()) { return -svcntb () * 33; }
++
++/*
++** cntb_m34:
++** rdvl (x[0-9]+), #-17
++** lsl x0, \1, #?1
++** ret
++*/
++PROTO (cntb_m34, uint64_t, ()) { return -svcntb () * 34; }
++
++/*
++** cntb_m64:
++** rdvl (x[0-9]+), #-1
++** lsl x0, \1, #?6
++** ret
++*/
++PROTO (cntb_m64, uint64_t, ()) { return -svcntb () * 64; }
++
+ /*
+ ** incb_1:
+ ** incb x0
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
+index 0d0ed4849..090a643b4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
+@@ -54,8 +54,8 @@ PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; }
+ /* Other sequences would be OK. */
+ /*
+ ** cntd_17:
+-** cntb x0, all, mul #2
+-** incd x0
++** rdvl (x[0-9]+), #17
++** asr x0, \1, 3
+ ** ret
+ */
+ PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; }
+@@ -107,8 +107,7 @@ PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; }
+
+ /*
+ ** cntd_m16:
+-** cntb (x[0-9]+), all, mul #2
+-** neg x0, \1
++** rdvl x0, #-2
+ ** ret
+ */
+ PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
+@@ -116,9 +115,8 @@ PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
+ /* Other sequences would be OK. */
+ /*
+ ** cntd_m17:
+-** cntb x0, all, mul #2
+-** incd x0
+-** neg x0, x0
++** rdvl (x[0-9]+), #-17
++** asr x0, \1, 3
+ ** ret
+ */
+ PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
+index c29930f15..1a4e7dc0e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
+@@ -54,8 +54,8 @@ PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; }
+ /* Other sequences would be OK. */
+ /*
+ ** cnth_17:
+-** cntb x0, all, mul #8
+-** inch x0
++** rdvl (x[0-9]+), #17
++** asr x0, \1, 1
+ ** ret
+ */
+ PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; }
+@@ -69,16 +69,16 @@ PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; }
+
+ /*
+ ** cnth_64:
+-** cntd (x[0-9]+)
+-** lsl x0, \1, 8
++** cntb (x[0-9]+)
++** lsl x0, \1, 5
+ ** ret
+ */
+ PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; }
+
+ /*
+ ** cnth_128:
+-** cntd (x[0-9]+)
+-** lsl x0, \1, 9
++** cntb (x[0-9]+)
++** lsl x0, \1, 6
+ ** ret
+ */
+ PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; }
+@@ -109,8 +109,7 @@ PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; }
+
+ /*
+ ** cnth_m16:
+-** cntb (x[0-9]+), all, mul #8
+-** neg x0, \1
++** rdvl x0, #-8
+ ** ret
+ */
+ PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
+@@ -118,9 +117,8 @@ PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
+ /* Other sequences would be OK. */
+ /*
+ ** cnth_m17:
+-** cntb x0, all, mul #8
+-** inch x0
+-** neg x0, x0
++** rdvl (x[0-9]+), #-17
++** asr x0, \1, 1
+ ** ret
+ */
+ PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
+index e26cc67a4..9d1697690 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
+@@ -54,8 +54,8 @@ PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; }
+ /* Other sequences would be OK. */
+ /*
+ ** cntw_17:
+-** cntb x0, all, mul #4
+-** incw x0
++** rdvl (x[0-9]+), #17
++** asr x0, \1, 2
+ ** ret
+ */
+ PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; }
+@@ -76,8 +76,8 @@ PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; }
+
+ /*
+ ** cntw_128:
+-** cntd (x[0-9]+)
+-** lsl x0, \1, 8
++** cntb (x[0-9]+)
++** lsl x0, \1, 5
+ ** ret
+ */
+ PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; }
+@@ -108,8 +108,7 @@ PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; }
+
+ /*
+ ** cntw_m16:
+-** cntb (x[0-9]+), all, mul #4
+-** neg x0, \1
++** rdvl (x[0-9]+), #-4
+ ** ret
+ */
+ PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
+@@ -117,9 +116,8 @@ PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
+ /* Other sequences would be OK. */
+ /*
+ ** cntw_m17:
+-** cntb x0, all, mul #4
+-** incw x0
+-** neg x0, x0
++** rdvl (x[0-9]+), #-17
++** asr x0, \1, 2
+ ** ret
+ */
+ PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
+index c90730a03..94cd3a066 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
+@@ -218,8 +218,8 @@ TEST_PREFETCH (prfb_vnum_31, uint16_t,
+
+ /*
+ ** prfb_vnum_32:
+-** cntd (x[0-9]+)
+-** lsl (x[0-9]+), \1, #?8
++** cntb (x[0-9]+)
++** lsl (x[0-9]+), \1, #?5
+ ** add (x[0-9]+), (\2, x0|x0, \2)
+ ** prfb pldl1keep, p0, \[\3\]
+ ** ret
+@@ -240,7 +240,7 @@ TEST_PREFETCH (prfb_vnum_m32, uint16_t,
+ /*
+ ** prfb_vnum_m33:
+ ** ...
+-** prfb pldl1keep, p0, \[x[0-9]+\]
++** prfb pldl1keep, p0, \[x[0-9]+(, x[0-9]+)?\]
+ ** ret
+ */
+ TEST_PREFETCH (prfb_vnum_m33, uint16_t,
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
+index 869ef3d3e..b7a116cf0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
+@@ -218,8 +218,8 @@ TEST_PREFETCH (prfd_vnum_31, uint16_t,
+
+ /*
+ ** prfd_vnum_32:
+-** cntd (x[0-9]+)
+-** lsl (x[0-9]+), \1, #?8
++** cntb (x[0-9]+)
++** lsl (x[0-9]+), \1, #?5
+ ** add (x[0-9]+), (\2, x0|x0, \2)
+ ** prfd pldl1keep, p0, \[\3\]
+ ** ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
+index 45a735eae..9d3df6bd3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
+@@ -218,8 +218,8 @@ TEST_PREFETCH (prfh_vnum_31, uint16_t,
+
+ /*
+ ** prfh_vnum_32:
+-** cntd (x[0-9]+)
+-** lsl (x[0-9]+), \1, #?8
++** cntb (x[0-9]+)
++** lsl (x[0-9]+), \1, #?5
+ ** add (x[0-9]+), (\2, x0|x0, \2)
+ ** prfh pldl1keep, p0, \[\3\]
+ ** ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
+index 444187f45..6962abab6 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
+@@ -218,8 +218,8 @@ TEST_PREFETCH (prfw_vnum_31, uint16_t,
+
+ /*
+ ** prfw_vnum_32:
+-** cntd (x[0-9]+)
+-** lsl (x[0-9]+), \1, #?8
++** cntb (x[0-9]+)
++** lsl (x[0-9]+), \1, #?5
+ ** add (x[0-9]+), (\2, x0|x0, \2)
+ ** prfw pldl1keep, p0, \[\3\]
+ ** ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
+index 9ead9c21b..7f02497e8 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
+@@ -68,8 +68,7 @@ TEST_ALL (LOOP)
+ /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
+ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
+ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
+-/* 2 for the calculations of -17 and 17. */
+-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */
++/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */
+
+ /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */
+ /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */
+@@ -86,8 +85,7 @@ TEST_ALL (LOOP)
+ /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
+ /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
+ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
+-/* 2 for the calculations of -17 and 17. */
+-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */
++/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */
+
+ /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */
+ /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
+index 110947a6c..5de34fc61 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
+@@ -6,8 +6,7 @@
+
+ /*
+ ** test_1:
+-** cntd x12, all, mul #9
+-** lsl x12, x12, #?4
++** rdvl x12, #18
+ ** mov x11, sp
+ ** ...
+ ** sub sp, sp, x12
+--
+2.33.0
+
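A quick illustration of what the rewritten expectations above assert. This is a minimal sketch, assuming an aarch64 compiler with SVE enabled; the function names are mine, but the expected instruction sequences come straight from the updated tests.

#include <arm_sve.h>
#include <cstdint>

// Multiples of the SVE vector length in [-32, 31] now compile to a single
// RDVL instead of a CNTB/INCB or CNTB/NEG pair.
uint64_t bytes_x17 () { return svcntb () * 17; }   // rdvl x0, #17
uint64_t bytes_m16 () { return -svcntb () * 16; }  // rdvl x0, #-16

// Larger powers of two become a CNTB (or RDVL) feeding a shift.
uint64_t bytes_x64 () { return svcntb () * 64; }   // cntb, then lsl #6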
View file
_service:tar_scm:0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch
Added
@@ -0,0 +1,135 @@
+From 7dff9d3f7fefe074e78cd7ff6529d7c1ea6cc3b1 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 2 Apr 2024 14:29:08 +0800
+Subject: [PATCH 162/188] LoongArch: Set default alignment for functions, jumps and loops [PR112919].
+
+Xi Ruoyao set the alignment rules under LA464 in commit r14-1839,
+but the macro ASM_OUTPUT_ALIGN_WITH_NOP was removed in r14-4674,
+which affected the alignment rules.
+
+So I set different alignments on LA464 and LA664 again, measured the
+performance of SPEC 2006, and adjusted the defaults based on the test
+results.
+
+gcc/ChangeLog:
+
+ PR target/112919
+ * config/loongarch/loongarch-def.cc (la664_align): Newly defined
+ function that sets alignment rules under the LA664 microarchitecture.
+ * config/loongarch/loongarch-opts.cc
+ (loongarch_target_option_override): If not optimizing for size, set
+ the default alignment to what the target wants.
+ * config/loongarch/loongarch-tune.h (struct loongarch_align): Add
+ new member variables jump and loop.
+---
+ gcc/config/loongarch/loongarch-def.cc | 11 ++++++++---
+ gcc/config/loongarch/loongarch-opts.cc | 19 +++++++++++++------
+ gcc/config/loongarch/loongarch-tune.h | 22 +++++++++++++++-------
+ 3 files changed, 36 insertions(+), 16 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
+index 533dd0af2..a48050c5f 100644
+--- a/gcc/config/loongarch/loongarch-def.cc
++++ b/gcc/config/loongarch/loongarch-def.cc
+@@ -81,14 +81,19 @@ array_tune<loongarch_cache> loongarch_cpu_cache =
+
+ static inline loongarch_align la464_align ()
+ {
+- return loongarch_align ().function_ ("32").label_ ("16");
++ return loongarch_align ().function_ ("32").loop_ ("16").jump_ ("16");
++}
++
++static inline loongarch_align la664_align ()
++{
++ return loongarch_align ().function_ ("8").loop_ ("8").jump_ ("32");
+ }
+
+ array_tune<loongarch_align> loongarch_cpu_align =
+ array_tune<loongarch_align> ()
+- .set (CPU_LOONGARCH64, la464_align ())
++ .set (CPU_LOONGARCH64, la664_align ())
+ .set (CPU_LA464, la464_align ())
+- .set (CPU_LA664, la464_align ());
++ .set (CPU_LA664, la664_align ());
+
+ /* Default RTX cost initializer. */
+ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index 062d430c2..c455c5e32 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -922,13 +922,20 @@ loongarch_target_option_override (struct loongarch_target *target,
+ {
+ loongarch_update_gcc_opt_status (target, opts, opts_set);
+
+- /* alignments */
+- if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+- opts->x_str_align_functions
+- = loongarch_cpu_align[target->cpu_tune].function;
++ /* If not optimizing for size, set the default
++ alignment to what the target wants. */
++ if (!opts->x_optimize_size)
++ {
++ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
++ opts->x_str_align_functions
++ = loongarch_cpu_align[target->cpu_tune].function;
++
++ if (opts->x_flag_align_loops && !opts->x_str_align_loops)
++ opts->x_str_align_loops = loongarch_cpu_align[target->cpu_tune].loop;
+
+- if (opts->x_flag_align_labels && !opts->x_str_align_labels)
+- opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label;
++ if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
++ opts->x_str_align_jumps = loongarch_cpu_align[target->cpu_tune].jump;
++ }
+
+ /* Set up parameters to be used in prefetching algorithm. */
+ int simultaneous_prefetches
+diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
+index 26f163f0a..d286eee0b 100644
+--- a/gcc/config/loongarch/loongarch-tune.h
++++ b/gcc/config/loongarch/loongarch-tune.h
+@@ -162,14 +162,16 @@ struct loongarch_cache {
+ }
+ };
+
+-/* Alignment for functions and labels for best performance. For new uarchs
+- the value should be measured via benchmarking. See the documentation for
+- -falign-functions and -falign-labels in invoke.texi for the format. */
++/* Alignment for functions, loops and jumps for best performance. For new
++ uarchs the value should be measured via benchmarking. See the
++ documentation for -falign-functions, -falign-loops, and -falign-jumps in
++ invoke.texi for the format. */
+ struct loongarch_align {
+ const char *function; /* default value for -falign-functions */
+- const char *label; /* default value for -falign-labels */
++ const char *loop; /* default value for -falign-loops */
++ const char *jump; /* default value for -falign-jumps */
+
+- loongarch_align () : function (nullptr), label (nullptr) {}
++ loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr) {}
+
+ loongarch_align function_ (const char *_function)
+ {
+@@ -177,9 +179,15 @@ struct loongarch_align {
+ return *this;
+ }
+
+- loongarch_align label_ (const char *_label)
++ loongarch_align loop_ (const char *_loop)
+ {
+- label = _label;
++ loop = _loop;
++ return *this;
++ }
++
++ loongarch_align jump_ (const char *_jump)
++ {
++ jump = _jump;
+ return *this;
+ }
+ };
+--
+2.43.0
+
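In effect, -mtune=la664 now implies -falign-functions=8 -falign-loops=8 -falign-jumps=32 (and -mtune=la464 implies 32/16/16) whenever the user neither optimizes for size nor sets those options explicitly. Below is a reduced sketch of the fluent-setter pattern the tune table uses; the struct mirrors loongarch_align from the diff, while main is hypothetical.

struct align_opts {
  const char *function = nullptr, *loop = nullptr, *jump = nullptr;
  align_opts &function_ (const char *v) { function = v; return *this; }
  align_opts &loop_ (const char *v) { loop = v; return *this; }
  align_opts &jump_ (const char *v) { jump = v; return *this; }
};

int main ()
{
  // LA664 defaults from the patch; each string feeds the matching
  // -falign-* option when that option is otherwise unset.
  align_opts la664 = align_opts ().function_ ("8").loop_ ("8").jump_ ("32");
  return la664.jump == nullptr;  // 0 on success
}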
View file
_service:tar_scm:0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
Added
@@ -0,0 +1,137 @@
+From c0badff223a1f5ea5a0f75df72f5d0138d94d8e6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:19 +0000
+Subject: [PATCH 064/157] [Backport][SME] aarch64: Make AARCH64_FL_SVE
+ requirements explicit
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd7aaef62a43efe52eece525eea4d7d252b0c148
+
+So far, all intrinsics covered by the aarch64-sve-builtins*
+framework have (naturally enough) required at least SVE.
+However, arm_sme.h defines a couple of intrinsics that can
+be called by any code. It's therefore necessary to make
+the implicit SVE requirement explicit.
+
+gcc/
+ * config/aarch64/aarch64-sve-builtins.cc (function_groups): Remove
+ implied requirement on SVE.
+ * config/aarch64/aarch64-sve-builtins-base.def: Explicitly require SVE.
+ * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise.
+---
+ .../aarch64/aarch64-sve-builtins-base.def | 10 +++++-----
+ .../aarch64/aarch64-sve-builtins-sve2.def | 18 +++++++++++++-----
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
+ 3 files changed, 19 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
+index ffdf7cb4c..3a58f76c3 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
+@@ -17,7 +17,7 @@
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+-#define REQUIRED_EXTENSIONS 0
++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE
+ DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz)
+ DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz)
+ DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit)
+@@ -318,7 +318,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
+ DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS AARCH64_FL_BF16
++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16
+ DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
+ DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
+ DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
+@@ -330,7 +330,7 @@ DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
+ DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM
+ DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
+ DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
+ DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none)
+@@ -339,11 +339,11 @@ DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none)
+ DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM
++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM
+ DEF_SVE_FUNCTION (svmmla, mmla, s_float, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM
++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM
+ DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
+ DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
+ DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none)
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+index 635089ffc..d5f23a887 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+@@ -17,7 +17,7 @@
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2
++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2
+ DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none)
+ DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none)
+ DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none)
+@@ -189,7 +189,9 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
+ DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES)
++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
++ | AARCH64_FL_SVE2 \
++ | AARCH64_FL_SVE2_AES)
+ DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none)
+ DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none)
+ DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none)
+@@ -198,17 +200,23 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none)
+ DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM)
++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
++ | AARCH64_FL_SVE2 \
++ | AARCH64_FL_SVE2_BITPERM)
+ DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none)
+ DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none)
+ DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3)
++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
++ | AARCH64_FL_SVE2 \
++ | AARCH64_FL_SVE2_SHA3)
+ DEF_SVE_FUNCTION (svrax1, binary, d_integer, none)
+ #undef REQUIRED_EXTENSIONS
+
+-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4)
++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
++ | AARCH64_FL_SVE2 \
++ | AARCH64_FL_SVE2_SM4)
+ DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
+ DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
+ #undef REQUIRED_EXTENSIONS
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index 7924cdf0f..dde01f676 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -525,7 +525,7 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
+ static CONSTEXPR const function_group_info function_groups[] = {
+ #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
+ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
+- REQUIRED_EXTENSIONS | AARCH64_FL_SVE },
++ REQUIRED_EXTENSIONS },
+ #include "aarch64-sve-builtins.def"
+ };
+
+--
+2.33.0
+
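The reason for spelling AARCH64_FL_SVE out: availability is a subset test on each entry's required-extensions mask, and the registration code (last hunk) no longer ORs SVE into every entry, so a future arm_sme.h entry can be callable without SVE. A toy model of that subset test follows — the flag values and entries are stand-ins, not GCC's internals.

#include <cstdint>

constexpr uint64_t FL_SVE  = 1u << 0;  // stand-in flag values
constexpr uint64_t FL_BF16 = 1u << 1;

struct entry { const char *name; uint64_t required; };

constexpr entry e_svabs   = { "svabs",   FL_SVE };
constexpr entry e_svbfdot = { "svbfdot", FL_SVE | FL_BF16 };

// An intrinsic is available iff every required bit is enabled.
constexpr bool available (const entry &e, uint64_t isa)
{
  return (e.required & ~isa) == 0;
}

static_assert (available (e_svabs, FL_SVE), "SVE alone suffices");
static_assert (!available (e_svbfdot, FL_SVE), "BF16 is also required");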
View file
_service:tar_scm:0163-LoongArch-Enable-switchable-target.patch
Added
@@ -0,0 +1,281 @@
+From 427d5f10951435241d883a13557f862683046ddd Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Mon, 8 Apr 2024 16:45:13 +0800
+Subject: [PATCH 163/188] LoongArch: Enable switchable target
+
+This patch fixes the back-end context switching in cases where functions
+should be built with their own target contexts instead of the
+global one, such as LTO linking and functions with target attributes (TBD).
+
+ PR target/113233
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.cc (loongarch_reg_init):
+ Reinitialize the loongarch_regno_mode_ok cache.
+ (loongarch_option_override): Same.
+ (loongarch_save_restore_target_globals): Restore target globals.
+ (loongarch_set_current_function): Restore the target contexts
+ for functions.
+ (TARGET_SET_CURRENT_FUNCTION): Define.
+ * config/loongarch/loongarch.h (SWITCHABLE_TARGET): Enable
+ switchable target context.
+ * config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
+ Initialize all builtin functions at startup.
+ (loongarch_expand_builtin): Turn assertion of builtin availability
+ into a test.
+
+gcc/testsuite/ChangeLog:
+
+ * lib/target-supports.exp: Define condition loongarch_sx_as.
+ * gcc.dg/lto/pr113233_0.c: New test.
+---
+ gcc/config/loongarch/loongarch-builtins.cc | 25 +++---
+ gcc/config/loongarch/loongarch.cc | 91 ++++++++++++++++++++--
+ gcc/config/loongarch/loongarch.h | 2 +
+ gcc/testsuite/gcc.dg/lto/pr113233_0.c | 14 ++++
+ gcc/testsuite/lib/target-supports.exp | 12 +++
+ 5 files changed, 127 insertions(+), 17 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/lto/pr113233_0.c
+
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
+index e3b4dbc52..51abba007 100644
+--- a/gcc/config/loongarch/loongarch-builtins.cc
++++ b/gcc/config/loongarch/loongarch-builtins.cc
+@@ -2507,14 +2507,11 @@ loongarch_init_builtins (void)
+ for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
+ {
+ d = &loongarch_builtins[i];
+- if (d->avail ())
+- {
+- type = loongarch_build_function_type (d->function_type);
+- loongarch_builtin_decls[i]
+- = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
+- NULL);
+- loongarch_get_builtin_decl_index[d->icode] = i;
+- }
++ type = loongarch_build_function_type (d->function_type);
++ loongarch_builtin_decls[i]
++ = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
++ NULL);
++ loongarch_get_builtin_decl_index[d->icode] = i;
+ }
+ }
+
+@@ -3100,15 +3097,21 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+ {
+ tree fndecl;
+- unsigned int fcode, avail;
++ unsigned int fcode;
+ const struct loongarch_builtin_description *d;
+
+ fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ fcode = DECL_MD_FUNCTION_CODE (fndecl);
+ gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins));
+ d = &loongarch_builtins[fcode];
+- avail = d->avail ();
+- gcc_assert (avail != 0);
++
++ if (!d->avail ())
++ {
++ error_at (EXPR_LOCATION (exp),
++ "built-in function %qD is not enabled", fndecl);
++ return target;
++ }
++
+ switch (d->builtin_type)
+ {
+ case LARCH_BUILTIN_DIRECT:
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 8d8a50b70..50ab6a82a 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -7567,15 +7567,19 @@ loongarch_global_init (void)
+ loongarch_dwarf_regno[i] = INVALID_REGNUM;
+ }
+
++ /* Function to allocate machine-dependent function status. */
++ init_machine_status = &loongarch_init_machine_status;
++};
++
++static void
++loongarch_reg_init (void)
++{
+ /* Set up loongarch_hard_regno_mode_ok. */
+ for (int mode = 0; mode < MAX_MACHINE_MODE; mode++)
+ for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ loongarch_hard_regno_mode_ok_p[mode][regno]
+ = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
+-
+- /* Function to allocate machine-dependent function status. */
+- init_machine_status = &loongarch_init_machine_status;
+-};
++}
+
+ static void
+ loongarch_option_override_internal (struct loongarch_target *target,
+@@ -7602,20 +7606,92 @@ loongarch_option_override_internal (struct loongarch_target *target,
+
+ /* Override some options according to the resolved target. */
+ loongarch_target_option_override (target, opts, opts_set);
++
++ target_option_default_node = target_option_current_node
++ = build_target_option_node (opts, opts_set);
++
++ loongarch_reg_init ();
++}
++
++/* Remember the last target of loongarch_set_current_function. */
++
++static GTY(()) tree loongarch_previous_fndecl;
++
++/* Restore or save the TREE_TARGET_GLOBALS from or to new_tree.
++ Used by loongarch_set_current_function to
++ make sure optab availability predicates are recomputed when necessary. */
++
++static void
++loongarch_save_restore_target_globals (tree new_tree)
++{
++ if (TREE_TARGET_GLOBALS (new_tree))
++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
++ else if (new_tree == target_option_default_node)
++ restore_target_globals (&default_target_globals);
++ else
++ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
++}
++
++/* Implement TARGET_SET_CURRENT_FUNCTION. */
++
++static void
++loongarch_set_current_function (tree fndecl)
++{
++ if (fndecl == loongarch_previous_fndecl)
++ return;
++
++ tree old_tree;
++ if (loongarch_previous_fndecl == NULL_TREE)
++ old_tree = target_option_current_node;
++ else if (DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl))
++ old_tree = DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl);
++ else
++ old_tree = target_option_default_node;
++
++ if (fndecl == NULL_TREE)
++ {
++ if (old_tree != target_option_current_node)
++ {
++ loongarch_previous_fndecl = NULL_TREE;
++ cl_target_option_restore (&global_options, &global_options_set,
++ TREE_TARGET_OPTION
++ (target_option_current_node));
++ }
++ return;
++ }
++
++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
++ if (new_tree == NULL_TREE)
++ new_tree = target_option_default_node;
++
++ loongarch_previous_fndecl = fndecl;
++
++ if (new_tree == old_tree)
++ return;
++
++ cl_target_option_restore (&global_options, &global_options_set,
++ TREE_TARGET_OPTION (new_tree));
++
++ loongarch_reg_init ();
++
++ loongarch_save_restore_target_globals (new_tree);
+ }
+
++
++
+ /* Implement TARGET_OPTION_OVERRIDE. */
+
+ static void
+ loongarch_option_override (void)
+ {
++ /* Global initializations. */
++ loongarch_global_init ();
++
+ /* Setting up the target configuration. */
+ loongarch_option_override_internal (&la_target,
+ &global_options,
+ &global_options_set);
+
+- /* Global initializations. */
+- loongarch_global_init ();
+ }
+
+ /* Implement TARGET_OPTION_SAVE. */
+@@ -10931,6 +11007,9 @@ loongarch_asm_code_end (void)
+ #undef TARGET_OPTION_RESTORE
+ #define TARGET_OPTION_RESTORE loongarch_option_restore
+
++#undef TARGET_SET_CURRENT_FUNCTION
++#define TARGET_SET_CURRENT_FUNCTION loongarch_set_current_function
++
+ #undef TARGET_LEGITIMIZE_ADDRESS
+ #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 221e8b286..089206605 100644
+--- a/gcc/config/loongarch/loongarch.h
++++ b/gcc/config/loongarch/loongarch.h
+@@ -23,6 +23,8 @@ along with GCC; see the file COPYING3. If not see
+
+ #include "config/loongarch/loongarch-opts.h"
+
++#define SWITCHABLE_TARGET 1
++
+ #define TARGET_SUPPORTS_WIDE_INT 1
+
+ /* Macros to silence warnings about numbers being signed in traditional
+diff --git a/gcc/testsuite/gcc.dg/lto/pr113233_0.c b/gcc/testsuite/gcc.dg/lto/pr113233_0.c
+new file mode 100644
+index 000000000..0a045c519
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/lto/pr113233_0.c
+@@ -0,0 +1,14 @@
++/* { dg-require-effective-target loongarch_sx_as } */
++/* { dg-lto-do link } */
++/* { dg-skip-if "" { ! { loongarch*-linux-* } } } */
++/* { dg-lto-options { { -mlsx } } } */
++/* { dg-suppress-ld-options { -mlsx } } */
++
++#include <lsxintrin.h>
++
++int main (void)
++{
++ __m128i a, b, c;
++ c = __lsx_vand_v (a, b);
++ return 0;
++}
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index 20fbd43ee..b673c92b5 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -10549,6 +10549,18 @@ proc check_vect_support_and_set_flags { } {
+ return 1
+ }
+
++proc check_effective_target_loongarch_sx_as { } {
++ return [check_no_compiler_messages loongarch_sx_as object {
++ #include <lsxintrin.h>
++ int main (void)
++ {
++ __m128i a, b, c;
++ c = __lsx_vand_v (a, b);
++ return 0;
++ }
++ } "-mlsx"]
++}
++
+ proc check_effective_target_loongarch_sx_hw { } {
+ return [check_runtime loongarch_sx_hw {
+ #include <lsxintrin.h>
+--
+2.43.0
+
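The user-visible consequence: every builtin is now registered up front, and calling one that the current function's target does not enable produces the diagnosed error quoted in the diff ("built-in function ... is not enabled") rather than an assertion failure — which is what made mixed-target LTO links like the new pr113233_0.c test crash before. A sketch of the negative case, assuming a compiler built with this patch:

#include <lsxintrin.h>  // LoongArch LSX intrinsics

__m128i f (__m128i a, __m128i b)
{
  // If -mlsx is not in effect for this function, the patched compiler
  // reports that __lsx_vand_v is not enabled instead of ICEing.
  return __lsx_vand_v (a, b);
}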
View file
_service:tar_scm:0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
Added
@@ -0,0 +1,562 @@
+From e99332e15895156632949f3b6c3080fc9d994b13 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:19 +0000
+Subject: [PATCH 065/157] [Backport][SME] aarch64: Add group suffixes to SVE
+ intrinsics
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b607f197967e052d7d7e29f6b41eded18f8c65d
+
+The SME2 ACLE adds a new "group" suffix component to the naming
+convention for SVE intrinsics. This is also used in the new tuple
+forms of the svreinterpret intrinsics.
+
+This patch adds support for group suffixes and defines the
+x2, x3 and x4 suffixes that are needed for the svreinterprets.
+
+gcc/
+ * config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Take
+ a group suffix index parameter.
+ (build_32_64, build_all): Update accordingly. Iterate over all
+ group suffixes.
+ * config/aarch64/aarch64-sve-builtins-sve2.cc (svqrshl_impl::fold)
+ (svqshl_impl::fold, svrshl_impl::fold): Update function_instance
+ constructors.
+ * config/aarch64/aarch64-sve-builtins.cc (group_suffixes): New array.
+ (groups_none): New constant.
+ (function_groups): Initialize the groups field.
+ (function_instance::hash): Hash the group index.
+ (function_builder::get_name): Add the group suffix.
+ (function_builder::add_overloaded_functions): Iterate over all
+ group suffixes.
+ (function_resolver::lookup_form): Take a group suffix parameter.
+ (function_resolver::resolve_to): Likewise.
+ * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_GROUP_SUFFIX): New
+ macro.
+ (x2, x3, x4): New group suffixes.
+ * config/aarch64/aarch64-sve-builtins.h (group_suffix_index): New enum.
+ (group_suffix_info): New structure.
+ (function_group_info::groups): New member variable.
+ (function_instance::group_suffix_id): Likewise.
+ (group_suffixes): New array.
+ (function_instance::operator==): Compare the group suffixes.
+ (function_instance::group_suffix): New function.
+---
+ .../aarch64/aarch64-sve-builtins-shapes.cc | 53 ++++++------
+ .../aarch64/aarch64-sve-builtins-sve2.cc | 10 +--
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 84 +++++++++++++------
+ gcc/config/aarch64/aarch64-sve-builtins.def | 9 ++
+ gcc/config/aarch64/aarch64-sve-builtins.h | 81 ++++++++++++++----
+ 5 files changed, 165 insertions(+), 72 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+index 4fa4181b9..3ecef026c 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+@@ -275,18 +275,20 @@ parse_signature (const function_instance &instance, const char *format,
+ }
+
+ /* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID,
+- the type suffixes at index TI and the predication suffix at index PI.
+- The other arguments are as for build_all. */
++ the type suffixes at index TI, the group suffixes at index GI, and the
++ predication suffix at index PI. The other arguments are as for
++ build_all. */
+ static void
+ build_one (function_builder &b, const char *signature,
+ const function_group_info &group, mode_suffix_index mode_suffix_id,
+- unsigned int ti, unsigned int pi, bool force_direct_overloads)
++ unsigned int ti, unsigned int gi, unsigned int pi,
++ bool force_direct_overloads)
+ {
+ /* Byte forms of svdupq take 16 arguments. */
+ auto_vec<tree, 16> argument_types;
+ function_instance instance (group.base_name, *group.base, *group.shape,
+ mode_suffix_id, group.types[ti],
+- group.preds[pi]);
++ group.groups[gi], group.preds[pi]);
+ tree return_type = parse_signature (instance, signature, argument_types);
+ apply_predication (instance, return_type, argument_types);
+ b.add_unique_function (instance, return_type, argument_types,
+@@ -312,24 +314,26 @@ build_32_64 (function_builder &b, const char *signature,
+ mode_suffix_index mode64, bool force_direct_overloads = false)
+ {
+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+- if (group.types[0][0] == NUM_TYPE_SUFFIXES)
+- {
+- gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
+- build_one (b, signature, group, mode32, 0, pi,
+- force_direct_overloads);
+- build_one (b, signature, group, mode64, 0, pi,
+- force_direct_overloads);
+- }
+- else
+- for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
++ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++ if (group.types[0][0] == NUM_TYPE_SUFFIXES)
+ {
+- unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
+- gcc_assert (bits == 32 || bits == 64);
+- mode_suffix_index mode = bits == 32 ? mode32 : mode64;
+- if (mode != MODE_none)
+- build_one (b, signature, group, mode, ti, pi,
+- force_direct_overloads);
++ gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
++ build_one (b, signature, group, mode32, 0, gi, pi,
++ force_direct_overloads);
++ build_one (b, signature, group, mode64, 0, gi, pi,
++ force_direct_overloads);
+ }
++ else
++ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
++ ++ti)
++ {
++ unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
++ gcc_assert (bits == 32 || bits == 64);
++ mode_suffix_index mode = bits == 32 ? mode32 : mode64;
++ if (mode != MODE_none)
++ build_one (b, signature, group, mode, ti, gi, pi,
++ force_direct_overloads);
++ }
+ }
+
+ /* For every type and predicate combination in GROUP, add one function
+@@ -423,10 +427,11 @@ build_all (function_builder &b, const char *signature,
+ bool force_direct_overloads = false)
+ {
+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+- for (unsigned int ti = 0;
+- ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
+- build_one (b, signature, group, mode_suffix_id, ti, pi,
+- force_direct_overloads);
++ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++ for (unsigned int ti = 0;
++ ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
++ build_one (b, signature, group, mode_suffix_id, ti, gi, pi,
++ force_direct_overloads);
+ }
+
+ /* TYPE is the largest type suffix associated with the arguments of R,
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+index e066f096d..a94e5e269 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+@@ -252,7 +252,7 @@ public:
+ that we can use for sensible shift amounts. */
+ function_instance instance ("svqshl", functions::svqshl,
+ shapes::binary_int_opt_n, MODE_n,
+- f.type_suffix_ids, f.pred);
++ f.type_suffix_ids, GROUP_none, f.pred);
+ return f.redirect_call (instance);
+ }
+ else
+@@ -261,7 +261,7 @@ public:
+ that we can use for sensible shift amounts. */
+ function_instance instance ("svrshl", functions::svrshl,
+ shapes::binary_int_opt_n, MODE_n,
+- f.type_suffix_ids, f.pred);
++ f.type_suffix_ids, GROUP_none, f.pred);
+ return f.redirect_call (instance);
+ }
+ }
+@@ -290,7 +290,7 @@ public:
+ -wi::to_wide (amount));
+ function_instance instance ("svasr", functions::svasr,
+ shapes::binary_uint_opt_n, MODE_n,
+- f.type_suffix_ids, f.pred);
++ f.type_suffix_ids, GROUP_none, f.pred);
+ if (f.type_suffix (0).unsigned_p)
+ {
+ instance.base_name = "svlsr";
+@@ -322,7 +322,7 @@ public:
+ that we can use for sensible shift amounts. */
+ function_instance instance ("svlsl", functions::svlsl,
+ shapes::binary_uint_opt_n, MODE_n,
+- f.type_suffix_ids, f.pred);
++ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = as_a <gcall *> (f.redirect_call (instance));
+ gimple_call_set_arg (call, 2, amount);
+ return call;
+@@ -335,7 +335,7 @@ public:
+ -wi::to_wide (amount));
+ function_instance instance ("svrshr", functions::svrshr,
+ shapes::shift_right_imm, MODE_n,
+- f.type_suffix_ids, f.pred);
++ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = as_a <gcall *> (f.redirect_call (instance));
+ gimple_call_set_arg (call, 2, amount);
+ return call;
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index dde01f676..dc3fd80da 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -144,6 +144,13 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
+ 0, VOIDmode }
+ };
+
++CONSTEXPR const group_suffix_info group_suffixes[] = {
++#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) \
++ { "_" #NAME, VG, VECTORS_PER_TUPLE },
++#include "aarch64-sve-builtins.def"
++ { "", 0, 1 }
++};
++
+ /* Define a TYPES_<combination> macro for each combination of type
+ suffixes that an ACLE function can have, where <combination> is the
+ name used in DEF_SVE_FUNCTION entries.
+@@ -483,6 +490,10 @@ DEF_SVE_TYPES_ARRAY (inc_dec_n);
+ DEF_SVE_TYPES_ARRAY (reinterpret);
+ DEF_SVE_TYPES_ARRAY (while);
+
++static const group_suffix_index groups_none[] = {
++ GROUP_none, NUM_GROUP_SUFFIXES
++};
++
+ /* Used by functions that have no governing predicate. */
+ static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
+
+@@ -524,8 +535,8 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
+ /* A list of all SVE ACLE functions. */
+ static CONSTEXPR const function_group_info function_groups[] = {
+ #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
+- { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
+- REQUIRED_EXTENSIONS },
++ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_none, \
++ preds_##PREDS, REQUIRED_EXTENSIONS },
+ #include "aarch64-sve-builtins.def"
+ };
+
+@@ -788,6 +799,7 @@ function_instance::hash () const
+ h.add_int (mode_suffix_id);
+ h.add_int (type_suffix_ids[0]);
+ h.add_int (type_suffix_ids[1]);
++ h.add_int (group_suffix_id);
+ h.add_int (pred);
+ return h.end ();
+ }
+@@ -957,6 +969,8 @@ function_builder::get_name (const function_instance &instance,
+ for (unsigned int i = 0; i < 2; ++i)
+ if (!overloaded_p || instance.shape->explicit_type_suffix_p (i))
+ append_name (instance.type_suffix (i).string);
++ if (!overloaded_p || instance.shape->explicit_group_suffix_p ())
++ append_name (instance.group_suffix ().string);
+ append_name (pred_suffixes[instance.pred]);
+ return finish_name ();
+ }
+@@ -1113,19 +1127,26 @@ void
+ function_builder::add_overloaded_functions (const function_group_info &group,
+ mode_suffix_index mode)
+ {
+- unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
+- unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
+- for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
++ bool explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
++ bool explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
++ bool explicit_group = (*group.shape)->explicit_group_suffix_p ();
++ auto add_function = [&](const type_suffix_pair &types,
++ group_suffix_index group_suffix_id,
++ unsigned int pi)
++ {
++ function_instance instance (group.base_name, *group.base,
++ *group.shape, mode, types,
++ group_suffix_id, group.preds[pi]);
++ add_overloaded_function (instance, group.required_extensions);
++ };
++
++ auto add_group_suffix = [&](group_suffix_index group_suffix_id,
++ unsigned int pi)
+ {
+ if (!explicit_type0 && !explicit_type1)
+- {
+- /* Deal with the common case in which there is one overloaded
+- function for all type combinations. */
+- function_instance instance (group.base_name, *group.base,
+- *group.shape, mode, types_none[0],
+- group.preds[pi]);
+- add_overloaded_function (instance, group.required_extensions);
+- }
++ /* Deal with the common case in which there is one overloaded
++ function for all type combinations. */
++ add_function (types_none[0], group_suffix_id, pi);
+ else
+ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
+ ++ti)
+@@ -1136,12 +1157,16 @@ function_builder::add_overloaded_functions (const function_group_info &group,
+ explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
+ explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
+ };
+- function_instance instance (group.base_name, *group.base,
+- *group.shape, mode, types,
+- group.preds[pi]);
+- add_overloaded_function (instance, group.required_extensions);
++ add_function (types, group_suffix_id, pi);
+ }
+- }
++ };
++
++ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
++ if (explicit_group)
++ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
++ add_group_suffix (group.groups[gi], pi);
++ else
++ add_group_suffix (GROUP_none, pi);
+ }
+
+ /* Register all the functions in GROUP. */
+@@ -1213,29 +1238,34 @@ function_resolver::report_no_such_form (type_suffix_index type)
+ }
+
+ /* Silently check whether there is an instance of the function with the
+- mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1.
+- Return its function decl if so, otherwise return null. */
++ mode suffix given by MODE, the type suffixes given by TYPE0 and TYPE1,
++ and the group suffix given by GROUP. Return its function decl if so,
++ otherwise return null. */
+ tree
+ function_resolver::lookup_form (mode_suffix_index mode,
+ type_suffix_index type0,
+- type_suffix_index type1)
++ type_suffix_index type1,
++ group_suffix_index group)
+ {
+ type_suffix_pair types = { type0, type1 };
+- function_instance instance (base_name, base, shape, mode, types, pred);
++ function_instance instance (base_name, base, shape, mode, types,
++ group, pred);
+ registered_function *rfn
+ = function_table->find_with_hash (instance, instance.hash ());
+ return rfn ? rfn->decl : NULL_TREE;
+ }
+
+-/* Resolve the function to one with the mode suffix given by MODE and the
+- type suffixes given by TYPE0 and TYPE1. Return its function decl on
+- success, otherwise report an error and return error_mark_node. */
++/* Resolve the function to one with the mode suffix given by MODE, the
++ type suffixes given by TYPE0 and TYPE1, and group suffix given by
++ GROUP. Return its function decl on success, otherwise report an
++ error and return error_mark_node. */
+ tree
+ function_resolver::resolve_to (mode_suffix_index mode,
+ type_suffix_index type0,
+- type_suffix_index type1)
++ type_suffix_index type1,
++ group_suffix_index group)
+ {
+- tree res = lookup_form (mode, type0, type1);
++ tree res = lookup_form (mode, type0, type1, group);
+ if (!res)
+ {
+ if (type1 == NUM_TYPE_SUFFIXES)
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def
+index 6e4dcdbc9..d9bf9c350 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.def
++++ b/gcc/config/aarch64/aarch64-sve-builtins.def
+@@ -29,6 +29,10 @@
+ #define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E)
+ #endif
+
++#ifndef DEF_SVE_GROUP_SUFFIX
++#define DEF_SVE_GROUP_SUFFIX(A, B, C)
++#endif
++
+ #ifndef DEF_SVE_FUNCTION
+ #define DEF_SVE_FUNCTION(A, B, C, D)
+ #endif
+@@ -95,10 +99,15 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode)
+ DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode)
+ DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode)
+
++DEF_SVE_GROUP_SUFFIX (x2, 0, 2)
++DEF_SVE_GROUP_SUFFIX (x3, 0, 3)
++DEF_SVE_GROUP_SUFFIX (x4, 0, 4)
++
+ #include "aarch64-sve-builtins-base.def"
+ #include "aarch64-sve-builtins-sve2.def"
+
+ #undef DEF_SVE_FUNCTION
++#undef DEF_SVE_GROUP_SUFFIX
+ #undef DEF_SVE_TYPE_SUFFIX
+ #undef DEF_SVE_TYPE
+ #undef DEF_SVE_MODE
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 824c31cd7..374c57e93 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -180,6 +180,17 @@ enum type_suffix_index
+ NUM_TYPE_SUFFIXES
+ };
+
++/* Enumerates the possible group suffixes. Each suffix combines two
++ optional pieces of information: the vector group size in a ZA index,
++ and the number of vectors in the largest tuple argument. */
++enum group_suffix_index
++{
++#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) GROUP_##NAME,
++#include "aarch64-sve-builtins.def"
++ GROUP_none,
++ NUM_GROUP_SUFFIXES
++};
++
+ /* Combines two type suffixes. */
+ typedef enum type_suffix_index type_suffix_pair[2];
+
+@@ -237,6 +248,21 @@ struct type_suffix_info
+ machine_mode vector_mode : 16;
+ };
+
++/* Static information about a group suffix. */
++struct group_suffix_info
++{
++ /* The suffix string itself. */
++ const char *string;
++
++ /* If the suffix describes a vector group in a ZA index, this is the
++ size of that group, otherwise it is zero. */
++ unsigned int vg;
++
++ /* The number of vectors in the largest (or only) tuple argument,
++ or 1 if the suffix does not convey this information. */
++ unsigned int vectors_per_tuple;
++};
++
+ /* Static information about a set of functions. */
+ struct function_group_info
+ {
+@@ -251,14 +277,16 @@ struct function_group_info
+ shapes. */
+ const function_shape *const *shape;
+
+- /* A list of the available type suffixes, and of the available predication
+- types. The function supports every combination of the two.
++ /* A list of the available type suffixes, group suffixes, and predication
++ types. The function supports every combination of the three.
++
++ The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES.
++ It is lexicographically ordered based on the index value.
+
+- The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES
+- while the list of predication types is terminated by NUM_PREDS.
+- The list of type suffixes is lexicographically ordered based
+- on the index value. */
++ The list of group suffixes is terminated by NUM_GROUP_SUFFIXES
++ and the list of predication types is terminated by NUM_PREDS. */
+ const type_suffix_pair *types;
++ const group_suffix_index *groups;
+ const predication_index *preds;
+
+ /* The architecture extensions that the functions require, as a set of
+@@ -273,7 +301,8 @@ class GTY((user)) function_instance
+ public:
+ function_instance (const char *, const function_base *,
+ const function_shape *, mode_suffix_index,
+- const type_suffix_pair &, predication_index);
++ const type_suffix_pair &, group_suffix_index,
++ predication_index);
+
+ bool operator== (const function_instance &) const;
+ bool operator!= (const function_instance &) const;
+@@ -294,6 +323,8 @@ public:
+ units_index displacement_units () const;
+
+ const type_suffix_info &type_suffix (unsigned int) const;
++ const group_suffix_info &group_suffix () const;
++
+ tree scalar_type (unsigned int) const;
+ tree vector_type (unsigned int) const;
+ tree tuple_type (unsigned int) const;
+@@ -301,14 +332,14 @@ public:
+ machine_mode vector_mode (unsigned int) const;
+ machine_mode gp_mode (unsigned int) const;
+
+- /* The properties of the function. (The explicit "enum"s are required
+- for gengtype.) */
++ /* The properties of the function. */
+ const char *base_name;
+ const function_base *base;
+ const function_shape *shape;
+- enum mode_suffix_index mode_suffix_id;
++ mode_suffix_index mode_suffix_id;
+ type_suffix_pair type_suffix_ids;
+- enum predication_index pred;
++ group_suffix_index group_suffix_id;
++ predication_index pred;
+ };
+
+ class registered_function;
+@@ -390,10 +421,12 @@ public:
+ tree report_no_such_form (type_suffix_index);
+ tree lookup_form (mode_suffix_index,
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+- type_suffix_index = NUM_TYPE_SUFFIXES);
++ type_suffix_index = NUM_TYPE_SUFFIXES,
++ group_suffix_index = GROUP_none);
+ tree resolve_to (mode_suffix_index,
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+- type_suffix_index = NUM_TYPE_SUFFIXES);
++ type_suffix_index = NUM_TYPE_SUFFIXES,
++ group_suffix_index = GROUP_none);
+
+ type_suffix_index infer_integer_scalar_type (unsigned int);
+ type_suffix_index infer_pointer_type (unsigned int, bool = false);
+@@ -641,6 +674,11 @@ class function_shape
+ public:
+ virtual bool explicit_type_suffix_p (unsigned int) const = 0;
+
++ /* True if the group suffix is present in overloaded names.
++ This isn't meaningful for pre-SME intrinsics, and true is
++ more common than false, so provide a default definition. */
++ virtual bool explicit_group_suffix_p () const { return true; }
++
+ /* Define all functions associated with the given group. */
+ virtual void build (function_builder &,
+ const function_group_info &) const = 0;
+@@ -669,6 +707,7 @@ private:
+
+ extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];
+ extern const mode_suffix_info mode_suffixes[MODE_none + 1];
++extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES];
+
+ extern tree scalar_types[NUM_VECTOR_TYPES];
+ extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+@@ -728,9 +767,11 @@ function_instance (const char *base_name_in,
+ const function_base *base_in,
+ const function_shape *shape_in,
+ mode_suffix_index mode_suffix_id_in,
+ const type_suffix_pair &type_suffix_ids_in,
++ group_suffix_index group_suffix_id_in,
+ predication_index pred_in)
+ : base_name (base_name_in), base (base_in), shape (shape_in),
+- mode_suffix_id (mode_suffix_id_in), pred (pred_in)
++ mode_suffix_id (mode_suffix_id_in), group_suffix_id (group_suffix_id_in),
++ pred (pred_in)
+ {
+ memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids));
+ }
+@@ -741,9 +782,10 @@ function_instance::operator== (const function_instance &other) const
+ {
+ return (base == other.base
+ && shape == other.shape
+ && mode_suffix_id == other.mode_suffix_id
+- && pred == other.pred
+ && type_suffix_ids[0] == other.type_suffix_ids[0]
+- && type_suffix_ids[1] == other.type_suffix_ids[1]);
++ && type_suffix_ids[1] == other.type_suffix_ids[1]
++ && group_suffix_id == other.group_suffix_id
++ && pred == other.pred);
+ }
+
+ inline bool
+@@ -815,6 +857,13 @@ function_instance::type_suffix (unsigned int i) const
+ return type_suffixes[type_suffix_ids[i]];
+ }
+
++/* Return information about the function's group suffix. */
++inline const group_suffix_info &
++function_instance::group_suffix () const
++{
++ return group_suffixes[group_suffix_id];
++}
++
+ /* Return the scalar type associated with type suffix I. */
+ inline tree
+ function_instance::scalar_type (unsigned int i) const
+--
+2.33.0
+
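In intrinsic names the new component slots in after the type suffixes, giving base_type_group_predication. A naming illustration, assuming SME2-era ACLE spellings; the tuple reinterpret below is exactly the kind of function the x2/x3/x4 suffixes were added for.

#include <arm_sve.h>

// The _x2 group suffix marks a two-vector tuple form:
// svint32x2_t in, svuint8x2_t out.
svuint8x2_t reinterpret_pair (svint32x2_t x)
{
  return svreinterpret_u8_s32_x2 (x);
}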
View file
_service:tar_scm:0164-LoongArch-Define-ISA-versions.patch
Added
@@ -0,0 +1,1016 @@ +From 66c8369ff9e5987c14786692cf6fd945a94273a1 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 23 Apr 2024 10:42:47 +0800 +Subject: PATCH 164/188 LoongArch: Define ISA versions + +These ISA versions are defined as -march= parameters and +are recommended for building binaries for distribution. + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Make la64v1.0 the default ISA preset of the lp64d ABI. + * config/loongarch/genopts/loongarch-strings: Define la64v1.0, la64v1.1. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-c.cc (LARCH_CPP_SET_PROCESSOR): Likewise. + (loongarch_cpu_cpp_builtins): Likewise. + * config/loongarch/loongarch-cpu.cc (get_native_prid): Likewise. + (fill_native_cpu_config): Likewise. + * config/loongarch/loongarch-def.cc (array_tune): Likewise. + * config/loongarch/loongarch-def.h: Likewise. + * config/loongarch/loongarch-driver.cc (driver_set_m_parm): Likewise. + (driver_get_normalized_m_opts): Likewise. + * config/loongarch/loongarch-opts.cc (default_tune_for_arch): Likewise. + (TUNE_FOR_ARCH): Likewise. + (arch_str): Likewise. + (loongarch_target_option_override): Likewise. + * config/loongarch/loongarch-opts.h (TARGET_uARCH_LA464): Likewise. + (TARGET_uARCH_LA664): Likewise. + * config/loongarch/loongarch-str.h (STR_CPU_ABI_DEFAULT): Likewise. + (STR_ARCH_ABI_DEFAULT): Likewise. + (STR_TUNE_GENERIC): Likewise. + (STR_ARCH_LA64V1_0): Likewise. + (STR_ARCH_LA64V1_1): Likewise. + * config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width): Likewise. + (loongarch_asm_code_end): Likewise. + * config/loongarch/loongarch.opt: Likewise. + * doc/invoke.texi: Likewise. +--- + gcc/config.gcc | 34 ++++---- + .../loongarch/genopts/loongarch-strings | 5 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 43 ++++++++-- + gcc/config/loongarch/loongarch-c.cc | 37 +++------ + gcc/config/loongarch/loongarch-cpu.cc | 35 ++++---- + gcc/config/loongarch/loongarch-def.cc | 83 +++++++++++++------ + gcc/config/loongarch/loongarch-def.h | 37 ++++++--- + gcc/config/loongarch/loongarch-driver.cc | 8 +- + gcc/config/loongarch/loongarch-opts.cc | 66 +++++++++++---- + gcc/config/loongarch/loongarch-opts.h | 4 +- + gcc/config/loongarch/loongarch-str.h | 5 +- + gcc/config/loongarch/loongarch.cc | 11 +-- + gcc/config/loongarch/loongarch.opt | 43 ++++++++-- + gcc/doc/invoke.texi | 57 ++++++++----- + 14 files changed, 300 insertions(+), 168 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 1db558d4c..c6820d0f1 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -5035,7 +5035,7 @@ case "${target}" in + + # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- "" | abi-default | loongarch64 | la4664) ;; # OK, append here. ++ "" | la64v1.01 | abi-default | loongarch64 | la4664) ;; # OK, append here. + native) + if test x${host} != x${target}; then + echo "--with-arch=native is illegal for cross-compiler." 
1>&2 +@@ -5082,10 +5082,18 @@ case "${target}" in + + # Infer ISA-related default options from the ABI: pass 1 + case ${abi_base}/${abi_ext} in +- lp64*/base) ++ lp64d/base) + # architectures that support lp64* ABI +- arch_pattern="native|abi-default|loongarch64|la4664" +- # default architecture for lp64* ABI ++ arch_pattern="native|abi-default|la64v1.01|loongarch64|la4664" ++ ++ # default architecture for lp64d ABI ++ arch_default="la64v1.0" ++ ;; ++ lp64fs/base) ++ # architectures that support lp64* ABI ++ arch_pattern="native|abi-default|la64v1.01|loongarch64|la4664" ++ ++ # default architecture for lp64fs ABI + arch_default="abi-default" + ;; + *) +@@ -5157,15 +5165,7 @@ case "${target}" in + + + # Check default with_tune configuration using with_arch. +- case ${with_arch} in +- loongarch64) +- tune_pattern="native|abi-default|loongarch64|la4664" +- ;; +- *) +- # By default, $with_tune == $with_arch +- tune_pattern="*" +- ;; +- esac ++ tune_pattern="native|generic|loongarch64|la4664" + + case ${with_tune} in + "") ;; # OK +@@ -5215,7 +5215,7 @@ case "${target}" in + # Fixed: use the default gcc configuration for all multilib + # builds by default. + with_multilib_default="" ;; +- arch,native|arch,loongarch64|arch,la4664) # OK, append here. ++ arch,native|arch,la64v1.01|arch,loongarch64|arch,la4664) # OK, append here. + with_multilib_default="/march=${component}" ;; + arch,*) + with_multilib_default="/march=abi-default" +@@ -5315,7 +5315,7 @@ case "${target}" in + if test x${parse_state} = x"arch"; then + # -march option + case ${component} in +- native | abi-default | loongarch64 | la4664) # OK, append here. ++ native | abi-default | la64v1.01 | loongarch64 | la4664) # OK, append here. + # Append -march spec for each multilib variant. + loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}" + parse_state="opts" +@@ -5858,7 +5858,7 @@ case ${target} in + # See macro definitions from loongarch-opts.h and loongarch-cpu.h. + + # Architecture +- tm_defines="${tm_defines} DEFAULT_CPU_ARCH=CPU_$(echo ${with_arch} | tr a-z- A-Z_)" ++ tm_defines="${tm_defines} DEFAULT_CPU_ARCH=ARCH_$(echo ${with_arch} | tr a-z.- A-Z__)" + + # Base ABI type + tm_defines="${tm_defines} DEFAULT_ABI_BASE=ABI_BASE_$(echo ${abi_base} | tr a-z- A-Z_)" +@@ -5870,7 +5870,7 @@ case ${target} in + + # Microarchitecture + if test x${with_tune} != x; then +- tm_defines="${tm_defines} DEFAULT_CPU_TUNE=CPU_$(echo ${with_tune} | tr a-z- A-Z_)" ++ tm_defines="${tm_defines} DEFAULT_CPU_TUNE=TUNE_$(echo ${with_tune} | tr a-z.- A-Z__)" + fi + + # FPU adjustment +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 99fd4e7cd..fd2f9b4f3 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -23,10 +23,13 @@ OPTSTR_ARCH arch + OPTSTR_TUNE tune + + STR_CPU_NATIVE native +-STR_CPU_ABI_DEFAULT abi-default ++STR_ARCH_ABI_DEFAULT abi-default ++STR_TUNE_GENERIC generic + STR_CPU_LOONGARCH64 loongarch64 + STR_CPU_LA464 la464 + STR_CPU_LA664 la664 ++STR_ARCH_LA64V1_0 la64v1.0 ++STR_ARCH_LA64V1_1 la64v1.1 + + # Base architecture + STR_ISA_BASE_LA64 la64 +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f3d53f03c..0ecd10922 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -95,30 +95,55 @@ Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). 
+ + ;; Base target models (implies ISA & tune parameters) + Enum +-Name(cpu_type) Type(int) +-LoongArch CPU types: ++Name(arch_type) Type(int) ++LoongArch ARCH presets: + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_NATIVE@@) Value(CPU_NATIVE) ++Enum(arch_type) String(@@STR_CPU_NATIVE@@) Value(ARCH_NATIVE) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_ABI_DEFAULT@@) Value(CPU_ABI_DEFAULT) ++Enum(arch_type) String(@@STR_ARCH_ABI_DEFAULT@@) Value(ARCH_ABI_DEFAULT) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) ++Enum(arch_type) String(@@STR_CPU_LOONGARCH64@@) Value(ARCH_LOONGARCH64) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) ++Enum(arch_type) String(@@STR_CPU_LA464@@) Value(ARCH_LA464) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++Enum(arch_type) String(@@STR_CPU_LA664@@) Value(ARCH_LA664) ++ ++EnumValue ++Enum(arch_type) String(@@STR_ARCH_LA64V1_0@@) Value(ARCH_LA64V1_0) ++ ++EnumValue ++Enum(arch_type) String(@@STR_ARCH_LA64V1_1@@) Value(ARCH_LA64V1_1) + + m@@OPTSTR_ARCH@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(arch_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. + ++Enum ++Name(tune_type) Type(int) ++LoongArch TUNE presets: ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_NATIVE@@) Value(TUNE_NATIVE) ++ ++EnumValue ++Enum(tune_type) String(@@STR_TUNE_GENERIC@@) Value(TUNE_GENERIC) ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_LOONGARCH64@@) Value(TUNE_LOONGARCH64) ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_LA464@@) Value(TUNE_LA464) ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_LA664@@) Value(TUNE_LA664) ++ + m@@OPTSTR_TUNE@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(tune_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. + + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index df2a482ad..153db75b0 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -31,29 +31,6 @@ along with GCC; see the file COPYING3. If not see + #define builtin_define(TXT) cpp_define (pfile, TXT) + #define builtin_assert(TXT) cpp_assert (pfile, TXT) + +-/* Define preprocessor macros for the -march and -mtune options. +- PREFIX is either _LOONGARCH_ARCH or _LOONGARCH_TUNE, INFO is +- the selected processor. If INFO's canonical name is "foo", +- define PREFIX to be "foo", and define an additional macro +- PREFIX_FOO. 
*/
+-#define LARCH_CPP_SET_PROCESSOR(PREFIX, CPU_TYPE) \
+- do \
+- { \
+- char *macro, *p; \
+- int cpu_type = (CPU_TYPE); \
+- \
+- macro = concat ((PREFIX), "_", \
+- loongarch_cpu_strings[cpu_type], NULL); \
+- for (p = macro; *p != 0; p++) \
+- *p = TOUPPER (*p); \
+- \
+- builtin_define (macro); \
+- builtin_define_with_value ((PREFIX), \
+- loongarch_cpu_strings[cpu_type], 1); \
+- free (macro); \
+- } \
+- while (0)
+-
+ void
+ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
+ {
+@@ -61,11 +38,17 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
+ builtin_assert ("cpu=loongarch");
+ builtin_define ("__loongarch__");
+
+- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", la_target.cpu_arch);
+- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", la_target.cpu_tune);
++ builtin_define_with_value ("__loongarch_arch",
++ loongarch_arch_strings[la_target.cpu_arch], 1);
++
++ builtin_define_with_value ("__loongarch_tune",
++ loongarch_tune_strings[la_target.cpu_tune], 1);
++
++ builtin_define_with_value ("_LOONGARCH_ARCH",
++ loongarch_arch_strings[la_target.cpu_arch], 1);
+
+- LARCH_CPP_SET_PROCESSOR ("__loongarch_arch", la_target.cpu_arch);
+- LARCH_CPP_SET_PROCESSOR ("__loongarch_tune", la_target.cpu_tune);
++ builtin_define_with_value ("_LOONGARCH_TUNE",
++ loongarch_tune_strings[la_target.cpu_tune], 1);
+
+ /* Base architecture / ABI. */
+ if (TARGET_64BIT)
+diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
+index 551d4f72c..eb1eb8011 100644
+--- a/gcc/config/loongarch/loongarch-cpu.cc
++++ b/gcc/config/loongarch/loongarch-cpu.cc
+@@ -62,7 +62,7 @@ cache_cpucfg (void)
+ uint32_t
+ get_native_prid (void)
+ {
+- /* Fill loongarch_cpu_default_config[CPU_NATIVE] with cpucfg data,
++ /* Fill loongarch_cpu_default_config[ARCH_NATIVE] with cpucfg data,
+ see "Loongson Architecture Reference Manual"
+ (Volume 1, Section 2.2.10.5) */
+ return cpucfg_cache[0];
+@@ -76,13 +76,14 @@ get_native_prid_str (void)
+ return (const char*) prid_str;
+ }
+
+-/* Fill property tables for CPU_NATIVE. */
++/* Fill property tables for ARCH_NATIVE / TUNE_NATIVE. */
+ void
+ fill_native_cpu_config (struct loongarch_target *tgt)
+ {
+- int arch_native_p = tgt->cpu_arch == CPU_NATIVE;
+- int tune_native_p = tgt->cpu_tune == CPU_NATIVE;
+- int native_cpu_type = CPU_NATIVE;
++ int arch_native_p = tgt->cpu_arch == ARCH_NATIVE;
++ int tune_native_p = tgt->cpu_tune == TUNE_NATIVE;
++ int native_cpu_arch = ARCH_NATIVE;
++ int native_cpu_tune = TUNE_NATIVE;
+
+ /* Nothing needs to be done unless "-march/tune=native"
+ is given or implied. */
+@@ -99,11 +100,13 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ switch (cpucfg_cache[0] & 0x00ffff00)
+ {
+ case 0x0014c000: /* LA464 */
+- native_cpu_type = CPU_LA464;
++ native_cpu_arch = ARCH_LA464;
++ native_cpu_tune = TUNE_LA464;
+ break;
+
+ case 0x0014d000: /* LA664 */
+- native_cpu_type = CPU_LA664;
++ native_cpu_arch = ARCH_LA664;
++ native_cpu_tune = TUNE_LA664;
+ break;
+
+ default:
+@@ -119,7 +122,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ if (arch_native_p)
+ {
+ int tmp;
+- tgt->cpu_arch = native_cpu_type;
++ tgt->cpu_arch = native_cpu_arch;
+
+ auto &preset = loongarch_cpu_default_isa[tgt->cpu_arch];
+
+@@ -127,8 +130,8 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ With: base architecture (ARCH)
+ At: cpucfg_words[1][1:0] */
+
+- if (native_cpu_type != CPU_NATIVE)
+- tmp = loongarch_cpu_default_isa[native_cpu_type].base;
++ if (native_cpu_arch != ARCH_NATIVE)
++ tmp = loongarch_cpu_default_isa[native_cpu_arch].base;
+ else
+ switch (cpucfg_cache[1] & 0x3)
+ {
+@@ -173,7 +176,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ }
+
+ /* Check consistency with PRID presets. */
+- if (native_cpu_type != CPU_NATIVE && tmp != preset.fpu)
++ if (native_cpu_arch != ARCH_NATIVE && tmp != preset.fpu)
+ warning (0, "floating-point unit %qs differs from PRID preset %qs",
+ loongarch_isa_ext_strings[tmp],
+ loongarch_isa_ext_strings[preset.fpu]);
+@@ -182,7 +185,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ preset.fpu = tmp;
+
+
+- /* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd
++ /* Fill: loongarch_cpu_default_isa[ARCH_NATIVE].simd
+ With: SIMD extension type (LSX, LASX)
+ At: cpucfg_words[2][7:6] */
+
+@@ -212,7 +215,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ /* Check consistency with PRID presets. */
+
+ /*
+- if (native_cpu_type != CPU_NATIVE && tmp != preset.simd)
++ if (native_cpu_arch != ARCH_NATIVE && tmp != preset.simd)
+ warning (0, "SIMD extension %qs differs from PRID preset %qs",
+ loongarch_isa_ext_strings[tmp],
+ loongarch_isa_ext_strings[preset.simd]);
+@@ -229,10 +232,10 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+ if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
+ hw_isa_evolution |= entry.isa_evolution_bit;
+
+- if (native_cpu_type != CPU_NATIVE)
++ if (native_cpu_arch != ARCH_NATIVE)
+ {
+ /* Check if the local CPU really supports the features of the base
+- ISA of probed native_cpu_type. If any feature is not detected,
++ ISA of probed native_cpu_arch. If any feature is not detected,
+ either GCC or the hardware is buggy. */
+ if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution)
+ warning (0,
+@@ -247,7 +250,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
+
+ if (tune_native_p)
+ {
+- tgt->cpu_tune = native_cpu_type;
++ tgt->cpu_tune = native_cpu_tune;
+
+ /* Fill: loongarch_cpu_cache[tgt->cpu_tune]
+ With: cache size info
+diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
+index a48050c5f..c3f9fc6de 100644
+--- a/gcc/config/loongarch/loongarch-def.cc
++++ b/gcc/config/loongarch/loongarch-def.cc
+@@ -31,39 +31,64 @@ template <class T, int N>
+ using array = loongarch_def_array<T, N>;
+
+ template <class T>
+-using array_tune = array<T, N_TUNE_TYPES>;
++using array_arch = array<T, N_ARCH_TYPES>;
+
+ template <class T>
+-using array_arch = array<T, N_ARCH_TYPES>;
++using array_tune = array<T, N_TUNE_TYPES>;
+
+-/* CPU property tables. 
*/ +-array_tune<const char *> loongarch_cpu_strings = array_tune<const char *> () +- .set (CPU_NATIVE, STR_CPU_NATIVE) +- .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT) +- .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64) +- .set (CPU_LA464, STR_CPU_LA464) +- .set (CPU_LA664, STR_CPU_LA664); ++array_arch<const char *> loongarch_arch_strings = array_arch<const char *> () ++ .set (ARCH_NATIVE, STR_CPU_NATIVE) ++ .set (ARCH_ABI_DEFAULT, STR_ARCH_ABI_DEFAULT) ++ .set (ARCH_LOONGARCH64, STR_CPU_LOONGARCH64) ++ .set (ARCH_LA464, STR_CPU_LA464) ++ .set (ARCH_LA664, STR_CPU_LA664) ++ .set (ARCH_LA64V1_0, STR_ARCH_LA64V1_0) ++ .set (ARCH_LA64V1_1, STR_ARCH_LA64V1_1); ++ ++array_tune<const char *> loongarch_tune_strings = array_tune<const char *> () ++ .set (TUNE_NATIVE, STR_CPU_NATIVE) ++ .set (TUNE_GENERIC, STR_TUNE_GENERIC) ++ .set (TUNE_LOONGARCH64, STR_CPU_LOONGARCH64) ++ .set (TUNE_LA464, STR_CPU_LA464) ++ .set (TUNE_LA664, STR_CPU_LA664); + + array_arch<loongarch_isa> loongarch_cpu_default_isa = + array_arch<loongarch_isa> () +- .set (CPU_LOONGARCH64, ++ .set (ARCH_LOONGARCH64, + loongarch_isa () + .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64)) +- .set (CPU_LA464, ++ ++ .set (ARCH_LA464, + loongarch_isa () + .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) +- .set (CPU_LA664, ++ ++ .set (ARCH_LA664, + loongarch_isa () + .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) ++ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS ++ | OPTION_MASK_ISA_FRECIPE)) ++ .set (ARCH_LA64V1_0, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LSX)) ++ ++ .set (ARCH_LA64V1_1, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LSX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA + | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS + | OPTION_MASK_ISA_FRECIPE)); + ++ + static inline loongarch_cache la464_cache () + { + return loongarch_cache () +@@ -75,9 +100,10 @@ static inline loongarch_cache la464_cache () + + array_tune<loongarch_cache> loongarch_cpu_cache = + array_tune<loongarch_cache> () +- .set (CPU_LOONGARCH64, la464_cache ()) +- .set (CPU_LA464, la464_cache ()) +- .set (CPU_LA664, la464_cache ()); ++ .set (TUNE_GENERIC, la464_cache ()) ++ .set (TUNE_LOONGARCH64, la464_cache ()) ++ .set (TUNE_LA464, la464_cache ()) ++ .set (TUNE_LA664, la464_cache ()); + + static inline loongarch_align la464_align () + { +@@ -91,9 +117,10 @@ static inline loongarch_align la664_align () + + array_tune<loongarch_align> loongarch_cpu_align = + array_tune<loongarch_align> () +- .set (CPU_LOONGARCH64, la664_align ()) +- .set (CPU_LA464, la464_align ()) +- .set (CPU_LA664, la664_align ()); ++ .set (TUNE_GENERIC, la664_align ()) ++ .set (TUNE_LOONGARCH64, la664_align ()) ++ .set (TUNE_LA464, la464_align ()) ++ .set (TUNE_LA664, la664_align ()); + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +@@ -117,7 +144,7 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + any known "-mtune" type). 
*/ + array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data = + array_tune<loongarch_rtx_cost_data> () +- .set (CPU_LA664, ++ .set (TUNE_LA664, + loongarch_rtx_cost_data () + .movcf2gr_ (COSTS_N_INSNS (1)) + .movgr2cf_ (COSTS_N_INSNS (1))); +@@ -140,16 +167,18 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + .movcf2gr_ (COST_COMPLEX_INSN); + + array_tune<int> loongarch_cpu_issue_rate = array_tune<int> () +- .set (CPU_NATIVE, 4) +- .set (CPU_LOONGARCH64, 4) +- .set (CPU_LA464, 4) +- .set (CPU_LA664, 6); ++ .set (TUNE_NATIVE, 4) ++ .set (TUNE_GENERIC, 4) ++ .set (TUNE_LOONGARCH64, 4) ++ .set (TUNE_LA464, 4) ++ .set (TUNE_LA664, 6); + + array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> () +- .set (CPU_NATIVE, 4) +- .set (CPU_LOONGARCH64, 4) +- .set (CPU_LA464, 4) +- .set (CPU_LA664, 6); ++ .set (TUNE_NATIVE, 4) ++ .set (TUNE_GENERIC, 4) ++ .set (TUNE_LOONGARCH64, 4) ++ .set (TUNE_LA464, 4) ++ .set (TUNE_LA664, 6); + + /* Wiring string definitions from loongarch-str.h to global arrays + with standard index values from loongarch-opts.h, so we can +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 2fe44da5a..10b5f9ddc 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -177,21 +177,32 @@ struct loongarch_target + { + struct loongarch_isa isa; + struct loongarch_abi abi; +- int cpu_arch; /* CPU_ */ +- int cpu_tune; /* same */ ++ int cpu_arch; /* ARCH_ */ ++ int cpu_tune; /* TUNE_ */ + int cmodel; /* CMODEL_ */ + int tls_dialect; /* TLS_ */ + }; + +-/* CPU model */ ++/* ISA target presets (-march=*) */ + enum { +- CPU_NATIVE = 0, +- CPU_ABI_DEFAULT = 1, +- CPU_LOONGARCH64 = 2, +- CPU_LA464 = 3, +- CPU_LA664 = 4, +- N_ARCH_TYPES = 5, +- N_TUNE_TYPES = 5 ++ ARCH_NATIVE = 0, ++ ARCH_ABI_DEFAULT = 1, ++ ARCH_LOONGARCH64 = 2, ++ ARCH_LA464 = 3, ++ ARCH_LA664 = 4, ++ ARCH_LA64V1_0 = 5, ++ ARCH_LA64V1_1 = 6, ++ N_ARCH_TYPES = 7, ++}; ++ ++/* Tune target presets (-mtune=*) */ ++enum { ++ TUNE_NATIVE = 0, ++ TUNE_GENERIC = 1, ++ TUNE_LOONGARCH64 = 2, ++ TUNE_LA464 = 3, ++ TUNE_LA664 = 4, ++ N_TUNE_TYPES = 5, + }; + + /* TLS types. 
*/
+@@ -200,9 +211,11 @@ enum {
+ TLS_DESCRIPTORS = 1
+ };
+
+-/* CPU model properties */
++/* Target preset properties */
+ extern loongarch_def_array<const char *, N_ARCH_TYPES>
+- loongarch_cpu_strings;
++ loongarch_arch_strings;
++extern loongarch_def_array<const char *, N_TUNE_TYPES>
++ loongarch_tune_strings;
+ extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES>
+ loongarch_cpu_default_isa;
+ extern loongarch_def_array<int, N_TUNE_TYPES>
+diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc
+index 8551cf94d..9e0b79994 100644
+--- a/gcc/config/loongarch/loongarch-driver.cc
++++ b/gcc/config/loongarch/loongarch-driver.cc
+@@ -85,10 +85,10 @@ driver_set_m_parm (int argc, const char **argv)
+ loongarch_isa_ext_strings, 0, N_ISA_EXT_TYPES)
+
+ LARCH_DRIVER_PARSE_PARM (la_target.cpu_arch, ARCH, \
+- loongarch_cpu_strings, 0, N_ARCH_TYPES)
++ loongarch_arch_strings, 0, N_ARCH_TYPES)
+
+ LARCH_DRIVER_PARSE_PARM (la_target.cpu_tune, TUNE, \
+- loongarch_cpu_strings, 0, N_TUNE_TYPES)
++ loongarch_tune_strings, 0, N_TUNE_TYPES)
+
+ LARCH_DRIVER_PARSE_PARM (la_target.cmodel, CMODEL, \
+ loongarch_cmodel_strings, 0, N_CMODEL_TYPES)
+@@ -190,7 +190,7 @@ driver_get_normalized_m_opts (int argc, const char **argv ATTRIBUTE_UNUSED)
+ APPEND_VAL (loongarch_abi_base_strings[la_target.abi.base]);
+
+ APPEND_OPT (ARCH);
+- APPEND_VAL (loongarch_cpu_strings[la_target.cpu_arch]);
++ APPEND_VAL (loongarch_arch_strings[la_target.cpu_arch]);
+
+ APPEND_OPT (ISA_EXT_FPU);
+ APPEND_VAL (loongarch_isa_ext_strings[la_target.isa.fpu]);
+@@ -202,7 +202,7 @@ driver_get_normalized_m_opts (int argc, const char **argv ATTRIBUTE_UNUSED)
+ APPEND_VAL (loongarch_cmodel_strings[la_target.cmodel]);
+
+ APPEND_OPT (TUNE);
+- APPEND_VAL (loongarch_cpu_strings[la_target.cpu_tune]);
++ APPEND_VAL (loongarch_tune_strings[la_target.cpu_tune]);
+
+ obstack_1grow (&opts_obstack, '\0');
+
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index c455c5e32..735daeb7c 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
++++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -101,6 +101,7 @@ static int abi_compat_p (const struct loongarch_isa *isa,
+ struct loongarch_abi abi);
+ static int abi_default_cpu_arch (struct loongarch_abi abi,
+ struct loongarch_isa *isa);
++static int default_tune_for_arch (int arch, int fallback);
+
+ /* Mandatory configure-time defaults. */
+ #ifndef DEFAULT_ABI_BASE
+@@ -259,35 +260,35 @@ loongarch_config_target (struct loongarch_target *target,
+ /* If cpu_tune is not set using neither -mtune nor --with-tune,
+ the current cpu_arch is used as its default. */
+ t.cpu_tune = constrained.tune ? target->cpu_tune
+- : (constrained.arch ? target->cpu_arch :
+- (with_default_tune ? DEFAULT_CPU_TUNE : DEFAULT_CPU_ARCH));
++ : (constrained.arch
++ ? default_tune_for_arch (target->cpu_arch, with_default_tune
++ ? DEFAULT_CPU_TUNE : TUNE_GENERIC)
++ : (with_default_tune ? DEFAULT_CPU_TUNE
++ : default_tune_for_arch (DEFAULT_CPU_ARCH, TUNE_GENERIC)));
+
+
+ /* Handle -march/tune=native */
+ #ifdef __loongarch__
+ /* For native compilers, gather local CPU information
+- and fill the "CPU_NATIVE" index of arrays defined in
+- loongarch-cpu.c. */
++ and fill the "ARCH_NATIVE/TUNE_NATIVE" index of arrays
++ defined in loongarch-cpu.c. */
+
+ fill_native_cpu_config (&t);
+
+ #else
+- if (t.cpu_arch == CPU_NATIVE)
++ if (t.cpu_arch == ARCH_NATIVE)
+ fatal_error (UNKNOWN_LOCATION,
+ "%qs does not work on a cross compiler",
+ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
+
+- else if (t.cpu_tune == CPU_NATIVE)
++ else if (t.cpu_tune == TUNE_NATIVE)
+ fatal_error (UNKNOWN_LOCATION,
+ "%qs does not work on a cross compiler",
+ "-m" OPTSTR_TUNE "=" STR_CPU_NATIVE);
+ #endif
+
+- /* Handle -march/tune=abi-default */
+- if (t.cpu_tune == CPU_ABI_DEFAULT)
+- t.cpu_tune = abi_default_cpu_arch (t.abi, NULL);
+-
+- if (t.cpu_arch == CPU_ABI_DEFAULT)
++ /* Handle -march=abi-default */
++ if (t.cpu_arch == ARCH_ABI_DEFAULT)
+ {
+ t.cpu_arch = abi_default_cpu_arch (t.abi, &(t.isa));
+ loongarch_cpu_default_isa[t.cpu_arch] = t.isa;
+@@ -438,16 +439,16 @@ config_target_isa:
+ so we adjust that first if it is not constrained. */
+ int fallback_arch = abi_default_cpu_arch (t.abi, NULL);
+
+- if (t.cpu_arch == CPU_NATIVE)
++ if (t.cpu_arch == ARCH_NATIVE)
+ warning (0, "your native CPU architecture (%qs) "
+ "does not support %qs ABI, falling back to %<-m%s=%s%>",
+ arch_str (&t), abi_str (t.abi), OPTSTR_ARCH,
+- loongarch_cpu_strings[fallback_arch]);
++ loongarch_arch_strings[fallback_arch]);
+ else
+ warning (0, "default CPU architecture (%qs) "
+ "does not support %qs ABI, falling back to %<-m%s=%s%>",
+ arch_str (&t), abi_str (t.abi), OPTSTR_ARCH,
+- loongarch_cpu_strings[fallback_arch]);
++ loongarch_arch_strings[fallback_arch]);
+
+ t.cpu_arch = fallback_arch;
+ constrained.arch = 1;
+@@ -664,11 +665,40 @@ abi_default_cpu_arch (struct loongarch_abi abi,
+ case ABI_BASE_LP64F:
+ case ABI_BASE_LP64S:
+ *isa = isa_required (abi);
+- return CPU_LOONGARCH64;
++ return ARCH_LOONGARCH64;
+ }
+ gcc_unreachable ();
+ }
+
++static inline int
++default_tune_for_arch (int arch, int fallback)
++{
++ int ret;
++ switch (arch)
++ {
++
++#define TUNE_FOR_ARCH(NAME) \
++ case ARCH_##NAME: \
++ ret = TUNE_##NAME; \
++ break;
++
++ TUNE_FOR_ARCH(NATIVE)
++ TUNE_FOR_ARCH(LOONGARCH64)
++ TUNE_FOR_ARCH(LA464)
++ TUNE_FOR_ARCH(LA664)
++
++#undef TUNE_FOR_ARCH
++
++ case ARCH_ABI_DEFAULT:
++ case ARCH_LA64V1_0:
++ case ARCH_LA64V1_1:
++ ret = fallback;
++ }
++
++ gcc_assert (0 <= ret && ret < N_TUNE_TYPES);
++ return ret;
++}
++
+ static const char*
+ abi_str (struct loongarch_abi abi)
+ {
+@@ -731,7 +761,7 @@ isa_str (const struct loongarch_isa *isa, char separator)
+ static const char*
+ arch_str (const struct loongarch_target *target)
+ {
+- if (target->cpu_arch == CPU_NATIVE)
++ if (target->cpu_arch == ARCH_NATIVE)
+ {
+ /* Describe a native CPU with unknown PRID. */
+ const char* isa_string = isa_str (&target->isa, ',');
+@@ -741,7 +771,7 @@ arch_str (const struct loongarch_target *target)
+ APPEND_STRING (isa_string)
+ }
+ else
+- APPEND_STRING (loongarch_cpu_strings[target->cpu_arch]);
++ APPEND_STRING (loongarch_arch_strings[target->cpu_arch]);
+
+ APPEND1 ('\0')
+ return XOBFINISH (&msg_obstack, const char *);
+@@ -956,7 +986,7 @@ loongarch_target_option_override (struct loongarch_target *target,
+ /* Other arch-specific overrides. */
+ switch (target->cpu_arch)
+ {
+- case CPU_LA664:
++ case ARCH_LA664:
+ /* Enable -mrecipe=all for LA664 by default. */
+ if (!opts_set->x_recip_mask)
+ {
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index a3b467f4c..325c1e29c 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
++++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -127,8 +127,8 @@ struct loongarch_flags {
+ (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA)
+
+ /* TARGET_ macros for use in *.md template conditionals */
+-#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464)
+-#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664)
++#define TARGET_uARCH_LA464 (la_target.cpu_tune == TUNE_LA464)
++#define TARGET_uARCH_LA664 (la_target.cpu_tune == TUNE_LA664)
+
+ /* Note: optimize_size may vary across functions,
+ while -mno-memcpy imposes a global constraint. */
+diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
+index cacae38c0..3cbe12f7b 100644
+--- a/gcc/config/loongarch/loongarch-str.h
++++ b/gcc/config/loongarch/loongarch-str.h
+@@ -27,10 +27,13 @@ along with GCC; see the file COPYING3. If not see
+ #define OPTSTR_TUNE "tune"
+
+ #define STR_CPU_NATIVE "native"
+-#define STR_CPU_ABI_DEFAULT "abi-default"
++#define STR_ARCH_ABI_DEFAULT "abi-default"
++#define STR_TUNE_GENERIC "generic"
+ #define STR_CPU_LOONGARCH64 "loongarch64"
+ #define STR_CPU_LA464 "la464"
+ #define STR_CPU_LA664 "la664"
++#define STR_ARCH_LA64V1_0 "la64v1.0"
++#define STR_ARCH_LA64V1_1 "la64v1.1"
+
+ #define STR_ISA_BASE_LA64 "la64"
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 50ab6a82a..c86a0856b 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -9605,9 +9605,10 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
+
+ switch (target->cpu_tune)
+ {
+- case CPU_LOONGARCH64:
+- case CPU_LA464:
+- case CPU_LA664:
++ case TUNE_GENERIC:
++ case TUNE_LOONGARCH64:
++ case TUNE_LA464:
++ case TUNE_LA664:
+ /* Vector part. */
+ if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
+ {
+@@ -10976,9 +10977,9 @@ loongarch_asm_code_end (void)
+ if (flag_verbose_asm)
+ {
+ fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START,
+- loongarch_cpu_strings [la_target.cpu_arch]);
++ loongarch_arch_strings[la_target.cpu_arch]);
+ fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START,
+- loongarch_cpu_strings [la_target.cpu_tune]);
++ loongarch_tune_strings[la_target.cpu_tune]);
+ fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
+ loongarch_isa_base_strings [la_target.isa.base]);
+ DUMP_FEATURE (ISA_HAS_FRECIPE);
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
+index 6f730d886..69b3b965c 100644
+--- a/gcc/config/loongarch/loongarch.opt
++++ b/gcc/config/loongarch/loongarch.opt
+@@ -103,30 +103,55 @@ Enable LoongArch Advanced SIMD Extension (LASX, 256-bit).
+ + ;; Base target models (implies ISA & tune parameters) + Enum +-Name(cpu_type) Type(int) +-LoongArch CPU types: ++Name(arch_type) Type(int) ++LoongArch ARCH presets: + + EnumValue +-Enum(cpu_type) String(native) Value(CPU_NATIVE) ++Enum(arch_type) String(native) Value(ARCH_NATIVE) + + EnumValue +-Enum(cpu_type) String(abi-default) Value(CPU_ABI_DEFAULT) ++Enum(arch_type) String(abi-default) Value(ARCH_ABI_DEFAULT) + + EnumValue +-Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64) ++Enum(arch_type) String(loongarch64) Value(ARCH_LOONGARCH64) + + EnumValue +-Enum(cpu_type) String(la464) Value(CPU_LA464) ++Enum(arch_type) String(la464) Value(ARCH_LA464) + + EnumValue +-Enum(cpu_type) String(la664) Value(CPU_LA664) ++Enum(arch_type) String(la664) Value(ARCH_LA664) ++ ++EnumValue ++Enum(arch_type) String(la64v1.0) Value(ARCH_LA64V1_0) ++ ++EnumValue ++Enum(arch_type) String(la64v1.1) Value(ARCH_LA64V1_1) + + march= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(arch_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -march=PROCESSOR Generate code for the given PROCESSOR ISA. + ++Enum ++Name(tune_type) Type(int) ++LoongArch TUNE presets: ++ ++EnumValue ++Enum(tune_type) String(native) Value(TUNE_NATIVE) ++ ++EnumValue ++Enum(tune_type) String(generic) Value(TUNE_GENERIC) ++ ++EnumValue ++Enum(tune_type) String(loongarch64) Value(TUNE_LOONGARCH64) ++ ++EnumValue ++Enum(tune_type) String(la464) Value(TUNE_LA464) ++ ++EnumValue ++Enum(tune_type) String(la664) Value(TUNE_LA664) ++ + mtune= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(tune_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -mtune=PROCESSOR Generate optimized code for PROCESSOR. + + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index c9a1969ad..f6d59317b 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -999,7 +999,7 @@ Objective-C and Objective-C++ Dialects}. + -msign-extend-enabled -muser-enabled} + + @emph{LoongArch Options} +-@gccoptlist{-march=@var{cpu-type} -mtune=@var{cpu-type} -mabi=@var{base-abi-type} @gol ++@gccoptlist{-march=@var{arch-type} -mtune=@var{tune-type} -mabi=@var{base-abi-type} @gol + -mfpu=@var{fpu-type} -msoft-float -msingle-float -mdouble-float @gol + -mbranch-cost=@var{n} -mcheck-zero-division -mno-check-zero-division @gol + -mcond-move-int -mno-cond-move-int @gol +@@ -24455,35 +24455,52 @@ Enable user-defined instructions. + These command-line options are defined for LoongArch targets: + + @table @gcctabopt +-@item -march=@var{cpu-type} +-@opindex -march +-Generate instructions for the machine type @var{cpu-type}. In contrast to +-@option{-mtune=@var{cpu-type}}, which merely tunes the generated code +-for the specified @var{cpu-type}, @option{-march=@var{cpu-type}} allows GCC +-to generate code that may not run at all on processors other than the one +-indicated. Specifying @option{-march=@var{cpu-type}} implies +-@option{-mtune=@var{cpu-type}}, except where noted otherwise. ++@opindex march ++@item -march=@var{arch-type} ++Generate instructions for the machine type @var{arch-type}. ++@option{-march=@var{arch-type}} allows GCC to generate code that ++may not run at all on processors other than the one indicated. 
+ +-The choices for @var{cpu-type} are: ++The choices for @var{arch-type} are: + + @table @samp + @item native +-This selects the CPU to generate code for at compilation time by determining +-the processor type of the compiling machine. Using @option{-march=native} +-enables all instruction subsets supported by the local machine (hence +-the result might not run on different machines). Using @option{-mtune=native} +-produces code optimized for the local machine under the constraints +-of the selected instruction set. ++Local processor type detected by the native compiler. + @item loongarch64 +-A generic CPU with 64-bit extensions. ++Generic LoongArch 64-bit processor. + @item la464 +-LoongArch LA464 CPU with LBT, LSX, LASX, LVZ. ++LoongArch LA464-based processor with LSX, LASX. ++@item la664 ++LoongArch LA664-based processor with LSX, LASX ++and all LoongArch v1.1 instructions. ++@item la64v1.0 ++LoongArch64 ISA version 1.0. ++@item la64v1.1 ++LoongArch64 ISA version 1.1. + @end table + ++More information about LoongArch ISA versions can be found at ++@uref{https://github.com/loongson/la-toolchain-conventions}. ++ + @item -mtune=@var{cpu-type} + @opindex mtune +-Optimize the output for the given processor, specified by microarchitecture +-name. ++@item -mtune=@var{tune-type} ++Optimize the generated code for the given processor target. ++ ++The choices for @var{tune-type} are: ++ ++@table @samp ++@item native ++Local processor type detected by the native compiler. ++@item generic ++Generic LoongArch processor. ++@item loongarch64 ++Generic LoongArch 64-bit processor. ++@item la464 ++LoongArch LA464 core. ++@item la664 ++LoongArch LA664 core. ++@end table + + @item -mabi=@var{base-abi-type} + @opindex mabi +-- +2.43.0 +
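With the patch above applied, -march= and -mtune= accept separate name sets, and
loongarch_cpu_cpp_builtins () now defines __loongarch_arch and __loongarch_tune as
quoted strings via builtin_define_with_value (). A minimal sketch of a program that
observes this, assuming a compiler carrying this patch (the macro values simply track
whatever -march=/-mtune= presets are given, e.g. the new la64v1.1 and generic):

  // probe.cc, built with e.g.: gcc -march=la64v1.1 -mtune=generic probe.cc
  #include <cstdio>

  int main ()
  {
  #if defined (__loongarch_arch) && defined (__loongarch_tune)
    // Both macros expand to string literals, e.g. "la64v1.1" and "generic".
    std::printf ("arch=%s tune=%s\n", __loongarch_arch, __loongarch_tune);
  #endif
    return 0;
  }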
View file
_service:tar_scm:0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
Added
@@ -0,0 +1,230 @@ +From a32a9321b3336907fe2d17148cb9e4652642a3e6 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:20 +0000 +Subject: PATCH 066/157 BackportSME aarch64: Add sve_type to SVE builtins + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7f6de9861e5d7745a0af5174582519a39d545a92 + +Until now, the SVE ACLE code had mostly been able to represent +individual SVE arguments with just an element type suffix (s32, u32, +etc.). However, the SME2 ACLE provides many overloaded intrinsics +that operate on tuples rather than single vectors. This patch +therefore adds a new type (sve_type) that combines an element +type suffix with a vector count. This is enough to uniquely +represent all SVE ACLE types. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h (sve_type): New struct. + (sve_type::operator==): New function. + (function_resolver::get_vector_type): Delete. + (function_resolver::report_no_such_form): Take an sve_type rather + than a type_suffix_index. + * config/aarch64/aarch64-sve-builtins.cc (get_vector_type): New + function. + (function_resolver::get_vector_type): Delete. + (function_resolver::report_no_such_form): Take an sve_type rather + than a type_suffix_index. + (find_sve_type): New function, split out from... + (function_resolver::infer_vector_or_tuple_type): ...here. +--- + gcc/config/aarch64/aarch64-sve-builtins.cc | 93 ++++++++++++---------- + gcc/config/aarch64/aarch64-sve-builtins.h | 37 ++++++++- + 2 files changed, 88 insertions(+), 42 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index dc3fd80da..cc676bfe1 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -659,6 +659,14 @@ find_type_suffix_for_scalar_type (const_tree type) + return NUM_TYPE_SUFFIXES; + } + ++/* Return the vector type associated with TYPE. */ ++static tree ++get_vector_type (sve_type type) ++{ ++ auto vector_type = type_suffixestype.type.vector_type; ++ return acle_vector_typestype.num_vectors - 1vector_type; ++} ++ + /* Report an error against LOCATION that the user has tried to use + function FNDECL when extension EXTENSION is disabled. */ + static void +@@ -1190,13 +1198,6 @@ function_resolver::function_resolver (location_t location, + { + } + +-/* Return the vector type associated with type suffix TYPE. */ +-tree +-function_resolver::get_vector_type (type_suffix_index type) +-{ +- return acle_vector_types0type_suffixestype.vector_type; +-} +- + /* Return the <stdint.h> name associated with TYPE. Using the <stdint.h> + name should be more user-friendly than the underlying canonical type, + since it makes the signedness and bitwidth explicit. */ +@@ -1227,10 +1228,10 @@ function_resolver::scalar_argument_p (unsigned int i) + || SCALAR_FLOAT_TYPE_P (type)); + } + +-/* Report that the function has no form that takes type suffix TYPE. ++/* Report that the function has no form that takes type TYPE. + Return error_mark_node. 
*/ + tree +-function_resolver::report_no_such_form (type_suffix_index type) ++function_resolver::report_no_such_form (sve_type type) + { + error_at (location, "%qE has no form that takes %qT arguments", + fndecl, get_vector_type (type)); +@@ -1352,6 +1353,25 @@ function_resolver::infer_pointer_type (unsigned int argno, + return type; + } + ++/* If TYPE is an SVE predicate or vector type, or a tuple of such a type, ++ return the associated sve_type, otherwise return an invalid sve_type. */ ++static sve_type ++find_sve_type (const_tree type) ++{ ++ /* A linear search should be OK here, since the code isn't hot and ++ the number of types is only small. */ ++ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) ++ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) ++ { ++ vector_type_index type_i = type_suffixessuffix_i.vector_type; ++ tree this_type = acle_vector_typessize_itype_i; ++ if (this_type && matches_type_p (this_type, type)) ++ return { type_suffix_index (suffix_i), size_i + 1 }; ++ } ++ ++ return {}; ++} ++ + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +@@ -1364,37 +1384,30 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno, + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + +- /* A linear search should be OK here, since the code isn't hot and +- the number of types is only small. */ +- for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) +- for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) +- { +- vector_type_index type_i = type_suffixessuffix_i.vector_type; +- tree type = acle_vector_typessize_itype_i; +- if (type && matches_type_p (type, actual)) +- { +- if (size_i + 1 == num_vectors) +- return type_suffix_index (suffix_i); +- +- if (num_vectors == 1) +- error_at (location, "passing %qT to argument %d of %qE, which" +- " expects a single SVE vector rather than a tuple", +- actual, argno + 1, fndecl); +- else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing single vector %qT to argument %d" +- " of %qE, which expects a tuple of %d vectors", +- actual, argno + 1, fndecl, num_vectors); +- else +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing %qT to argument %d of %qE, which" +- " expects a tuple of %d vectors", actual, argno + 1, +- fndecl, num_vectors); +- return NUM_TYPE_SUFFIXES; +- } +- } ++ if (auto sve_type = find_sve_type (actual)) ++ { ++ if (sve_type.num_vectors == num_vectors) ++ return sve_type.type; ++ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ actual, argno + 1, fndecl); ++ else if (sve_type.num_vectors == 1 ++ && sve_type.type != TYPE_SUFFIX_b) ++ /* num_vectors is always != 1, so the singular isn't needed. */ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ actual, argno + 1, fndecl, num_vectors); ++ else ++ /* num_vectors is always != 1, so the singular isn't needed. 
*/ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", actual, argno + 1, ++ fndecl, num_vectors); ++ return NUM_TYPE_SUFFIXES; ++ } + + if (num_vectors == 1) + error_at (location, "passing %qT to argument %d of %qE, which" +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 374c57e93..f4f2c415f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -263,6 +263,40 @@ struct group_suffix_info + unsigned int vectors_per_tuple; + }; + ++/* Represents an SVE vector, predicate, tuple of vectors, or tuple of ++ predicates. There is also a representation of "no type"/"invalid type". */ ++struct sve_type ++{ ++ sve_type () = default; ++ sve_type (type_suffix_index type) : type (type), num_vectors (1) {} ++ sve_type (type_suffix_index type, unsigned int num_vectors) ++ : type (type), num_vectors (num_vectors) {} ++ ++ /* Return true if the type is valid. */ ++ explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; } ++ ++ bool operator== (const sve_type &) const; ++ bool operator!= (const sve_type &x) const { return !operator== (x); } ++ ++ /* This is one of: ++ ++ - TYPE_SUFFIX_b for svbool_t-based types ++ - TYPE_SUFFIX_c for svcount_t-based types ++ - the type suffix of a data element for SVE data vectors and tuples ++ - NUM_TYPE_SUFFIXES for invalid types. */ ++ type_suffix_index type = NUM_TYPE_SUFFIXES; ++ ++ /* If the type is a tuple, this is the number of vectors in the tuple, ++ otherwise it is 1. */ ++ unsigned int num_vectors = 1; ++}; ++ ++inline bool ++sve_type::operator== (const sve_type &other) const ++{ ++ return type == other.type && num_vectors == other.num_vectors; ++} ++ + /* Static information about a set of functions. */ + struct function_group_info + { +@@ -413,12 +447,11 @@ public: + function_resolver (location_t, const function_instance &, tree, + vec<tree, va_gc> &); + +- tree get_vector_type (type_suffix_index); + const char *get_scalar_type_name (type_suffix_index); + tree get_argument_type (unsigned int); + bool scalar_argument_p (unsigned int); + +- tree report_no_such_form (type_suffix_index); ++ tree report_no_such_form (sve_type); + tree lookup_form (mode_suffix_index, + type_suffix_index = NUM_TYPE_SUFFIXES, + type_suffix_index = NUM_TYPE_SUFFIXES, +-- +2.33.0 +
View file
_service:tar_scm:0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch
Added
@@ -0,0 +1,678 @@ +From 9af73fb7213d5c10b3683465e6682ad20f5abe64 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Tue, 23 Apr 2024 10:42:48 +0800 +Subject: PATCH 165/188 LoongArch: Define builtin macros for ISA evolutions + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-evolution.o. + * config/loongarch/genopts/genstr.sh: Enable generation of + loongarch-evolution.cc,h. + * config/loongarch/t-loongarch: Likewise. + * config/loongarch/genopts/gen-evolution.awk: New file. + * config/loongarch/genopts/isa-evolution.in: Mark ISA version + of introduction for each ISA evolution feature. + * config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): + Define builtin macros for enabled ISA evolutions and the ISA + version. + * config/loongarch/loongarch-cpu.cc: Use loongarch-evolution.h. + * config/loongarch/loongarch.h: Likewise. + * config/loongarch/loongarch-cpucfg-map.h: Delete. + * config/loongarch/loongarch-evolution.cc: New file. + * config/loongarch/loongarch-evolution.h: New file. + * config/loongarch/loongarch-opts.h (ISA_HAS_FRECIPE): Define. + (ISA_HAS_DIV32): Likewise. + (ISA_HAS_LAM_BH): Likewise. + (ISA_HAS_LAMCAS): Likewise. + (ISA_HAS_LD_SEQ_SA): Likewise. +--- + gcc/config.gcc | 2 +- + .../loongarch/genopts/gen-evolution.awk | 230 ++++++++++++++++++ + gcc/config/loongarch/genopts/genstr.sh | 82 ++----- + gcc/config/loongarch/genopts/isa-evolution.in | 10 +- + gcc/config/loongarch/loongarch-c.cc | 23 ++ + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-evolution.cc | 60 +++++ + ...rch-cpucfg-map.h => loongarch-evolution.h} | 46 +++- + gcc/config/loongarch/loongarch-opts.h | 11 - + gcc/config/loongarch/loongarch.h | 1 + + gcc/config/loongarch/t-loongarch | 26 +- + 11 files changed, 398 insertions(+), 95 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/gen-evolution.awk + create mode 100644 gcc/config/loongarch/loongarch-evolution.cc + rename gcc/config/loongarch/{loongarch-cpucfg-map.h => loongarch-evolution.h} (52%) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c6820d0f1..a405e6d2e 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -458,7 +458,7 @@ loongarch*-*-*) + cpu_type=loongarch + d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" +- extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_options="${extra_options} g.opt fused-madd.opt" + ;; +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +new file mode 100644 +index 000000000..4d105afa9 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -0,0 +1,230 @@ ++#!/usr/bin/gawk ++# ++# A simple script that generates loongarch-evolution.h ++# from genopts/isa-evolution.in ++# ++# Copyright (C) 2021-2024 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. 
++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++BEGIN { ++ # isa_version_major ++ # isa_version_minor ++ # cpucfg_word ++ # cpucfg_bit_in_word ++ # name_capitalized ++ # comment ++} ++ ++{ ++ cpucfg_wordNR = $1 ++ cpucfg_bit_in_wordNR = $2 ++ nameNR = gensub(/-/, "_", "g", $3) ++ name_capitalizedNR = toupper(nameNR) ++ isa_version_majorNR = gensub(/^(1-90-9*)\.(0-9+)$/, "\\1", 1, $4) ++ isa_version_minorNR = gensub(/^(1-90-9*)\.(0-9+)$/, "\\2", 1, $4) ++ ++ $1 = $2 = $3 = $4 = "" ++ sub (/^\s*/, "") ++ commentNR = $0 ++} ++ ++function copyright_header(from_year,to_year) ++{ ++ print " Copyright (C) " from_year "-" to_year \ ++ " Free Software Foundation, Inc." ++ print "" ++ print "This file is part of GCC." ++ print "" ++ print "GCC is free software; you can redistribute it and/or modify" ++ print "it under the terms of the GNU General Public License as published by" ++ print "the Free Software Foundation; either version 3, or (at your option)" ++ print "any later version." ++ print "" ++ print "GCC is distributed in the hope that it will be useful," ++ print "but WITHOUT ANY WARRANTY; without even the implied warranty of" ++ print "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the" ++ print "GNU General Public License for more details." ++ print "" ++ print "You should have received a copy of the GNU General Public License" ++ print "along with GCC; see the file COPYING3. If not see" ++ print "<http://www.gnu.org/licenses/>." ++} ++ ++function gen_cpucfg_map() ++{ ++ print "static constexpr struct {" ++ print " int cpucfg_word;" ++ print " unsigned int cpucfg_bit;" ++ print " HOST_WIDE_INT isa_evolution_bit;" ++ print "} cpucfg_map = {" ++ ++ for (i = 1; i <= NR; i++) ++ printf (" { %d, 1u << %d, OPTION_MASK_ISA_%s },\n", ++ cpucfg_wordi, cpucfg_bit_in_wordi, name_capitalizedi) ++ ++ print "};" ++} ++ ++function gen_cpucfg_useful_idx() ++{ ++ split("0 1 2 16 17 18 19", init_useful_idx) ++ ++ delete idx_bucket ++ ++ for (i in init_useful_idx) ++ idx_bucketinit_useful_idxi = 1 ++ delete init_useful_idx ++ ++ for (i in cpucfg_word) ++ idx_bucketcpucfg_wordi = 1 ++ ++ delete idx_list ++ for (i in idx_bucket) ++ idx_listlength(idx_list)-1 = i+0 ++ delete idx_bucket ++ ++ asort (idx_list) ++ ++ print "static constexpr int cpucfg_useful_idx = {" ++ for (i in idx_list) ++ printf(" %d,\n", idx_listi) ++ print "};" ++ ++ print "" ++ ++ printf ("static constexpr int N_CPUCFG_WORDS = %d;\n", ++ idx_listlength(idx_list) + 1) ++ ++ delete idx_list ++} ++ ++function gen_evolution_decl() ++{ ++ print "/* ISA evolution features */" ++ print "enum {" ++ ++ for (i = 1; i <= NR; i++) ++ print " EVO_" name_capitalizedi " = " i - 1 "," ++ ++ print " N_EVO_FEATURES = " NR ++ print "};" ++ print "" ++ ++ print "/* Condition macros */" ++ for (i = 1; i <= NR; i++) ++ printf ("#define ISA_HAS_%s \\\n" \ ++ " (la_target.isa.evolution & OPTION_MASK_ISA_%s)\n", ++ name_capitalizedi, name_capitalizedi) ++ print "" ++ ++ print "/* Bitmasks on la_target.isa.evolution. */" ++ print "extern int la_evo_feature_masksN_EVO_FEATURES;" ++ print "" ++ print "/* Builtin macro names for the evolution features. 
*/" ++ print "extern const char* la_evo_macro_nameN_EVO_FEATURES;" ++ print "" ++ print "/* The ISA version where a specific feature is introduced. */" ++ print "extern int la_evo_version_majorN_EVO_FEATURES;" ++ print "extern int la_evo_version_minorN_EVO_FEATURES;" ++} ++ ++function gen_full_header() ++{ ++ print "/* Generated automatically by \"genstr\" from \"isa-evolution.in\"." ++ print " Please do not edit this file directly." ++ print "" ++ ++ copyright_header(2023, 2024) ++ ++ print "*/" ++ print "" ++ ++ print "#ifndef LOONGARCH_EVOLUTION_H" ++ print "#define LOONGARCH_EVOLUTION_H" ++ print "" ++ print "#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)" ++ print "" ++ print "#include \"options.h\"" ++ print "" ++ ++ gen_cpucfg_map() ++ ++ print "" ++ ++ gen_cpucfg_useful_idx() ++ ++ print "" ++ ++ gen_evolution_decl() ++ ++ print "" ++ print "#endif" ++ print "" ++ print "#endif /* LOONGARCH_EVOLUTION_H */" ++} ++ ++ ++function gen_full_source() ++{ ++ print "/* Generated automatically by \"genstr\" from \"isa-evolution.in\"." ++ print " Please do not edit this file directly." ++ print "" ++ ++ copyright_header(2023, 2024) ++ ++ print "*/" ++ print "" ++ print "#include \"config.h\"" ++ print "#include \"system.h\"" ++ print "#include \"coretypes.h\"" ++ print "#include \"options.h\"" ++ print "" ++ print "#include \"loongarch-evolution.h\"" ++ print "" ++ ++ print "int la_evo_feature_masks = {"; ++ for (i = 1; i <= NR; i++) ++ print " OPTION_MASK_ISA_" name_capitalizedi "," ++ print "};" ++ print "" ++ ++ print "const char* la_evo_macro_name = {"; ++ for (i = 1; i <= NR; i++) ++ print " \"__loongarch_" namei "\"," ++ print "};" ++ print "" ++ ++ ++ print "int la_evo_version_major = {" ++ for (i = 1; i <= NR; i++) ++ print " " isa_version_majori ", /* " name_capitalizedi " */" ++ print "};" ++ print "" ++ ++ print "int la_evo_version_minor = {" ++ for (i = 1; i <= NR; i++) ++ print " " isa_version_minori ", /* " name_capitalizedi " */" ++ print "};" ++} ++ ++END { ++ if (header_p) ++ gen_full_header() ++ else ++ gen_full_source() ++} +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 391eca121..3e86c8152 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -108,78 +108,30 @@ EOF + print("m"$3) + gsub(/-/, "_", $3) + print("Target Mask(ISA_"toupper($3)") Var(la_isa_evolution)") +- $1=""; $2=""; $3="" ++ $1=""; $2=""; $3=""; $4="" + sub(/^ */, "", $0) + print($0) + }' isa-evolution.in + } + +-gen_cpucfg_map() { +- cat <<EOF +-/* Generated automatically by "genstr" from "isa-evolution.in". +- Please do not edit this file directly. +- +- Copyright (C) 2023 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-<http://www.gnu.org/licenses/>. 
*/ +- +-#ifndef LOONGARCH_CPUCFG_MAP_H +-#define LOONGARCH_CPUCFG_MAP_H +- +-#include "options.h" +- +-static constexpr struct { +- int cpucfg_word; +- unsigned int cpucfg_bit; +- HOST_WIDE_INT isa_evolution_bit; +-} cpucfg_map = { +-EOF +- +- # Generate the strings from isa-evolution.in. +- awk '{ +- gsub(/-/, "_", $3) +- print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },") +- }' isa-evolution.in +- +- echo "};" +- echo +- echo "static constexpr int cpucfg_useful_idx = {" +- +- awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") } +- {if ($1+0 > max+0) max=$1; print(" "$1",")}' \ +- isa-evolution.in | sort -n | uniq +- +- echo "};" +- echo "" +- +- awk 'BEGIN { max=19 } +- { if ($1+0 > max+0) max=$1 } +- END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \ +- isa-evolution.in +- +- echo "#endif /* LOONGARCH_CPUCFG_MAP_H */" +-} +- + main() { + case "$1" in +- cpucfg-map) gen_cpucfg_map;; +- header) gen_defines;; +- opt) gen_options;; +- *) echo "Unknown Command: \"$1\". Available: cpucfg-map, header, opt"; exit 1;; ++ evolution_h) ++ awk -v header_p=1 -f gen-evolution.awk isa-evolution.in ++ ;; ++ evolution_c) ++ awk -v header_p=0 -f gen-evolution.awk isa-evolution.in ++ ;; ++ header) ++ gen_defines ++ ;; ++ opt) ++ gen_options ++ ;; ++ *) ++ echo "Unknown Command: \"$1\". Available: header, opt, evolution_h, evolution_c" ++ exit 1 ++ ;; + esac + } + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index 11a198b64..50f72d5a0 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,5 +1,5 @@ +-2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. +-2 26 div32 Support div.wu and mod.wu instructions with inputs not sign-extended. +-2 27 lam-bh Support am{swap/add}_db.{b/h} instructions. +-2 28 lamcas Support amcas_db.{b/h/w/d} instructions. +-3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). ++2 25 frecipe 1.1 Support frecipe.{s/d} and frsqrte.{s/d} instructions. ++2 26 div32 1.1 Support div.wu and mod.wu instructions with inputs not sign-extended. ++2 27 lam-bh 1.1 Support am{swap/add}_db.{b/h} instructions. ++2 28 lamcas 1.1 Support amcas_db.{b/h/w/d} instructions. ++3 23 ld-seq-sa 1.1 Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 153db75b0..4ecea6a45 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -103,6 +103,29 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define ("__loongarch_simd_width=256"); + } + ++ /* ISA evolution features */ ++ int max_v_major = 1, max_v_minor = 0; ++ ++ for (int i = 0; i < N_EVO_FEATURES; i++) ++ if (la_target.isa.evolution & la_evo_feature_masksi) ++ { ++ builtin_define (la_evo_macro_namei); ++ ++ int major = la_evo_version_majori, ++ minor = la_evo_version_minori; ++ ++ max_v_major = major > max_v_major ? major : max_v_major; ++ max_v_minor = major == max_v_major ++ ? (minor > max_v_minor ? minor : max_v_minor): max_v_minor; ++ } ++ ++ /* Find the minimum ISA version required to run the target program. */ ++ if (!(max_v_major == 1 && max_v_minor <= 1 && ISA_HAS_LASX)) ++ { ++ builtin_define_with_int_value ("__loongarch_version_major", max_v_major); ++ builtin_define_with_int_value ("__loongarch_version_minor", max_v_minor); ++ } ++ + /* Native Data Sizes. 
*/ + builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index eb1eb8011..49107f2ae 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -28,8 +28,8 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-def.h" + #include "loongarch-opts.h" + #include "loongarch-cpu.h" +-#include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" ++#include "loongarch-evolution.h" + + + /* Native CPU detection with "cpucfg" */ +diff --git a/gcc/config/loongarch/loongarch-evolution.cc b/gcc/config/loongarch/loongarch-evolution.cc +new file mode 100644 +index 000000000..1fb4e3b01 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-evolution.cc +@@ -0,0 +1,60 @@ ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023-2024 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. ++*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "options.h" ++ ++#include "loongarch-evolution.h" ++ ++int la_evo_feature_masks = { ++ OPTION_MASK_ISA_FRECIPE, ++ OPTION_MASK_ISA_DIV32, ++ OPTION_MASK_ISA_LAM_BH, ++ OPTION_MASK_ISA_LAMCAS, ++ OPTION_MASK_ISA_LD_SEQ_SA, ++}; ++ ++const char* la_evo_macro_name = { ++ "__loongarch_frecipe", ++ "__loongarch_div32", ++ "__loongarch_lam_bh", ++ "__loongarch_lamcas", ++ "__loongarch_ld_seq_sa", ++}; ++ ++int la_evo_version_major = { ++ 1, /* FRECIPE */ ++ 1, /* DIV32 */ ++ 1, /* LAM_BH */ ++ 1, /* LAMCAS */ ++ 1, /* LD_SEQ_SA */ ++}; ++ ++int la_evo_version_minor = { ++ 1, /* FRECIPE */ ++ 1, /* DIV32 */ ++ 1, /* LAM_BH */ ++ 1, /* LAMCAS */ ++ 1, /* LD_SEQ_SA */ ++}; +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-evolution.h +similarity index 52% +rename from gcc/config/loongarch/loongarch-cpucfg-map.h +rename to gcc/config/loongarch/loongarch-evolution.h +index 148333c24..d64996481 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-evolution.h +@@ -17,10 +17,13 @@ GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see +-<http://www.gnu.org/licenses/>. */ ++<http://www.gnu.org/licenses/>. 
++*/ + +-#ifndef LOONGARCH_CPUCFG_MAP_H +-#define LOONGARCH_CPUCFG_MAP_H ++#ifndef LOONGARCH_EVOLUTION_H ++#define LOONGARCH_EVOLUTION_H ++ ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + + #include "options.h" + +@@ -48,4 +51,39 @@ static constexpr int cpucfg_useful_idx = { + }; + + static constexpr int N_CPUCFG_WORDS = 20; +-#endif /* LOONGARCH_CPUCFG_MAP_H */ ++ ++/* ISA evolution features */ ++enum { ++ EVO_FRECIPE = 0, ++ EVO_DIV32 = 1, ++ EVO_LAM_BH = 2, ++ EVO_LAMCAS = 3, ++ EVO_LD_SEQ_SA = 4, ++ N_EVO_FEATURES = 5 ++}; ++ ++/* Condition macros */ ++#define ISA_HAS_FRECIPE \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_FRECIPE) ++#define ISA_HAS_DIV32 \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_DIV32) ++#define ISA_HAS_LAM_BH \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) ++#define ISA_HAS_LAMCAS \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) ++#define ISA_HAS_LD_SEQ_SA \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) ++ ++/* Bitmasks on la_target.isa.evolution. */ ++extern int la_evo_feature_masksN_EVO_FEATURES; ++ ++/* Builtin macro names for the evolution features. */ ++extern const char* la_evo_macro_nameN_EVO_FEATURES; ++ ++/* The ISA version where a specific feature is introduced. */ ++extern int la_evo_version_majorN_EVO_FEATURES; ++extern int la_evo_version_minorN_EVO_FEATURES; ++ ++#endif ++ ++#endif /* LOONGARCH_EVOLUTION_H */ +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 325c1e29c..19bae5a0b 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -115,17 +115,6 @@ struct loongarch_flags { + #define ISA_HAS_LASX \ + (la_target.isa.simd == ISA_EXT_SIMD_LASX) + +-#define ISA_HAS_FRECIPE \ +- (la_target.isa.evolution & OPTION_MASK_ISA_FRECIPE) +-#define ISA_HAS_DIV32 \ +- (la_target.isa.evolution & OPTION_MASK_ISA_DIV32) +-#define ISA_HAS_LAM_BH \ +- (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) +-#define ISA_HAS_LAMCAS \ +- (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) +-#define ISA_HAS_LD_SEQ_SA \ +- (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) +- + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == TUNE_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == TUNE_LA664) +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 089206605..6743d2684 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + /* LoongArch external variables defined in loongarch.cc. 
*/ + + #include "config/loongarch/loongarch-opts.h" ++#include "config/loongarch/loongarch-evolution.h" + + #define SWITCHABLE_TARGET 1 + +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 488e8cff3..53dde9ce6 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -21,7 +21,7 @@ GTM_H += loongarch-multilib.h + OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ + $(srcdir)/config/loongarch/loongarch-def-array.h \ + $(srcdir)/config/loongarch/loongarch-tune.h \ +- $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++ $(srcdir)/config/loongarch/loongarch-evolution.h + + # Canonical target triplet from config.gcc + LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\ +@@ -62,7 +62,11 @@ loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ +- $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++ $(srcdir)/config/loongarch/loongarch-evolution.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-evolution.o: $(srcdir)/config/loongarch/loongarch-evolution.cc $(LA_STR_H) \ ++ $(srcdir)/config/loongarch/loongarch-evolution.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) +@@ -84,11 +88,17 @@ s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/loongarch.opt + $(STAMP) s-loongarch-opt + +-$(srcdir)/config/loongarch/loongarch-cpucfg-map.h: s-loongarch-cpucfg-map ++$(srcdir)/config/loongarch/loongarch-evolution.h: s-loongarch-evolution + @true +-s-loongarch-cpucfg-map: $(srcdir)/config/loongarch/genopts/genstr.sh \ +- $(srcdir)/config/loongarch/genopts/isa-evolution.in +- $(SHELL) $< cpucfg-map > tmp-cpucfg.h +- $(SHELL) $(srcdir)/../move-if-change tmp-cpucfg.h \ +- $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++$(srcdir)/config/loongarch/loongarch-evolution.cc: s-loongarch-evolution ++ @true ++s-loongarch-evolution: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in \ ++ $(srcdir)/config/loongarch/genopts/gen-evolution.awk ++ $(SHELL) $< evolution_h > tmp-isa-evo.h ++ $(SHELL) $< evolution_c > tmp-isa-evo.cc ++ $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.h \ ++ $(srcdir)/config/loongarch/loongarch-evolution.h ++ $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.cc \ ++ $(srcdir)/config/loongarch/loongarch-evolution.cc + $(STAMP) $@ +-- +2.43.0 +
View file
_service:tar_scm:0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
Added
@@ -0,0 +1,1474 @@ +From 21839879d5f00db48cdacd472044a9bd4e23a2c6 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:20 +0000 +Subject: PATCH 067/157 BackportSME aarch64: Generalise some SVE ACLE + error messages + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb01ef94ff5096b907639aa3a1d77850921e7d37 + +The current SVE ACLE function-resolution diagnostics assume +that a function has a fixed choice between vectors or tuples +of vectors. If an argument was not an SVE type at all, the +error message said the function "expects an SVE vector type" +or "expects an SVE tuple type". + +This patch generalises the error to cope with cases where +an argument can be either a vector or a tuple. It also splits +out the diagnostics for mismatched tuple sizes, so that they +can be reused by later patches. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::infer_sve_type): New member function. + (function_resolver::report_incorrect_num_vectors): Likewise. + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::infer_sve_type): New function,. + (function_resolver::report_incorrect_num_vectors): New function, + split out from... + (function_resolver::infer_vector_or_tuple_type): ...here. Use + infer_sve_type. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/*: Update expected error + messages. +--- + gcc/config/aarch64/aarch64-sve-builtins.cc | 87 ++++++++++++------- + gcc/config/aarch64/aarch64-sve-builtins.h | 3 + + .../aarch64/sve/acle/general-c/adr_index_1.c | 6 +- + .../aarch64/sve/acle/general-c/adr_offset_1.c | 6 +- + .../aarch64/sve/acle/general-c/binary_1.c | 2 +- + .../sve/acle/general-c/binary_int_opt_n.c | 2 +- + .../sve/acle/general-c/binary_lane_1.c | 4 +- + .../sve/acle/general-c/binary_long_lane_1.c | 4 +- + .../sve/acle/general-c/binary_long_opt_n_1.c | 2 +- + .../aarch64/sve/acle/general-c/binary_n_1.c | 2 +- + .../acle/general-c/binary_narrowb_opt_n_1.c | 2 +- + .../acle/general-c/binary_narrowt_opt_n_1.c | 4 +- + .../sve/acle/general-c/binary_opt_n_2.c | 2 +- + .../sve/acle/general-c/binary_opt_n_3.c | 2 +- + .../sve/acle/general-c/binary_rotate_1.c | 4 +- + .../sve/acle/general-c/binary_to_uint_1.c | 4 +- + .../sve/acle/general-c/binary_uint64_n_1.c | 2 +- + .../acle/general-c/binary_uint64_opt_n_2.c | 2 +- + .../sve/acle/general-c/binary_uint_1.c | 2 +- + .../sve/acle/general-c/binary_uint_n_1.c | 2 +- + .../sve/acle/general-c/binary_uint_opt_n_1.c | 2 +- + .../sve/acle/general-c/binary_wide_1.c | 8 +- + .../sve/acle/general-c/binary_wide_opt_n_1.c | 4 +- + .../aarch64/sve/acle/general-c/clast_1.c | 4 +- + .../aarch64/sve/acle/general-c/compare_1.c | 4 +- + .../sve/acle/general-c/compare_opt_n_1.c | 2 +- + .../sve/acle/general-c/compare_wide_opt_n_1.c | 2 +- + .../sve/acle/general-c/count_vector_1.c | 2 +- + .../aarch64/sve/acle/general-c/create_1.c | 4 +- + .../aarch64/sve/acle/general-c/create_3.c | 4 +- + .../aarch64/sve/acle/general-c/create_5.c | 4 +- + .../aarch64/sve/acle/general-c/fold_left_1.c | 4 +- + .../sve/acle/general-c/inc_dec_pred_1.c | 2 +- + .../aarch64/sve/acle/general-c/mmla_1.c | 10 +-- + .../acle/general-c/prefetch_gather_offset_2.c | 2 +- + .../aarch64/sve/acle/general-c/reduction_1.c | 2 +- + .../sve/acle/general-c/reduction_wide_1.c | 2 +- + .../general-c/shift_right_imm_narrowb_1.c | 2 +- + .../shift_right_imm_narrowb_to_uint_1.c | 2 +- + .../general-c/shift_right_imm_narrowt_1.c | 4 +- + .../shift_right_imm_narrowt_to_uint_1.c | 4 +- + 
.../aarch64/sve/acle/general-c/store_1.c | 2 +- + .../aarch64/sve/acle/general-c/store_2.c | 2 +- + .../acle/general-c/store_scatter_offset_1.c | 4 +- + .../sve/acle/general-c/ternary_bfloat16_1.c | 2 +- + .../acle/general-c/ternary_bfloat16_lane_1.c | 2 +- + .../general-c/ternary_bfloat16_lanex2_1.c | 2 +- + .../acle/general-c/ternary_bfloat16_opt_n_1.c | 2 +- + .../general-c/ternary_intq_uintq_lane_1.c | 6 +- + .../general-c/ternary_intq_uintq_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_lane_1.c | 6 +- + .../acle/general-c/ternary_lane_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_long_lane_1.c | 6 +- + .../sve/acle/general-c/ternary_long_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_opt_n_1.c | 4 +- + .../sve/acle/general-c/ternary_qq_lane_1.c | 6 +- + .../acle/general-c/ternary_qq_lane_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_qq_opt_n_2.c | 4 +- + .../sve/acle/general-c/ternary_qq_rotate_1.c | 6 +- + .../sve/acle/general-c/ternary_rotate_1.c | 6 +- + .../general-c/ternary_shift_right_imm_1.c | 4 +- + .../sve/acle/general-c/ternary_uint_1.c | 6 +- + .../sve/acle/general-c/ternary_uintq_intq_1.c | 6 +- + .../general-c/ternary_uintq_intq_lane_1.c | 6 +- + .../general-c/ternary_uintq_intq_opt_n_1.c | 4 +- + .../aarch64/sve/acle/general-c/tmad_1.c | 4 +- + .../aarch64/sve/acle/general-c/unary_1.c | 2 +- + .../aarch64/sve/acle/general-c/unary_2.c | 2 +- + .../sve/acle/general-c/unary_convert_1.c | 2 +- + .../sve/acle/general-c/unary_convert_2.c | 2 +- + .../acle/general-c/unary_convert_narrowt_1.c | 2 +- + .../sve/acle/general-c/unary_narrowb_1.c | 2 +- + .../acle/general-c/unary_narrowb_to_uint_1.c | 2 +- + .../sve/acle/general-c/unary_narrowt_1.c | 4 +- + .../acle/general-c/unary_narrowt_to_uint_1.c | 4 +- + .../sve/acle/general-c/unary_to_int_1.c | 2 +- + .../sve/acle/general-c/unary_to_uint_1.c | 2 +- + .../sve/acle/general-c/unary_to_uint_2.c | 2 +- + .../sve/acle/general-c/unary_to_uint_3.c | 2 +- + .../aarch64/sve/acle/general-c/unary_uint_1.c | 2 +- + .../sve/acle/general-c/unary_widen_1.c | 4 +- + 81 files changed, 195 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index cc676bfe1..4e94e3633 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1228,6 +1228,32 @@ function_resolver::scalar_argument_p (unsigned int i) + || SCALAR_FLOAT_TYPE_P (type)); + } + ++/* Report that argument ARGNO was expected to have NUM_VECTORS vectors. ++ TYPE is the type that ARGNO actually has. */ ++void ++function_resolver::report_incorrect_num_vectors (unsigned int argno, ++ sve_type type, ++ unsigned int num_vectors) ++{ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ get_vector_type (type), argno + 1, fndecl); ++ else if (type.num_vectors == 1 ++ && type.type != TYPE_SUFFIX_b) ++ /* num_vectors is always != 1, so the singular isn't needed. */ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ get_vector_type (type), argno + 1, fndecl, num_vectors); ++ else ++ /* num_vectors is always != 1, so the singular isn't needed. 
*/ ++ error_n (location, num_vectors, "%qT%d%qE%d", ++ "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", get_vector_type (type), ++ argno + 1, fndecl, num_vectors); ++} ++ + /* Report that the function has no form that takes type TYPE. + Return error_mark_node. */ + tree +@@ -1372,6 +1398,30 @@ find_sve_type (const_tree type) + return {}; + } + ++/* Require argument ARGNO to be an SVE type (i.e. something that can be ++ represented by sve_type). Return the (valid) type if it is, otherwise ++ report an error and return an invalid type. */ ++sve_type ++function_resolver::infer_sve_type (unsigned int argno) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return {}; ++ ++ if (sve_type type = find_sve_type (actual)) ++ return type; ++ ++ if (scalar_argument_p (argno)) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE type rather than a scalar type", ++ actual, argno + 1, fndecl); ++ else ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE type", ++ actual, argno + 1, fndecl); ++ return {}; ++} ++ + /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS + vectors; NUM_VECTORS is 1 for the former. Return the associated type + suffix on success, using TYPE_SUFFIX_b for predicates. Report an error +@@ -1380,41 +1430,14 @@ type_suffix_index + function_resolver::infer_vector_or_tuple_type (unsigned int argno, + unsigned int num_vectors) + { +- tree actual = get_argument_type (argno); +- if (actual == error_mark_node) ++ auto type = infer_sve_type (argno); ++ if (!type) + return NUM_TYPE_SUFFIXES; + +- if (auto sve_type = find_sve_type (actual)) +- { +- if (sve_type.num_vectors == num_vectors) +- return sve_type.type; +- +- if (num_vectors == 1) +- error_at (location, "passing %qT to argument %d of %qE, which" +- " expects a single SVE vector rather than a tuple", +- actual, argno + 1, fndecl); +- else if (sve_type.num_vectors == 1 +- && sve_type.type != TYPE_SUFFIX_b) +- /* num_vectors is always != 1, so the singular isn't needed. */ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing single vector %qT to argument %d" +- " of %qE, which expects a tuple of %d vectors", +- actual, argno + 1, fndecl, num_vectors); +- else +- /* num_vectors is always != 1, so the singular isn't needed. 
*/ +- error_n (location, num_vectors, "%qT%d%qE%d", +- "passing %qT to argument %d of %qE, which" +- " expects a tuple of %d vectors", actual, argno + 1, +- fndecl, num_vectors); +- return NUM_TYPE_SUFFIXES; +- } ++ if (type.num_vectors == num_vectors) ++ return type.type; + +- if (num_vectors == 1) +- error_at (location, "passing %qT to argument %d of %qE, which" +- " expects an SVE vector type", actual, argno + 1, fndecl); +- else +- error_at (location, "passing %qT to argument %d of %qE, which" +- " expects an SVE tuple type", actual, argno + 1, fndecl); ++ report_incorrect_num_vectors (argno, type, num_vectors); + return NUM_TYPE_SUFFIXES; + } + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index f4f2c415f..5a4f35123 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -451,6 +451,8 @@ public: + tree get_argument_type (unsigned int); + bool scalar_argument_p (unsigned int); + ++ void report_incorrect_num_vectors (unsigned int, sve_type, unsigned int); ++ + tree report_no_such_form (sve_type); + tree lookup_form (mode_suffix_index, + type_suffix_index = NUM_TYPE_SUFFIXES, +@@ -463,6 +465,7 @@ public: + + type_suffix_index infer_integer_scalar_type (unsigned int); + type_suffix_index infer_pointer_type (unsigned int, bool = false); ++ sve_type infer_sve_type (unsigned int); + type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); + type_suffix_index infer_vector_type (unsigned int); + type_suffix_index infer_integer_vector_type (unsigned int); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c +index 714265ed1..a17e99f5d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c +@@ -10,14 +10,14 @@ f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, + { + svadrh_index (u32); /* { dg-error {too few arguments to function 'svadrh_index'} } */ + svadrh_index (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrh_index'} } */ +- svadrh_index (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ +- svadrh_index (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ ++ svadrh_index (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrh_index', which expects an SVE type} } */ ++ svadrh_index (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrh_index', which expects an SVE type rather than a scalar} } */ + svadrh_index (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ + svadrh_index (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ + svadrh_index (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ + svadrh_index (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ + +- svadrh_index (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrh_index', which expects an SVE vector type} } */ ++ svadrh_index (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrh_index', which expects
an SVE type rather than a scalar} } */ + svadrh_index (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ + svadrh_index (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ + svadrh_index (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c +index 528d7ac51..627ae8ac5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c +@@ -10,14 +10,14 @@ f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, + { + svadrb_offset (u32); /* { dg-error {too few arguments to function 'svadrb_offset'} } */ + svadrb_offset (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrb_offset'} } */ +- svadrb_offset (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ +- svadrb_offset (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ ++ svadrb_offset (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrb_offset', which expects an SVE type} } */ ++ svadrb_offset (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrb_offset', which expects an SVE type rather than a scalar} } */ + svadrb_offset (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ + svadrb_offset (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ + svadrb_offset (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ + svadrb_offset (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ + +- svadrb_offset (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrb_offset', which expects an SVE vector type} } */ ++ svadrb_offset (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrb_offset', which expects an SVE type rather than a scalar} } */ + svadrb_offset (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ + svadrb_offset (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ + svadrb_offset (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +index 8ce89fa10..4343146de 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +@@ -10,5 +10,5 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16) + svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ + svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous
arguments had type 'svuint8_t'} } */ + svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ +- svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE vector type} } */ ++ svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c +index 965e9a13c..9902379f6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c +@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, + svscale_x (s32, f16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ + svscale_x (1, f16, s32); /* { dg-error {passing 'int' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ + svscale_x (pg, pg, s16); /* { dg-error {'svscale_x' has no form that takes 'svbool_t' arguments} } */ +- svscale_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svscale_x', which expects an SVE vector type} } */ ++ svscale_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svscale_x', which expects an SVE type rather than a scalar} } */ + svscale_x (pg, f16, s16); + svscale_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ + svscale_x (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +index 3913ff63d..10b6b7e81 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +@@ -10,8 +10,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, + svmul_lane (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmul_lane'} } */ + svmul_lane (pg, pg, 0); /* { dg-error {'svmul_lane' has no form that takes 'svbool_t' arguments} } */ + svmul_lane (s32, s32, 0); /* { dg-error {ACLE function 'svmul_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */ +- svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE vector type} } */ +- svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE vector type} } */ ++ svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ ++ svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ + svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */ + svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ + svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c +index bfe78088b..805863f76 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c +@@ -19,8 +19,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, + svmullb_lane (f16, f16, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat16_t' arguments} } */ + svmullb_lane (f32, f32, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat32_t' arguments} } */ + svmullb_lane (f64, f64, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat64_t' arguments} } */ +- svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE vector type} } */ +- svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE vector type} } */ ++ svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ ++ svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ + svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but previous arguments had type 'svuint32_t'} } */ + svmullb_lane (u32, u32, s32); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ + svmullb_lane (u32, u32, i); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c +index 27893c6fb..ee704eeae 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c +@@ -23,7 +23,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddlb (u64, u64); /* { dg-error {'svaddlb' has no form that takes 'svuint64_t' arguments} } */ + svaddlb (s64, s64); /* { dg-error {'svaddlb' has no form that takes 'svint64_t' arguments} } */ + svaddlb (f16, f16); /* { dg-error {'svaddlb' has no form that takes 'svfloat16_t' arguments} } */ +- svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE vector type} } */ ++ svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE type rather than a scalar} } */ + svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ + svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ + svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c +index 0c69e66a1..ff4f0ff75 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c +@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint8_t u8, svfloat16_t f16, int i, float f) + { + svinsr (u8); /* { dg-error {too few arguments to function 'svinsr'} } */ + svinsr (u8, 0, 0); /* { dg-error {too many arguments to 
function 'svinsr'} } */ +- svinsr (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svinsr', which expects an SVE vector type} } */ ++ svinsr (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svinsr', which expects an SVE type rather than a scalar} } */ + svinsr (u8, 0); + svinsr (u8, -1); + svinsr (u8, i); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c +index 920cbd1b0..8ca549ba9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c +@@ -23,7 +23,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddhnb (u64, u64); + svaddhnb (s64, s64); + svaddhnb (f32, f32); /* { dg-error {'svaddhnb' has no form that takes 'svfloat32_t' arguments} } */ +- svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE vector type} } */ ++ svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE type rather than a scalar} } */ + svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ + svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ + svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c +index eb70d058e..2b537965b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c +@@ -26,8 +26,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddhnt (u32, u64, u64); + svaddhnt (s32, s64, s64); + svaddhnt (f16, f32, f32); /* { dg-error {'svaddhnt' has no form that takes 'svfloat32_t' arguments} } */ +- svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE vector type} } */ +- svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE vector type} } */ ++ svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ ++ svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ + svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ + svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ + svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c +index 9fa83ca99..a151f90d1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c +@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svadd_x (pg, u8, u8, u8); /* { 
dg-error {too many arguments to function 'svadd_x'} } */ + svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */ + svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ +- svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE vector type} } */ ++ svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE type rather than a scalar} } */ + svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ + svadd_x (pg, u8, u8); + svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c +index 4d0b253e3..70ec9c585 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c +@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svand_z (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svand_z'} } */ + svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */ + svand_z (pg, pg, pg); +- svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE vector type} } */ ++ svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE type rather than a scalar} } */ + svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ + svand_z (pg, u8, u8); + svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c +index 8ffe91bce..7669e4a02 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c +@@ -10,8 +10,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) + svcadd_x (f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcadd_x', which expects 'svbool_t'} } */ + svcadd_x (pg, pg, pg, 90); /* { dg-error {'svcadd_x' has no form that takes 'svbool_t' arguments} } */ + svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */ +- svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE vector type} } */ +- svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE vector type} } */ ++ svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ ++ svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ + svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */ + svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must 
be an integer constant expression} } */ + svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c +index 213defc66..154662487 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c +@@ -11,9 +11,9 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32) + svhistcnt_z (pg, s32, s32, 0); /* { dg-error {too many arguments to function 'svhistcnt_z'} } */ + svhistcnt_z (0, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ + svhistcnt_z (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ +- svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE vector type} } */ ++ svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ + svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svbool_t'} } */ + svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svint32_t'} } */ +- svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE vector type} } */ ++ svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ + svhistcnt_z (pg, pg, pg); /* { dg-error {'svhistcnt_z' has no form that takes 'svbool_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c +index c8ca5f746..207552a3b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c +@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint8_t u8, int i, float f) + { + svdupq_lane (u8); /* { dg-error {too few arguments to function 'svdupq_lane'} } */ + svdupq_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdupq_lane'} } */ +- svdupq_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdupq_lane', which expects an SVE vector type} } */ ++ svdupq_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdupq_lane', which expects an SVE type rather than a scalar} } */ + svdupq_lane (u8, 0); + svdupq_lane (u8, -1); + svdupq_lane (u8, i); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c +index be217394f..c661a66f3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c +@@ -8,7 +8,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint64_t u64) + svlsl_wide_x (pg, u8); /* { dg-error {too few arguments to function 'svlsl_wide_x'} } */ + svlsl_wide_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svlsl_wide_x'} } */ + svlsl_wide_x (u8, u8, u64); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svlsl_wide_x', which expects 'svbool_t'} } */ +- 
svlsl_wide_x (pg, 1, u64); /* { dg-error {passing 'int' to argument 2 of 'svlsl_wide_x', which expects an SVE vector type} } */ ++ svlsl_wide_x (pg, 1, u64); /* { dg-error {passing 'int' to argument 2 of 'svlsl_wide_x', which expects an SVE type rather than a scalar} } */ + svlsl_wide_x (pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svlsl_wide_x', which expects 'svuint64_t'} } */ + svlsl_wide_x (pg, u64, u64); /* { dg-error {'svlsl_wide_x' has no form that takes 'svuint64_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c +index 8f86c50b6..8493d5d68 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c +@@ -11,7 +11,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, + svtbl (pg, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ + svtbl (pg, u8); /* { dg-error {'svtbl' has no form that takes 'svbool_t' arguments} } */ + +- svtbl (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svtbl', which expects an SVE vector type} } */ ++ svtbl (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svtbl', which expects an SVE type rather than a scalar} } */ + svtbl (u8, u8); + svtbl (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ + svtbl (u8, u16); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c +index 36a902e69..d74cb46f7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c +@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint8_t u8, int i, float f) + { + svdup_lane (u8); /* { dg-error {too few arguments to function 'svdup_lane'} } */ + svdup_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdup_lane'} } */ +- svdup_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdup_lane', which expects an SVE vector type} } */ ++ svdup_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdup_lane', which expects an SVE type rather than a scalar} } */ + svdup_lane (u8, 0); + svdup_lane (u8, -1); + svdup_lane (u8, i); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c +index b162ab405..f44d7a9fa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c +@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, + svlsl_x (s32, s32, u32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ + svlsl_x (1, s32, u32); /* { dg-error {passing 'int' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ + svlsl_x (pg, pg, u16); /* { dg-error {'svlsl_x' has no form that takes 'svbool_t' arguments} } */ +- svlsl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svlsl_x', which expects an SVE vector type} } */ ++ 
svlsl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svlsl_x', which expects an SVE type rather than a scalar} } */ + svlsl_x (pg, s16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ + svlsl_x (pg, s16, u16); + svlsl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c +index f58ab75d7..ba38361ab 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c +@@ -30,8 +30,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svadalp_m (pg, s16, s8); + svadalp_m (pg, f32, f16); /* { dg-error {'svadalp_m' has no form that takes 'svfloat32_t' arguments} } */ + svadalp_m (pg, f16, f32); /* { dg-error {'svadalp_m' has no form that takes 'svfloat16_t' arguments} } */ +- svadalp_m (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE vector type} } */ +- svadalp_m (pg, 0, u64); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE vector type} } */ +- svadalp_m (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE vector type} } */ +- svadalp_m (pg, u16, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE vector type} } */ ++ svadalp_m (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ ++ svadalp_m (pg, 0, u64); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ ++ svadalp_m (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ ++ svadalp_m (pg, u16, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c +index 5a58211a0..fd27d8559 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c +@@ -27,8 +27,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddwb (s16, s8); + svaddwb (f32, f16); /* { dg-error {'svaddwb' has no form that takes 'svfloat32_t' arguments} } */ + svaddwb (f16, f32); /* { dg-error {'svaddwb' has no form that takes 'svfloat16_t' arguments} } */ +- svaddwb (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE vector type} } */ +- svaddwb (0, u64); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE vector type} } */ ++ svaddwb (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE type rather than a scalar} } */ ++ svaddwb (0, u64); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE type rather than a scalar} } */ + svaddwb (u8, 0); /* { dg-error {'svaddwb' has no form that takes 'svuint8_t' arguments} } */ + svaddwb (u16, 0); + svaddwb (u32, 0); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c 
+index cb9ac946c..ba1b2520f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c +@@ -6,10 +6,10 @@ test (svbool_t pg, svint32_t s32, svint64_t s64, int i) + svclasta (pg, 1); /* { dg-error {too few arguments to function 'svclasta'} } */ + svclasta (pg, 1, s32, 1); /* { dg-error {too many arguments to function 'svclasta'} } */ + svclasta (1, 1, s32); /* { dg-error {passing 'int' to argument 1 of 'svclasta', which expects 'svbool_t'} } */ +- svclasta (pg, 1, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ ++ svclasta (pg, 1, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */ + svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ + svclasta (pg, i, s32); +- svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ ++ svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */ + svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */ + svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c +index 12511a85b..5474124cc 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c +@@ -12,14 +12,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svmatch (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svmatch'} } */ + svmatch (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmatch', which expects 'svbool_t'} } */ + svmatch (pg, pg, pg); /* { dg-error {'svmatch' has no form that takes 'svbool_t' arguments} } */ +- svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE vector type} } */ ++ svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE type rather than a scalar} } */ + svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ + svmatch (pg, u8, u8); + svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ + svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ + svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ + svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ +- svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE vector type} } */ ++ svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE type rather than a scalar} } */ + + svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ + svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 
of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c +index 71c8e86d5..6faa73972 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c +@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svcmpeq (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svcmpeq'} } */ + svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */ + svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */ +- svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE vector type} } */ ++ svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE type rather than a scalar} } */ + svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ + svcmpeq (pg, u8, u8); + svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c +index fc5e45663..655f03360 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c +@@ -9,7 +9,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint64_t s64, svuint64_t u64, + svcmpeq_wide (pg, s8); /* { dg-error {too few arguments to function 'svcmpeq_wide'} } */ + svcmpeq_wide (pg, s8, s64, s8); /* { dg-error {too many arguments to function 'svcmpeq_wide'} } */ + svcmpeq_wide (s8, s8, s64); /* { dg-error {passing 'svint8_t' to argument 1 of 'svcmpeq_wide', which expects 'svbool_t'} } */ +- svcmpeq_wide (pg, 0, s64); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq_wide', which expects an SVE vector type} } */ ++ svcmpeq_wide (pg, 0, s64); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq_wide', which expects an SVE type rather than a scalar} } */ + svcmpeq_wide (pg, s8, 0); + svcmpeq_wide (pg, s8, x); + svcmpeq_wide (pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c +index daf9e0d5b..b57d9de1d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c +@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint32_t u32, svuint32x2_t u32x2) + { + svlen (); /* { dg-error {too few arguments to function 'svlen'} } */ + svlen (u32, u32); /* { dg-error {too many arguments to function 'svlen'} } */ +- svlen (0); /* { dg-error {passing 'int' to argument 1 of 'svlen', which expects an SVE vector type} } */ ++ svlen (0); /* { dg-error {passing 'int' to argument 1 of 'svlen', which expects an SVE type rather than a scalar} } */ + svlen (pg); /* { dg-error {'svlen' has no form that takes 'svbool_t' arguments} } */ + svlen (u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 1 of 'svlen', 
which expects a single SVE vector rather than a tuple} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c +index 31321a046..83e4a5600 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c +@@ -12,8 +12,8 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, + *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */ + *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ + *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ +- *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE vector type} } */ +- *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE vector type} } */ ++ *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */ ++ *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */ + *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */ + *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */ + *ptr = svcreate2 (u8, u8); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c +index a88e56b31..e3302f7e7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c +@@ -13,8 +13,8 @@ f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, + *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */ + *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ + *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ +- *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE vector type} } */ +- *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE vector type} } */ ++ *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE type rather than a scalar} } */ ++ *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE type rather than a scalar} } */ + *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */ + *ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */ + *ptr = svcreate3 (f16, f16, f16); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c +index fed124506..c850c94f0 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c +@@ -14,8 +14,8 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, + *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */ + *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ + *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ +- *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE vector type} } */ +- *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE vector type} } */ ++ *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */ ++ *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */ + *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */ + *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */ + *ptr = svcreate4 (s32, s32, s32, s32); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c +index 1d292786d..181d1b01b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c +@@ -15,7 +15,7 @@ f1 (svbool_t pg, int i, float f, double d, void *ptr, svfloat32_t f32, + svadda (pg, ptr, f32); /* { dg-error {incompatible type for argument 2 of 'svadda_f32'} } */ + svadda (pg, pg, f32); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadda', which expects a scalar element} } */ + svadda (pg, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadda', which expects a scalar element} } */ +- svadda (pg, f, f); /* { dg-error {passing 'float' to argument 3 of 'svadda', which expects an SVE vector type} } */ ++ svadda (pg, f, f); /* { dg-error {passing 'float' to argument 3 of 'svadda', which expects an SVE type rather than a scalar} } */ + svadda (pg, i, i32); /* { dg-error {'svadda' has no form that takes 'svint32_t' arguments} } */ +- svadda (pg, i, i); /* { dg-error {passing 'int' to argument 3 of 'svadda', which expects an SVE vector type} } */ ++ svadda (pg, i, i); /* { dg-error {passing 'int' to argument 3 of 'svadda', which expects an SVE type rather than a scalar} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c +index a61afcd2d..4de082d01 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c +@@ -7,7 +7,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8, + { + svqincp (s32); /* { dg-error {too few arguments to function 'svqincp'} } */ + 
svqincp (s32, pg, pg); /* { dg-error {too many arguments to function 'svqincp'} } */
+- svqincp (i, pg); /* { dg-error {passing 'int' to argument 1 of 'svqincp', which expects an SVE vector type} } */
++ svqincp (i, pg); /* { dg-error {passing 'int' to argument 1 of 'svqincp', which expects an SVE type rather than a scalar} } */
+ svqincp (pg, pg); /* { dg-error {'svqincp' has no form that takes 'svbool_t' arguments} } */
+ svqincp (s8, pg); /* { dg-error {'svqincp' has no form that takes 'svint8_t' arguments} } */
+ svqincp (u8, pg); /* { dg-error {'svqincp' has no form that takes 'svuint8_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
+index 5b0b00e96..7fc7bb67b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
+@@ -23,22 +23,22 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32,
+ {
+ svmmla (s32, s8); /* { dg-error {too few arguments to function 'svmmla'} } */
+ svmmla (s32, s8, s8, s8); /* { dg-error {too many arguments to function 'svmmla'} } */
+- svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE vector type} } */
++ svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE type rather than a scalar} } */
+ svmmla (pg, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svbool_t' arguments} } */
+ svmmla (u8, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svuint8_t' arguments} } */
+
+- svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */
++ svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE type rather than a scalar} } */
+ svmmla (s32, u8, s8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+ svmmla (s32, s8, u8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+- svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */
++ svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE type rather than a scalar} } */
+ svmmla (s32, s8, s8);
+ svmmla (s32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */
+ svmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */
+
+- svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */
++ svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE type rather than a scalar} } */
+ svmmla (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+ svmmla (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+- svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */
++ svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE type rather than a scalar} } */
+ svmmla (u32, u8, u8);
+ svmmla (u32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */
+ svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c
+index b74721fad..88e0c35e7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c
+@@ -12,7 +12,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svprfb_gather (pg, u32); /* { dg-error {too few arguments to function 'svprfb_gather'} } */
+ svprfb_gather (pg, u32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfb_gather'} } */
+ svprfb_gather (0, u32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfb_gather', which expects 'svbool_t'} } */
+- svprfb_gather (pg, 0, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 2 of 'svprfb_gather', which expects an SVE vector type} } */
++ svprfb_gather (pg, 0, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 2 of 'svprfb_gather', which expects an SVE type rather than a scalar} } */
+
+ svprfb_gather (pg, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */
+ svprfb_gather (pg, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
+index ab0ef304a..025795e3d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
+ svorv (pg, u32, u32); /* { dg-error {too many arguments to function 'svorv'} } */
+ svorv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svorv', which expects 'svbool_t'} } */
+ svorv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svorv', which expects 'svbool_t'} } */
+- svorv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svorv', which expects an SVE vector type} } */
++ svorv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svorv', which expects an SVE type rather than a scalar} } */
+ svorv (pg, pg); /* { dg-error {'svorv' has no form that takes 'svbool_t' arguments} } */
+ svorv (pg, s32);
+ svorv (pg, u32);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c
+index f99a2887b..68bacd0a3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
+ svaddv (pg, u32, u32); /* { dg-error {too many arguments to function 'svaddv'} } */
+ svaddv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddv', which expects 'svbool_t'} } */
+ svaddv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svaddv', which expects 'svbool_t'} } */
+- svaddv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svaddv', which expects an SVE vector type} } */
++ svaddv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svaddv', which expects an SVE type rather than a scalar} } */
+ svaddv (pg, pg); /* { dg-error {'svaddv' has no form that takes 'svbool_t' arguments} } */
+ svaddv (pg, s32);
+ svaddv (pg, u32);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c
+index 6536679d5..c5942c701 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c
+@@ -66,5 +66,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+
+ svshrnb (f32, 1); /* { dg-error {'svshrnb' has no form that takes 'svfloat32_t' arguments} } */
+
+- svshrnb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnb', which expects an SVE vector type} } */
++ svshrnb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnb', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c
+index 51f9388bf..3ecd20a22 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c
+@@ -54,5 +54,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+
+ svqshrunb (f32, 1); /* { dg-error {'svqshrunb' has no form that takes 'svfloat32_t' arguments} } */
+
+- svqshrunb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunb', which expects an SVE vector type} } */
++ svqshrunb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunb', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c
+index 6c31cf8ec..e9d1d1337 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c
+@@ -76,6 +76,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+
+ svshrnt (f32, f32, 1); /* { dg-error {'svshrnt' has no form that takes 'svfloat32_t' arguments} } */
+
+- svshrnt (1, s32, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnt', which expects an SVE vector type} } */
+- svshrnt (s32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svshrnt', which expects an SVE vector type} } */
++ svshrnt (1, s32, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnt', which expects an SVE type rather than a scalar} } */
++ svshrnt (s32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svshrnt', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c
+index 2e35ad304..741495609 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c
+@@ -59,6 +59,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+
+ svqshrunt (u16, f32, 1); /* { dg-error {'svqshrunt' has no form that takes 'svfloat32_t' arguments} } */
+
+- svqshrunt (1, u32, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunt', which expects an SVE vector type} } */
+- svqshrunt (u32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svqshrunt', which expects an SVE vector type} } */
++ svqshrunt (1, u32, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunt', which expects an SVE type rather than a scalar} } */
++ svqshrunt (u32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svqshrunt', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c
+index 625f059af..0b2a3e837 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c
+@@ -13,7 +13,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
+ svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */
+ svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */
+ svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */
+- svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE vector type} } */
++ svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */
+ svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */
+ svst1 (pg, 0, s8);
+ svst1 (pg, (int32_t *) 0, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c
+index c718b3ee0..b35e8955f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c
+@@ -15,7 +15,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
+ svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */
+ svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */
+ svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-warning "passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */
+- svst1_vnum (pg, void_ptr, 0, 0); /* { dg-error {passing 'int' to argument 4 of 'svst1_vnum', which expects an SVE vector type} } */
++ svst1_vnum (pg, void_ptr, 0, 0); /* { dg-error {passing 'int' to argument 4 of 'svst1_vnum', which expects an SVE type rather than a scalar} } */
+ svst1_vnum (pg, void_ptr, 0, pg); /* { dg-error {'svst1_vnum' has no form that takes 'svbool_t' arguments} } */
+ svst1_vnum (pg, 0, 0, s8);
+ svst1_vnum (pg, (int32_t *) 0, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c
+index 10abf758c..3b3b56222 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c
+@@ -13,8 +13,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svst1_scatter (pg, u32); /* { dg-error {too few arguments to function 'svst1_scatter'} } */
+ svst1_scatter (pg, u32, u32, 0); /* { dg-error {too many arguments to function 'svst1_scatter'} } */
+ svst1_scatter (0, u32, u32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter', which expects 'svbool_t'} } */
+- svst1_scatter (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svst1_scatter', which expects an SVE vector type} } */
+- svst1_scatter (pg, u32, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1_scatter', which expects an SVE vector type} } */
++ svst1_scatter (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svst1_scatter', which expects an SVE type rather than a scalar} } */
++ svst1_scatter (pg, u32, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1_scatter', which expects an SVE type rather than a scalar} } */
+
+ svst1_scatter (pg, u32, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
+index a9233324c..9a554f54f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ {
+ svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */
+ svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */
+- svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */
++ svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE type rather than a scalar} } */
+ svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */
+ svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */
+ svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
+index 23f027f2d..87e74fbcf 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ {
+ svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */
+ svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */
+- svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */
++ svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE type rather than a scalar} } */
+ svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */
+ svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */
+ svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
+index 4755ca79a..ca1852644 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ {
+ svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */
+ svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */
+- svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */
++ svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE type rather than a scalar} } */
+ svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */
+ svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
+index 2d09a8eeb..efdfb8955 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ {
+ svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */
+ svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */
+- svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */
++ svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE type rather than a scalar} } */
+ svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */
+ svbfdot (u8, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */
+ svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c
+index 600be05a8..934b7bd60 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c
+@@ -10,14 +10,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ {
+ svsudot_lane (s32, s8, u8); /* { dg-error {too few arguments to function 'svsudot_lane'} } */
+ svsudot_lane (s32, s8, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane'} } */
+- svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svsudot_lane', which expects an SVE vector type} } */
++ svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svsudot_lane', which expects an SVE type rather than a scalar} } */
+ svsudot_lane (pg, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svbool_t' arguments} } */
+ svsudot_lane (u8, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svsudot_lane (f32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svsudot_lane (u32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint32_t' arguments} } */
+ svsudot_lane (s32, s8, u8, 0);
+- svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE vector type} } */
+- svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE vector type} } */
++ svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE type rather than a scalar} } */
++ svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE type rather than a scalar} } */
+
+ svsudot_lane (s32, s8, u8, 0);
+ svsudot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot_lane', which expects a vector of signed integers} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c
+index f95ac582f..c481996d3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c
+@@ -23,12 +23,12 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ {
+ svsudot (s32, s8); /* { dg-error {too few arguments to function 'svsudot'} } */
+ svsudot (s32, s8, u8, u8); /* { dg-error {too many arguments to function 'svsudot'} } */
+- svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE vector type} } */
++ svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE type rather than a scalar} } */
+ svsudot (pg, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svbool_t' arguments} } */
+ svsudot (u8, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svuint8_t' arguments} } */
+ svsudot (f32, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svfloat32_t' arguments} } */
+ svsudot (s32, s8, u8);
+- svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE vector type} } */
++ svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE type rather than a scalar} } */
+ svsudot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */
+ svsudot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot', which expects a vector of unsigned integers} } */
+ svsudot (s32, s8, 0);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
+index d59ffab40..520c11f79 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
+@@ -10,9 +10,9 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
+ svmla_lane (f32, f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane'} } */
+ svmla_lane (pg, pg, pg, 0); /* { dg-error {'svmla_lane' has no form that takes 'svbool_t' arguments} } */
+ svmla_lane (s32, s32, s32, 0); /* { dg-error {ACLE function 'svmla_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */
+- svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE vector type} } */
+- svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE vector type} } */
+- svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE vector type} } */
++ svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
++ svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
++ svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
+ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */
+ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */
+ svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
+index 68e51724c..3163d130c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
+@@ -11,9 +11,9 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
+ svcmla_lane (pg, pg, pg, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svbool_t' arguments} } */
+ svcmla_lane (s32, s32, s32, 0, 90); /* { dg-error {ACLE function 'svcmla_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */
+ svcmla_lane (f64, f64, f64, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svfloat64_t' arguments} } */
+- svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE vector type} } */
+- svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE vector type} } */
+- svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE vector type} } */
++ svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
++ svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
++ svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
+ svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */
+ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */
+ svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c
+index e20e1a122..dd67b4e4e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c
+@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ {
+ svmlalb_lane (u64, u32, u32); /* { dg-error {too few arguments to function 'svmlalb_lane'} } */
+ svmlalb_lane (u64, u32, u32, 0, 0); /* { dg-error {too many arguments to function 'svmlalb_lane'} } */
+- svmlalb_lane (0, u16, u16, 0); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_lane', which expects an SVE vector type} } */
++ svmlalb_lane (0, u16, u16, 0); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_lane', which expects an SVE type rather than a scalar} } */
+ svmlalb_lane (pg, u16, u16, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svbool_t' arguments} } */
+ svmlalb_lane (u8, u8, u8, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svuint8_t' arguments} } */
+ svmlalb_lane (u16, u8, u8, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svuint16_t' arguments} } */
+ svmlalb_lane (f16, u16, u16, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svfloat16_t' arguments} } */
+ svmlalb_lane (f32, f16, f16, 0);
+ svmlalb_lane (u32, u16, u16, 0);
+- svmlalb_lane (u32, 0, u16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_lane', which expects an SVE vector type} } */
++ svmlalb_lane (u32, 0, u16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_lane', which expects an SVE type rather than a scalar} } */
+ svmlalb_lane (u32, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svmlalb_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint16_t' respectively} } */
+- svmlalb_lane (u32, u16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svmlalb_lane', which expects an SVE vector type} } */
++ svmlalb_lane (u32, u16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svmlalb_lane', which expects an SVE type rather than a scalar} } */
+ svmlalb_lane (u32, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svmlalb_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint16_t' respectively} } */
+ svmlalb_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint16_t' to argument 2 of 'svmlalb_lane', after passing 'svuint32_t' to argument 1} } */
+ svmlalb_lane (u32, u8, u16, 0); /* { dg-error {passing 'svuint8_t' instead of the expected 'svuint16_t' to argument 2 of 'svmlalb_lane', after passing 'svuint32_t' to argument 1} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c
+index c6718cf37..157fd7cd5 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c
+@@ -10,13 +10,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint16_t u16, svuint32_t u32,
+ {
+ svabalb (u16, u8); /* { dg-error {too few arguments to function 'svabalb'} } */
+ svabalb (u16, u8, u8, u8); /* { dg-error {too many arguments to function 'svabalb'} } */
+- svabalb (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svabalb', which expects an SVE vector type} } */
++ svabalb (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svabalb', which expects an SVE type rather than a scalar} } */
+ svabalb (pg, u8, u8); /* { dg-error {'svabalb' has no form that takes 'svbool_t' arguments} } */
+ svabalb (u8, u8, u8); /* { dg-error {'svabalb' has no form that takes 'svuint8_t' arguments} } */
+ svabalb (f16, u8, u8); /* { dg-error {'svabalb' has no form that takes 'svfloat16_t' arguments} } */
+ svabalb (f32, f16, f16); /* { dg-error {'svabalb' has no form that takes 'svfloat32_t' arguments} } */
+ svabalb (u16, u8, u8);
+- svabalb (u16, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svabalb', which expects an SVE vector type} } */
++ svabalb (u16, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svabalb', which expects an SVE type rather than a scalar} } */
+ svabalb (u16, s8, u8); /* { dg-error {arguments 1 and 2 of 'svabalb' must have the same signedness, but the values passed here have type 'svuint16_t' and 'svint8_t' respectively} } */
+ svabalb (u16, u8, 0);
+ svabalb (u16, u8, s8); /* { dg-error {arguments 1 and 3 of 'svabalb' must have the same signedness, but the values passed here have type 'svuint16_t' and 'svint8_t' respectively} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
+index c4a80e9da..ac789c2be 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
+@@ -10,14 +10,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svmla_x (pg, u8, u8, u8, u8); /* { dg-error {too many arguments to function 'svmla_x'} } */
+ svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */
+ svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */
+- svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE vector type} } */
++ svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE type rather than a scalar} } */
+ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, u8);
+ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+- svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE vector type} } */
++ svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE type rather than a scalar} } */
+ svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c
+index e81552b64..c69b2d575 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c
+@@ -9,13 +9,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ {
+ svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */
+ svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */
+- svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE vector type} } */
++ svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
+ svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */
+ svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svdot_lane (u32, u8, u8, 0);
+- svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE vector type} } */
+- svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE vector type} } */
++ svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
++ svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE type rather than a scalar} } */
+
+ svdot_lane (s32, s8, s8, 0);
+ svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c
+index a748a8627..9e84e7a89 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c
+@@ -11,13 +11,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ {
+ svcdot_lane (u32, u8, u8, 0); /* { dg-error {too few arguments to function 'svcdot_lane'} } */
+ svcdot_lane (u32, u8, u8, 0, 0, 0); /* { dg-error {too many arguments to function 'svcdot_lane'} } */
+- svcdot_lane (0, u8, u8, 0, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot_lane', which expects an SVE vector type} } */
++ svcdot_lane (0, u8, u8, 0, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot_lane', which expects an SVE type rather than a scalar} } */
+ svcdot_lane (pg, u8, u8, 0, 0); /* { dg-error {'svcdot_lane' has no form that takes 'svbool_t' arguments} } */
+ svcdot_lane (s8, s8, s8, 0, 0); /* { dg-error {'svcdot_lane' has no form that takes 'svint8_t' arguments} } */
+ svcdot_lane (f32, s8, s8, 0, 0); /* { dg-error {'svcdot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svcdot_lane (s32, s8, s8, 0, 0);
+- svcdot_lane (s32, 0, s8, 0, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot_lane', which expects an SVE vector type} } */
+- svcdot_lane (s32, s8, 0, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot_lane', which expects an SVE vector type} } */
++ svcdot_lane (s32, 0, s8, 0, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot_lane', which expects an SVE type rather than a scalar} } */
++ svcdot_lane (s32, s8, 0, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot_lane', which expects an SVE type rather than a scalar} } */
+
+ svcdot_lane (s32, s8, s8, 0, 0);
+ svcdot_lane (s32, u8, s8, 0, 0); /* { dg-error {arguments 1 and 2 of 'svcdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c
+index fee4096fe..85d4b2dd8 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c
+@@ -8,12 +8,12 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ {
+ svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */
+ svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */
+- svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE vector type} } */
++ svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE type rather than a scalar} } */
+ svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */
+ svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */
+ svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' arguments} } */
+ svdot (u32, u8, u8);
+- svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE vector type} } */
++ svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE type rather than a scalar} } */
+ svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+ svdot (u32, u8, 0);
+ svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c
+index 65e749ba7..9dd7eaf3c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c
+@@ -11,13 +11,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ {
+ svcdot (u32, u8, u8); /* { dg-error {too few arguments to function 'svcdot'} } */
+ svcdot (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svcdot'} } */
+- svcdot (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot', which expects an SVE vector type} } */
++ svcdot (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot', which expects an SVE type rather than a scalar} } */
+ svcdot (pg, u8, u8, 0); /* { dg-error {'svcdot' has no form that takes 'svbool_t' arguments} } */
+ svcdot (s8, s8, s8, 0); /* { dg-error {'svcdot' has no form that takes 'svint8_t' arguments} } */
+ svcdot (f32, s8, s8, 0); /* { dg-error {'svcdot' has no form that takes 'svfloat32_t' arguments} } */
+ svcdot (s32, s8, s8, 0);
+- svcdot (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot', which expects an SVE vector type} } */
+- svcdot (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot', which expects an SVE vector type} } */
++ svcdot (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot', which expects an SVE type rather than a scalar} } */
++ svcdot (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot', which expects an SVE type rather than a scalar} } */
+
+ svcdot (s32, s8, s8, 0);
+ svcdot (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svcdot' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
+index f340e3d1e..bb6740289 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
+@@ -10,9 +10,9 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
+ svcmla_x (f32, f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcmla_x', which expects 'svbool_t'} } */
+ svcmla_x (pg, pg, pg, pg, 90); /* { dg-error {'svcmla_x' has no form that takes 'svbool_t' arguments} } */
+ svcmla_x (pg, s32, s32, s32, 90); /* { dg-error {'svcmla_x' has no form that takes 'svint32_t' arguments} } */
+- svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE vector type} } */
+- svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE vector type} } */
+- svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE vector type} } */
++ svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
++ svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
++ svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
+ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */
+ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */
+ svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
+index 28111375f..cfe601631 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
+@@ -12,10 +12,10 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16,
+ const int one = 1;
+ pg = svsra (pg, pg, 1); /* { dg-error {'svsra' has no form that takes 'svbool_t' arguments} } */
+ pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but previous arguments had type 'svbool_t'} } */
+- s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE vector type} } */
++ s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE type rather than a scalar} } */
+ s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */
+ s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */
+- s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE vector type} } */
++ s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE type rather than a scalar} } */
+ s8 = svsra (s8, s8, x); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */
+ s8 = svsra (s8, s8, one); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */
+ s8 = svsra (s8, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svsra', which expects a value in the range \[1, 8\]} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
+index 711b6a133..5fb497701 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
+@@ -13,8 +13,8 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16,
+ svtbx (pg, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
+ svtbx (pg, pg, u8); /* { dg-error {'svtbx' has no form that takes 'svbool_t' arguments} } */
+
+- svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE vector type} } */
+- svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE vector type} } */
++ svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */
++ svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE type rather than a scalar} } */
+ svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svuint8_t'} } */
+ svtbx (u8, u8, u8);
+ svtbx (u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
+@@ -29,7 +29,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16,
+ svtbx (s8, s8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
+ svtbx (s8, s8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
+
+- svtbx (u16, 0, u16); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE vector type} } */
++ svtbx (u16, 0, u16); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */
+ svtbx (u16, u16, u8); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svuint16_t' and 'svuint8_t' respectively} } */
+ svtbx (u16, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
+ svtbx (u16, u16, u16);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c
+index f52fb39bf..d1aad1de1 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c
+@@ -23,15 +23,15 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ {
+ svusmmla (s32, u8); /* { dg-error {too few arguments to function 'svusmmla'} } */
+ svusmmla (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusmmla'} } */
+- svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE vector type} } */
++ svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE type rather than a scalar} } */
+ svusmmla (pg, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svbool_t' arguments} } */
+ svusmmla (u8, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svuint8_t' arguments} } */
+ svusmmla (f32, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svfloat32_t' arguments} } */
+ svusmmla (s32, u8, s8);
+- svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE vector type} } */
++ svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE type rather than a scalar} } */
+ svusmmla (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmmla', which expects a vector of signed integers} } */
+ svusmmla (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusmmla', which expects a vector of unsigned integers} } */
+- svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE vector type} } */
++ svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE type rather than a scalar} } */
+ svusmmla (s32, u8, s8);
+ svusmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusmmla', after passing 'svint32_t' to argument 1} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c
+index b40cfe9e8..0cc5c7497 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c
+@@ -10,14 +10,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ {
+ svusdot_lane (s32, u8, s8); /* { dg-error {too few arguments to function 'svusdot_lane'} } */
+ svusdot_lane (s32, u8, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane'} } */
+- svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE vector type} } */
++ svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE type rather than a scalar} } */
+ svusdot_lane (pg, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svbool_t' arguments} } */
+ svusdot_lane (u8, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svusdot_lane (f32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svusdot_lane (u32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint32_t' arguments} } */
+ svusdot_lane (s32, u8, s8, 0);
+- svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE vector type} } */
+- svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE vector type} } */
++ svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE type rather than a scalar} } */
++ svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE type rather than a scalar} } */
+
+ svusdot_lane (s32, u8, s8, 0);
+ svusdot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c
+index 896b80390..f6585ae77 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c
+@@ -23,12 +23,12 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ {
+ svusdot (s32, u8); /* { dg-error {too few arguments to function 'svusdot'} } */
+ svusdot (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusdot'} } */
+- svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE vector type} } */
++ svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE type rather than a scalar} } */
+ svusdot (pg, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svbool_t' arguments} } */
+ svusdot (u8, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svuint8_t' arguments} } */
+ svusdot (f32, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svfloat32_t' arguments} } */
+ svusdot (s32, u8, s8);
+- svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE vector type} } */
++ svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE type rather than a scalar} } */
+ svusdot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot', which expects a vector of signed integers} } */
+ svusdot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot', which expects a vector of unsigned integers} } */
+ svusdot (s32, u8, 0);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
+index 8b98fc24d..c2eda93e3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
+@@ -9,8 +9,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
+ svtmad (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svtmad'} } */
+ svtmad (pg, pg, 0); /* { dg-error {'svtmad' has no form that takes 'svbool_t' arguments} } */
+ svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */
+- svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE vector type} } */
+- svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE vector type} } */
++ svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE type rather than a scalar} } */
++ svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE type rather than a scalar} } */
+ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */
+ svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
+ svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
+index eef85a01d..8c865a0e6 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
+@@ -7,7 +7,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32)
+ {
+ svabs_m (s32, pg); /* { dg-error {too few arguments to function 'svabs_m'} } */
+ svabs_m (s32, pg, s32, s32); /* { dg-error {too many arguments to function 'svabs_m'} } */
+- svabs_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svabs_m', which expects an SVE vector type} } */
++ svabs_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svabs_m', which expects an SVE type rather than a scalar} } */
+ svabs_m (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */
+ svabs_m (s32, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */
+ svabs_m (s32, pg, s32);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c
+index e94673a66..bf93e21a4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c
+@@ -9,7 +9,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8)
+ svabs_x (pg, s8, s8); /* { dg-error {too many arguments to function 'svabs_x'} } */
+ svabs_x (s8, s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svabs_x', which expects 'svbool_t'} } */
+ svabs_x (pg, pg); /* { dg-error {'svabs_x' has no form that takes 'svbool_t' arguments} } */
+- svabs_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svabs_x', which expects an SVE vector type} } */
++ svabs_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svabs_x', which expects an SVE type rather than a scalar} } */
+ svabs_x (pg, s8);
+ svabs_x (pg, u8); /* { dg-error {'svabs_x' has no form that takes 'svuint8_t' arguments} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c
+index caa4e623d..f59ad590b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c
+@@ -9,7 +9,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svcvt_f64_x (pg); /* { dg-error {too few arguments to function 'svcvt_f64_x'} } */
+ svcvt_f64_x (pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_x'} } */
+ svcvt_f64_x (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svcvt_f64_x', which expects 'svbool_t'} } */
+- svcvt_f64_x (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svcvt_f64_x', which expects an SVE vector type} } */
++ svcvt_f64_x (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svcvt_f64_x', which expects an SVE type rather than a scalar} } */
+
+ svcvt_f64_x (pg, s8); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint8_t' arguments} } */
+ svcvt_f64_x (pg, s16); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c
+index ddbd93b69..2649fd694 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c
+@@ -12,7 +12,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svcvt_f64_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */
+ svcvt_f64_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */
+ svcvt_f64_m (f64, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcvt_f64_m', which expects 'svbool_t'} } */
+- svcvt_f64_m (f64, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvt_f64_m', which expects an SVE vector type} } */
++ svcvt_f64_m (f64, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvt_f64_m', which expects an SVE type rather than a scalar} } */
+
+ svcvt_f64_m (f64, pg, s8); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint8_t' arguments} } */
+ svcvt_f64_m (f64, pg, s16); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c
+index 92c07b8c1..a5d56dec0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c
+@@ -14,7 +14,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svcvtnt_f32_m (0, pg, f64); /* { dg-error {passing 'int' to argument 1 of 'svcvtnt_f32_m', which expects 'svfloat32_t'} } */
+ svcvtnt_f32_m (pg, pg, f64); /* { dg-error {passing 'svbool_t' to argument 1 of 'svcvtnt_f32_m', which expects 'svfloat32_t'} } */
+ svcvtnt_f32_m (f32, s32, f64); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcvtnt_f32_m', which expects 'svbool_t'} } */
+- svcvtnt_f32_m (f32, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvtnt_f32_m', which expects an SVE vector type} } */
++ svcvtnt_f32_m (f32, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvtnt_f32_m', which expects an SVE type rather than a scalar} } */
+
+ svcvtnt_f32_m (f32, pg, s8); /* { dg-error {'svcvtnt_f32_m' has no form that takes 'svint8_t' arguments} } */
+ svcvtnt_f32_m (f32, pg, s16); /* { dg-error {'svcvtnt_f32_m' has no form that takes 'svint16_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c
+index c03d644ed..c2465e3e2 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c
+@@ -23,5 +23,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svqxtnb (u64);
+ svqxtnb (s64);
+ svqxtnb (f32); /* { dg-error {'svqxtnb' has no form that takes 'svfloat32_t' arguments} } */
+- svqxtnb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtnb', which expects an SVE vector type} } */
++ svqxtnb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtnb', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c
+index c3e210380..60051f80c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c
+@@ -23,5 +23,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svqxtunb (u64); /* { dg-error {'svqxtunb' has no form that takes 'svuint64_t' arguments} } */
+ svqxtunb (s64);
+ svqxtunb (f32); /* { dg-error {'svqxtunb' has no form that takes 'svfloat32_t' arguments} } */
+- svqxtunb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtunb', which expects an SVE vector type} } */
++ svqxtunb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtunb', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c
+index 4ed179cb3..a0612dcb7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c
+@@ -26,6 +26,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svqxtnt (u32, u64);
+ svqxtnt (s32, s64);
+ svqxtnt (f16, f32); /* { dg-error {'svqxtnt' has no form that takes 'svfloat32_t' arguments} } */
+- svqxtnt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtnt', which expects an SVE vector type} } */
+- svqxtnt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtnt', which expects an SVE vector type} } */
++ svqxtnt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtnt', which expects an SVE type rather than a scalar} } */
++ svqxtnt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtnt', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c
+index acaa546ee..8e5fa5b3d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c
+@@ -26,6 +26,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svqxtunt (u32, u64); /* { dg-error {'svqxtunt' has no form that takes 'svuint64_t' arguments} } */
+ svqxtunt (u32, s64);
+ svqxtunt (u16, f32); /* { dg-error {'svqxtunt' has no form that takes 'svfloat32_t' arguments} } */
+- svqxtunt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtunt', which expects an SVE vector type} } */
+- svqxtunt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtunt', which expects an SVE vector type} } */
++ svqxtunt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtunt', which expects an SVE type rather than a scalar} } */
++ svqxtunt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtunt', which expects an SVE type rather than a scalar} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c
+index 517d11ff0..e2e172d2d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c
+@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
+ {
+ svlogb_m (s32, pg); /* { dg-error {too few arguments to function 'svlogb_m'} } */
+ svlogb_m (s32, pg, f32, s32); /* { dg-error {too many arguments to function 'svlogb_m'} } */
+- svlogb_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svlogb_m', which expects an SVE vector type} } */
++ svlogb_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svlogb_m', which expects an SVE type rather than a scalar} } */
+ svlogb_m (s32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svlogb_m', which expects 'svbool_t'} } */
+ svlogb_m (s32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svlogb_m', which expects 'svbool_t'} } */
+ svlogb_m (s32, pg, s32); /* { dg-error {'svlogb_m' has no form that takes 'svint32_t' arguments} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c
+index 888b52513..b3cf0b9f5 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c
+@@ -8,7 +8,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
+ {
+ svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */
+ svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */
+- svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */
++ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE type rather than a scalar} } */
+ svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */
+ svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */
+ svclz_m (u32, pg, s32);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c
+index 233e847e9..da02d12fb 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c
+@@ -9,7 +9,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
+ {
+ svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */
+ svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */
+- svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */
++ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE type rather than a scalar} } */
+ svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */
+ svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */
+ svclz_m (u32, pg, s32);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c
+index da57b07ea..858a2a5e0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c
+@@ -9,6 +9,6 @@ f1 (svbool_t pg, svuint8_t u8)
+ svcnt_x (pg, u8, u8); /* { dg-error {too many arguments to function 'svcnt_x'} } */
+ svcnt_x (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcnt_x', which expects 'svbool_t'} } */
+ svcnt_x (pg, pg); /* { dg-error {'svcnt_x' has no form that takes 'svbool_t' arguments} } */
+- svcnt_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svcnt_x', which expects an SVE vector type} } */
++ svcnt_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svcnt_x', which expects an SVE type rather than a scalar} } */
+ svcnt_x (pg, u8);
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c
+index 9c8acdf2d..e3275a8ce 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c
+@@ -8,7 +8,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ {
+ svexpa (); /* { dg-error {too few arguments to function 'svexpa'} } */
+ svexpa (u16, u16); /* { dg-error {too many arguments to function 'svexpa'} } */
+- svexpa (1); /* { dg-error {passing 'int' to argument 1 of 'svexpa', which expects an SVE vector type} } */
++ svexpa (1); /* { dg-error {passing 'int' to argument 1 of 'svexpa', which expects an SVE type rather than a scalar} } */
+ svexpa (pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */
+ svexpa (s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */
+ svexpa (s16); /* { dg-error {passing 'svint16_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c
+index 95a97a72e..a194bd6ab 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c
+@@ -8,8 +8,8 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8,
+ {
+ svunpklo (); /* { dg-error {too few arguments to function 'svunpklo'} } */
+ svunpklo (pg, s8); /* { dg-error {too many arguments to function 'svunpklo'} } */
+- svunpklo (i); /* { dg-error {passing 'int' to argument 1 of 'svunpklo', which expects an SVE vector type} } */
+- svunpklo (f); /* { dg-error {passing 'float' to argument 1 of 'svunpklo', which expects an SVE vector type} } */
++ svunpklo (i); /* { dg-error {passing 'int' to argument 1 of 'svunpklo', which expects an SVE type rather than a scalar} } */
++
svunpklo (f); /* { dg-error {passing 'float' to argument 1 of 'svunpklo', which expects an SVE type rather than a scalar} } */ + svunpklo (pg); + svunpklo (s8); + svunpklo (s16); +-- +2.33.0 +
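The dg-error churn above is mechanical: the resolver's complaint about a scalar passed where an SVE value is expected was reworded from "expects an SVE vector type" to "expects an SVE type rather than a scalar", and every directive is updated to match. A minimal sketch of a call site that now gets the reworded diagnostic (illustrative, not part of the patch; assumes an aarch64 compiler with SVE support and arm_sve.h):

#include <arm_sve.h>

svuint8_t
popcount_bytes (svbool_t pg, svuint8_t u8)
{
  svcnt_x (pg, 1);   /* error: passing 'int' to argument 2 of 'svcnt_x',
                        which expects an SVE type rather than a scalar */
  return svcnt_x (pg, u8);   /* OK: argument 2 is an SVE vector */
}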
_service:tar_scm:0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch
Added
@@ -0,0 +1,120 @@ +From 3bb46830b0f92f54d1ef529796348c0a86504065 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Fri, 26 Apr 2024 15:59:11 +0800 +Subject: [PATCH 166/188] LoongArch: Add constraints for bit string operation + define_insn_and_split's [PR114861] + +Without the constraints, the compiler attempts to use a stack slot as the +target, causing an ICE building the kernel with -Os: + + drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c:3144:1: + error: could not split insn + (insn:TI 1764 67 1745 + (set (mem/c:DI (reg/f:DI 3 $r3) [707 %sfp+-80 S8 A64]) + (and:DI (reg/v:DI 28 $r28 [orig:422 raster_config ] [422]) + (const_int -50331649 [0xfffffffffcffffff]))) + "drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c":1386:21 111 + {*bstrins_di_for_mask} + (nil)) + +Add these constraints to fix the issue. + +gcc/ChangeLog: + + PR target/114861 + * config/loongarch/loongarch.md (bstrins_<mode>_for_mask): Add + constraints for operands. + (bstrins_<mode>_for_ior_mask): Likewise. + +gcc/testsuite/ChangeLog: + + PR target/114861 + * gcc.target/loongarch/pr114861.c: New test. +--- + gcc/config/loongarch/loongarch.md | 16 ++++---- + gcc/testsuite/gcc.target/loongarch/pr114861.c | 39 +++++++++++++++++++ + 2 files changed, 47 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr114861.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 95beb88fe..20494ce8a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1543,9 +1543,9 @@ + (set_attr "mode" "<MODE>")]) + + (define_insn_and_split "*bstrins_<mode>_for_mask" +- [(set (match_operand:GPR 0 "register_operand") +- (and:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "ins_zero_bitmask_operand")))] ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "ins_zero_bitmask_operand" "i")))] + "" + "#" + "" +@@ -1563,11 +1563,11 @@ + }) + + (define_insn_and_split "*bstrins_<mode>_for_ior_mask" +- [(set (match_operand:GPR 0 "register_operand") +- (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "const_int_operand")) +- (and:GPR (match_operand:GPR 3 "register_operand") +- (match_operand:GPR 4 "const_int_operand"))))] ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "const_int_operand" "i")) ++ (and:GPR (match_operand:GPR 3 "register_operand" "r") ++ (match_operand:GPR 4 "const_int_operand" "i"))))] + "loongarch_pre_reload_split () + && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)" + "#" +diff --git a/gcc/testsuite/gcc.target/loongarch/pr114861.c b/gcc/testsuite/gcc.target/loongarch/pr114861.c +new file mode 100644 +index 000000000..e6507c406 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr114861.c +@@ -0,0 +1,39 @@ ++/* PR114861: ICE building the kernel with -Os ++ Reduced from linux/fs/ntfs3/attrib.c at revision c942a0cd3603.
*/ ++/* { dg-do compile } */ ++/* { dg-options "-Os -march=loongarch64 -msoft-float -mabi=lp64s" } */ ++ ++long evcn, attr_collapse_range_vbo, attr_collapse_range_bytes; ++unsigned short flags; ++int attr_collapse_range_ni_0_0; ++int *attr_collapse_range_mi; ++unsigned attr_collapse_range_svcn, attr_collapse_range_vcn1; ++void ni_insert_nonresident (unsigned, unsigned short, int **); ++int mi_pack_runs (int); ++int ++attr_collapse_range (void) ++{ ++ _Bool __trans_tmp_1; ++ int run = attr_collapse_range_ni_0_0; ++ unsigned evcn1, vcn, end; ++ short a_flags = flags; ++ __trans_tmp_1 = flags & (32768 | 1); ++ if (__trans_tmp_1) ++ return 2; ++ vcn = attr_collapse_range_vbo; ++ end = attr_collapse_range_bytes; ++ evcn1 = evcn; ++ for (;;) ++ if (attr_collapse_range_svcn >= end) ++ { ++ unsigned eat, next_svcn = mi_pack_runs (42); ++ attr_collapse_range_vcn1 = (vcn ? vcn : attr_collapse_range_svcn); ++ eat = (0 < end) - attr_collapse_range_vcn1; ++ mi_pack_runs (run - eat); ++ if (next_svcn + eat) ++ ni_insert_nonresident (evcn1 - eat - next_svcn, a_flags, ++ &attr_collapse_range_mi); ++ } ++ else ++ return 42; ++} +-- +2.43.0 +
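Without constraints, the register allocator was free to put operand 0 in a stack slot, leaving the post-reload splitter with no valid bstrins form to emit. A minimal sketch of the kind of masking code that matches *bstrins_<mode>_for_mask (an illustrative reduction; pr114861.c above is the actual regression test):

/* -50331649 is 0xfffffffffcffffff, the const_int from the ICE dump:
   an AND mask whose zero bits form one contiguous field (bits 24-25),
   which LoongArch can implement as bstrins.d rd, $zero, 25, 24.  */
unsigned long
clear_raster_field (unsigned long raster_config)
{
  return raster_config & 0xfffffffffcffffffUL;
}

With the "=r", "r" and "i" constraints in place, operand 0 must be a general-purpose register, so the split can always succeed.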
_service:tar_scm:0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch
Added
@@ -0,0 +1,698 @@ +From 6a7cb5074824416ae562de0589550a930e9dbcaf Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:21 +0000 +Subject: [PATCH 068/157] [Backport][SME] aarch64: Replace vague "previous + arguments" message + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b52d4b66e8b91ec1e3de9c0b79aaf258824b875 + +If an SVE ACLE intrinsic requires two arguments to have the +same type, the C resolver would report mismatches as "argument N +has type T2, but previous arguments had type T1". This patch makes +the message say which argument had type T1. + +This is needed to give decent error messages for some SME cases. + +gcc/ + * config/aarch64/aarch64-sve-builtins.h + (function_resolver::require_matching_vector_type): Add a parameter + that specifies the number of the earlier argument that is being + matched against. + * config/aarch64/aarch64-sve-builtins.cc + (function_resolver::require_matching_vector_type): Likewise. + (require_derived_vector_type): Update calls accordingly. + (function_resolver::resolve_unary): Likewise. + (function_resolver::resolve_uniform): Likewise. + (function_resolver::resolve_uniform_opt_n): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.cc + (binary_long_lane_def::resolve): Likewise. + (clast_def::resolve, ternary_uint_def::resolve): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/*: Replace "but previous + arguments had" with "but argument N had". +--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 6 ++-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 17 +++++------ + gcc/config/aarch64/aarch64-sve-builtins.h | 3 +- + .../aarch64/sve/acle/general-c/binary_1.c | 6 ++-- + .../sve/acle/general-c/binary_lane_1.c | 2 +- + .../sve/acle/general-c/binary_long_lane_1.c | 2 +- + .../sve/acle/general-c/binary_long_opt_n_1.c | 8 +++--- + .../acle/general-c/binary_narrowb_opt_n_1.c | 8 +++--- + .../acle/general-c/binary_narrowt_opt_n_1.c | 8 +++--- + .../sve/acle/general-c/binary_opt_n_2.c | 14 +++++----- + .../sve/acle/general-c/binary_opt_n_3.c | 16 +++++------ + .../sve/acle/general-c/binary_rotate_1.c | 2 +- + .../sve/acle/general-c/binary_to_uint_1.c | 4 +-- + .../aarch64/sve/acle/general-c/clast_1.c | 2 +- + .../aarch64/sve/acle/general-c/compare_1.c | 14 +++++----- + .../sve/acle/general-c/compare_opt_n_1.c | 14 +++++----- + .../aarch64/sve/acle/general-c/create_1.c | 6 ++-- + .../aarch64/sve/acle/general-c/create_3.c | 6 ++-- + .../aarch64/sve/acle/general-c/create_5.c | 6 ++-- + .../aarch64/sve/acle/general-c/mmla_1.c | 14 +++++----- + .../sve/acle/general-c/ternary_lane_1.c | 4 +-- + .../acle/general-c/ternary_lane_rotate_1.c | 4 +-- + .../sve/acle/general-c/ternary_opt_n_1.c | 28 +++++++++---------- + .../sve/acle/general-c/ternary_rotate_1.c | 4 +-- + .../general-c/ternary_shift_right_imm_1.c | 6 ++-- + .../sve/acle/general-c/ternary_uint_1.c | 6 ++-- + .../aarch64/sve/acle/general-c/tmad_1.c | 2 +- + .../aarch64/sve/acle/general-c/unary_1.c | 8 +++--- + .../aarch64/sve/acle/general-c/undeclared_2.c | 2 +- + 29 files changed, 112 insertions(+), 110 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 3ecef026c..40aa418e0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -1153,7 +1153,7 @@ struct binary_long_lane_def : public overloaded_base<0> + type_suffix_index type, result_type; + if (!r.check_gp_argument (3, i,
nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_matching_vector_type (i + 1, i, type) + || !r.require_integer_immediate (i + 2) + || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) + return error_mark_node; +@@ -1608,7 +1608,7 @@ struct clast_def : public overloaded_base<0> + { + type_suffix_index type; + if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type)) ++ || !r.require_matching_vector_type (i + 1, i, type)) + return error_mark_node; + return r.resolve_to (MODE_none, type); + } +@@ -3108,7 +3108,7 @@ struct ternary_uint_def : public overloaded_base<0> + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES +- || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_matching_vector_type (i + 1, i, type) + || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) + return error_mark_node; + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 4e94e3633..1545fd78d 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -1561,11 +1561,12 @@ function_resolver::require_vector_type (unsigned int argno, + return true; + } + +-/* Like require_vector_type, but TYPE is inferred from previous arguments ++/* Like require_vector_type, but TYPE is inferred from argument FIRST_ARGNO + rather than being a fixed part of the function signature. This changes + the nature of the error messages. */ + bool + function_resolver::require_matching_vector_type (unsigned int argno, ++ unsigned int first_argno, + type_suffix_index type) + { + type_suffix_index new_type = infer_vector_type (argno); +@@ -1575,9 +1576,9 @@ function_resolver::require_matching_vector_type (unsigned int argno, + if (type != new_type) + { + error_at (location, "passing %qT to argument %d of %qE, but" +- " previous arguments had type %qT", ++ " argument %d had type %qT", + get_vector_type (new_type), argno + 1, fndecl, +- get_vector_type (type)); ++ first_argno + 1, get_vector_type (type)); + return false; + } + return true; +@@ -1626,7 +1627,7 @@ require_derived_vector_type (unsigned int argno, + { + /* There's no need to resolve this case out of order. */ + gcc_assert (argno > first_argno); +- return require_matching_vector_type (argno, first_type); ++ return require_matching_vector_type (argno, first_argno, first_type); + } + + /* Use FIRST_TYPE to get the expected type class and element size. */ +@@ -2314,7 +2315,7 @@ function_resolver::resolve_unary (type_class_index merge_tclass, + so we can use normal left-to-right resolution. 
*/ + if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !require_vector_type (1, VECTOR_TYPE_svbool_t) +- || !require_matching_vector_type (2, type)) ++ || !require_matching_vector_type (2, 0, type)) + return error_mark_node; + } + else +@@ -2359,9 +2360,9 @@ function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) + || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + +- i += 1; ++ unsigned int first_arg = i++; + for (; i < nargs - nimm; ++i) +- if (!require_matching_vector_type (i, type)) ++ if (!require_matching_vector_type (i, first_arg, type)) + return error_mark_node; + + for (; i < nargs; ++i) +@@ -2390,7 +2391,7 @@ function_resolver::resolve_uniform_opt_n (unsigned int nops) + + unsigned int first_arg = i++; + for (; i < nargs - 1; ++i) +- if (!require_matching_vector_type (i, type)) ++ if (!require_matching_vector_type (i, first_arg, type)) + return error_mark_node; + + return finish_opt_n_resolution (i, first_arg, type); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index 5a4f35123..f7d6cc084 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -476,7 +476,8 @@ public: + bool require_vector_or_scalar_type (unsigned int); + + bool require_vector_type (unsigned int, vector_type_index); +- bool require_matching_vector_type (unsigned int, type_suffix_index); ++ bool require_matching_vector_type (unsigned int, unsigned int, ++ type_suffix_index); + bool require_derived_vector_type (unsigned int, unsigned int, + type_suffix_index, + type_class_index = SAME_TYPE_CLASS, +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +index 4343146de..2e919d287 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +@@ -7,8 +7,8 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16) + { + svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */ + svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */ +- svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ +- svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ +- svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ ++ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but argument 1 had type 'svbool_t'} } */ ++ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ ++ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ + svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +index 10b6b7e81..81533b25d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, + svmul_lane (s32, s32, 
0); /* { dg-error {ACLE function 'svmul_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */ + svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ + svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ +- svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but argument 1 had type 'svfloat32_t'} } */ + svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ + svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c +index 805863f76..25b620877 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c +@@ -21,7 +21,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, + svmullb_lane (f64, f64, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat64_t' arguments} } */ + svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ + svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ +- svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but previous arguments had type 'svuint32_t'} } */ ++ svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but argument 1 had type 'svuint32_t'} } */ + svmullb_lane (u32, u32, s32); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ + svmullb_lane (u32, u32, i); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c +index ee704eeae..1f513dde9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c +@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddlb (s64, s64); /* { dg-error {'svaddlb' has no form that takes 'svint64_t' arguments} } */ + svaddlb (f16, f16); /* { dg-error {'svaddlb' has no form that takes 'svfloat16_t' arguments} } */ + svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE type rather than a scalar} } */ +- svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ +- svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ +- svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ +- svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but previous 
arguments had type 'svuint16_t'} } */ ++ svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */ ++ svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */ ++ svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */ ++ svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint16_t'} } */ + svaddlb (u8, 0); + svaddlb (u16, 0); + svaddlb (u32, 0); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c +index 8ca549ba9..4a29b5c43 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c +@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddhnb (s64, s64); + svaddhnb (f32, f32); /* { dg-error {'svaddhnb' has no form that takes 'svfloat32_t' arguments} } */ + svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE type rather than a scalar} } */ +- svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ +- svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ +- svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ +- svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ ++ svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ ++ svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ ++ svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ ++ svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ + svaddhnb (u8, 0); /* { dg-error {'svaddhnb' has no form that takes 'svuint8_t' arguments} } */ + svaddhnb (u16, 0); + svaddhnb (u32, 0); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c +index 2b537965b..4a442616e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c +@@ -28,10 +28,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svaddhnt (f16, f32, f32); /* { dg-error {'svaddhnt' has no form that takes 'svfloat32_t' arguments} } */ + svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ + svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ +- svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ +- svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 
'svaddhnt', but previous arguments had type 'svuint16_t'} } */ +- svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ +- svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ ++ svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ ++ svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ ++ svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ ++ svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ + svaddhnt (u8, u8, 0); /* { dg-error {'svaddhnt' has no form that takes 'svuint8_t' arguments} } */ + svaddhnt (u16, u16, 0); /* { dg-error {passing 'svuint16_t' instead of the expected 'svuint8_t' to argument 1 of 'svaddhnt', after passing 'svuint16_t' to argument 2} } */ + svaddhnt (s8, u16, 0); /* { dg-error {arguments 1 and 2 of 'svaddhnt' must have the same signedness, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c +index a151f90d1..40447cf83 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c +@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */ + svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ + svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE type rather than a scalar} } */ +- svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ + svadd_x (pg, u8, u8); +- svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ +- svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ +- svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ +- svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ + svadd_x (pg, u8, 0); + +- svadd_x (pg, f16, s16); /* { dg-error {passing 
'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ +- svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ ++ svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */ ++ svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */ + svadd_x (pg, f16, f16); + svadd_x (pg, f16, 1); + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c +index 70ec9c585..94e20bc91 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c +@@ -11,19 +11,19 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */ + svand_z (pg, pg, pg); + svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE type rather than a scalar} } */ +- svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ + svand_z (pg, u8, u8); +- svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ +- svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ +- svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ +- svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ ++ svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ ++ svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ ++ svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ + svand_z (pg, u8, 0); + +- svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svbool_t'} } */ ++ svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svbool_t'} } */ + svand_z (pg, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svand_z', but its 'svbool_t' form does not accept scalars} } */ + +- svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ +- svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ ++ svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */ ++ svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but 
argument 2 had type 'svfloat16_t'} } */ + svand_z (pg, f16, f16); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ + svand_z (pg, f16, 1); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c +index 7669e4a02..8939ce258 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c +@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) + svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */ + svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ + svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ +- svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */ ++ svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but argument 2 had type 'svfloat32_t'} } */ + svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ + svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ + svcadd_x (pg, f32, f32, -90); /* { dg-error {passing -90 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c +index 154662487..2c3fe5df1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c +@@ -12,8 +12,8 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32) + svhistcnt_z (0, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ + svhistcnt_z (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ + svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ +- svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svbool_t'} } */ +- svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svint32_t'} } */ ++ svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svbool_t'} } */ ++ svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svint32_t'} } */ + svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ + svhistcnt_z (pg, pg, pg); /* { dg-error {'svhistcnt_z' has no form that takes 'svbool_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c +index ba1b2520f..47ce47328 100644 +--- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c +@@ -10,6 +10,6 @@ test (svbool_t pg, svint32_t s32, svint64_t s64, int i) + svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ + svclasta (pg, i, s32); + svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */ +- svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */ ++ svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but argument 2 had type 'svint32_t'} } */ + svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c +index 5474124cc..0dd0ad910 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c +@@ -13,15 +13,15 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svmatch (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmatch', which expects 'svbool_t'} } */ + svmatch (pg, pg, pg); /* { dg-error {'svmatch' has no form that takes 'svbool_t' arguments} } */ + svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE type rather than a scalar} } */ +- svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ ++ svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ + svmatch (pg, u8, u8); +- svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ +- svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ +- svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ +- svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ ++ svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ ++ svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ ++ svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ ++ svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ + svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE type rather than a scalar} } */ + +- svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ +- svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ ++ svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */ ++ svmatch (pg, f16, u16); /* { dg-error 
{passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */ + svmatch (pg, f16, f16); /* { dg-error {'svmatch' has no form that takes 'svfloat16_t' arguments} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c +index 6faa73972..cfa50d387 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c +@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */ + svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */ + svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE type rather than a scalar} } */ +- svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ + svcmpeq (pg, u8, u8); +- svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ +- svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ +- svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ +- svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ + svcmpeq (pg, u8, 0); + +- svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ +- svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ ++ svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */ ++ svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */ + svcmpeq (pg, f16, f16); + svcmpeq (pg, f16, 1); + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c +index 83e4a5600..7a617aa15 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c +@@ -10,11 +10,11 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, + *ptr = svcreate2 (u8); /* { dg-error {too few arguments to function 'svcreate2'} } */ + *ptr = svcreate2 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2'} } 
*/ + *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */ +- *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ +- *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ ++ *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */ ++ *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */ + *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */ + *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */ +- *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */ ++ *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */ + *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */ + *ptr = svcreate2 (u8, u8); + *ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c +index e3302f7e7..40f3a1fed 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c +@@ -11,11 +11,11 @@ f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, + *ptr = svcreate3 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3'} } */ + *ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3'} } */ + *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */ +- *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ +- *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ ++ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */ ++ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */ + *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE type rather than a scalar} } */ + *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE type rather than a scalar} } */ +- *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */ ++ *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but argument 1 had type 'svbool_t'} } */ + *ptr = svcreate3 (pg, pg, 
pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */ + *ptr = svcreate3 (f16, f16, f16); + *ptr = svcreate3 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c +index c850c94f0..bf3dd5d75 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c +@@ -12,11 +12,11 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, + *ptr = svcreate4 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */ + *ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4'} } */ + *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */ +- *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ +- *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ ++ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but argument 1 had type 'svint32_t'} } */ ++ *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but argument 1 had type 'svint32_t'} } */ + *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */ + *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */ +- *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */ ++ *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but argument 1 had type 'svbool_t'} } */ + *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */ + *ptr = svcreate4 (s32, s32, s32, s32); + *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c +index 7fc7bb67b..ca2ab8a6f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c +@@ -44,13 +44,13 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32, + svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ + + svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ +- svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ +- svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ +- svmmla (f32, f16, f16); /* { 
dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ +- svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ +- svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ +- svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ +- svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ ++ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ ++ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ ++ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ ++ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ ++ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ ++ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ ++ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ + + svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ + svmmla (f32, f32, f32); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c +index 520c11f79..0a67f82bf 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c +@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, + svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ + svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ + svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ +- svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ +- svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */ ++ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */ + svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ + svmla_lane (f32, f32, f32, i); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c +index 3163d130c..60c9c466e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c +@@ -14,8 +14,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, + svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ + svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ + svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ +- svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ +- svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */ ++ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */ + svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ + svcmla_lane (f32, f32, f32, i, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c +index ac789c2be..6ca223475 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c +@@ -11,24 +11,24 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */ + svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */ + svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE type rather than a scalar} } */ +- svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ + svmla_x (pg, u8, u8, u8); +- svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 
3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ + svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE type rather than a scalar} } */ +- svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ +- svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ + svmla_x (pg, u8, u8, 0); + +- svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ +- svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ +- svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ +- svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ + svmla_x (pg, f16, f16, f16); + svmla_x (pg, f16, f16, 1); + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c +index bb6740289..68b2cfc1d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c +@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) + svcmla_x (pg, 1, 
f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ + svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ + svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ +- svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ +- svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ ++ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */ ++ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */ + svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ + svcmla_x (pg, f32, f32, f32, i); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ + svcmla_x (pg, f32, f32, f32, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c +index cfe601631..134cf98fd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c +@@ -11,10 +11,10 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, + { + const int one = 1; + pg = svsra (pg, pg, 1); /* { dg-error {'svsra' has no form that takes 'svbool_t' arguments} } */ +- pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but previous arguments had type 'svbool_t'} } */ ++ pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but argument 1 had type 'svbool_t'} } */ + s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE type rather than a scalar} } */ +- s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */ +- s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */ ++ s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */ ++ s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */ + s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE type rather than a scalar} } */ + s8 = svsra (s8, s8, x); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */ + s8 = svsra (s8, s8, one); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c +index 5fb497701..a639562b1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c +@@ -15,14 +15,14 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, + + svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */ + svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE type rather than a scalar} } */ +- svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svuint8_t'} } */ ++ svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svuint8_t'} } */ + svtbx (u8, u8, u8); + svtbx (u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + svtbx (u8, u8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */ + svtbx (u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + svtbx (u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + +- svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svint8_t'} } */ ++ svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svint8_t'} } */ + svtbx (s8, s8, u8); + svtbx (s8, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + svtbx (s8, s8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ +@@ -36,7 +36,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, + svtbx (u16, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + svtbx (u16, u16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + +- svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but previous arguments had type 'svint16_t'} } */ ++ svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but argument 1 had type 'svint16_t'} } */ + svtbx (s16, s16, u8); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint8_t' respectively} } */ + svtbx (s16, s16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ + svtbx (s16, s16, u16); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c +index c2eda93e3..992b50199 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c +@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) + svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */ + svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE type rather than a scalar} } */ + svtmad (f32, 1, 0); /* { dg-error {passing 'int' to 
argument 2 of 'svtmad', which expects an SVE type rather than a scalar} } */
+- svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */
++ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but argument 1 had type 'svfloat32_t'} } */
+ svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
+ svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
+ svtmad (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
+index 8c865a0e6..9c9c383dd 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
+@@ -13,9 +13,9 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32)
+ svabs_m (s32, pg, s32);
+ svabs_m (u32, pg, u32); /* { dg-error {'svabs_m' has no form that takes 'svuint32_t' arguments} } */
+ svabs_m (f32, pg, f32);
+- svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
+- svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
+- svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
+- svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svbool_t'} } */
++ svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
++ svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
++ svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
++ svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svbool_t'} } */
+ svabs_m (pg, pg, pg); /* { dg-error {'svabs_m' has no form that takes 'svbool_t' arguments} } */
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
+index 7e869bda8..6ffd3d9e8 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
+@@ -9,7 +9,7 @@ f (svint8_t s8, svuint16_t u16, svfloat32_t f32,
+ u16 = svneg_x (pg, u16); /* { dg-error {'svneg_x' has no form that takes 'svuint16_t' arguments} } */
+ f32 = svclz_x (pg, f32); /* { dg-error {'svclz_x' has no form that takes 'svfloat32_t' arguments} } */
+ s16x2 = svcreate2 (s8); /* { dg-error {too few arguments to function 'svcreate2'} } */
+- u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but previous arguments had type 'svuint16_t'} } */
++ u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but argument 1 had type 'svuint16_t'} } */
+ f64x4 = svcreate4 (f32, f32, f32, f32, f32); /* { dg-error {too many arguments to function 'svcreate4'} } */
+ pg = 
svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ + } +-- +2.33.0 +
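The patch above is purely a diagnostics change: the SVE ACLE resolver now names the exact argument that established the expected type instead of the vague "previous arguments". A minimal user-level sketch of the effect, reusing the svmla_x call and the exact wording from the ternary_opt_n_1.c hunk above (the wrapper function f is hypothetical):

#include <arm_sve.h>

svuint8_t
f (svbool_t pg, svuint8_t u8, svint8_t s8)
{
  /* Old wording: "... but previous arguments had type 'svuint8_t'".
     New wording: "passing 'svint8_t' to argument 3 of 'svmla_x',
     but argument 2 had type 'svuint8_t'".  */
  return svmla_x (pg, u8, s8, u8);
}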
View file
_service:tar_scm:0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch
Added
@@ -0,0 +1,67 @@
+From be1397b598a436d562e6a35a13ed2ae695531255 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 22 May 2024 09:29:43 +0800
+Subject: [PATCH 167/188] LoongArch: Guard REGNO with REG_P in
+ loongarch_expand_conditional_move [PR115169]
+
+gcc/ChangeLog:
+
+ PR target/115169
+ * config/loongarch/loongarch.cc
+ (loongarch_expand_conditional_move): Guard REGNO with REG_P.
+---
+ gcc/config/loongarch/loongarch.cc | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c86a0856b..0c2c38f6f 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5341,6 +5341,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ rtx op1_extend = op1;
+
+ /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
++ bool promote_op[2] = {false, false};
+ bool promote_p = false;
+ machine_mode mode = GET_MODE (operands[0]);
+
+@@ -5348,9 +5349,15 @@ loongarch_expand_conditional_move (rtx *operands)
+ loongarch_emit_float_compare (&code, &op0, &op1);
+ else
+ {
+- if ((REGNO (op0) == REGNO (operands[2])
+- || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+- && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
++ if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
++ {
++ promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
++ REGNO (op0) == REGNO (operands[2]));
++ promote_op[1] = (REG_P (op1) && REG_P (operands[3]) &&
++ REGNO (op1) == REGNO (operands[3]));
++ }
++
++ if (promote_op[0] || promote_op[1])
+ {
+ mode = word_mode;
+ promote_p = true;
+@@ -5392,7 +5399,7 @@ loongarch_expand_conditional_move (rtx *operands)
+
+ if (promote_p)
+ {
+- if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
++ if (promote_op[0])
+ op2 = op0_extend;
+ else
+ {
+@@ -5400,7 +5407,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ op2 = force_reg (mode, op2);
+ }
+
+- if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
++ if (promote_op[1])
+ op3 = op1_extend;
+ else
+ {
+--
+2.43.0
+
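For readers less familiar with the RTL accessors involved: REGNO is only meaningful on REG rtxes, and with --enable-checking=rtl it asserts REG_P; before this fix op0 or operands[2] could be, for example, a constant or memory operand (PR115169). A hedged sketch of the guarded idiom the patch adopts (variable name invented):

/* Compare register numbers only when both rtxes really are registers;
   any non-REG operand makes the comparison false instead of reading an
   unrelated field of the rtx.  */
bool same_reg_p = REG_P (op0)
                  && REG_P (operands[2])
                  && REGNO (op0) == REGNO (operands[2]);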
View file
_service:tar_scm:0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
Added
@@ -0,0 +1,368 @@
+From 05dee9ad331c27345b014fe9aec0067a6f3b07d9 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:21 +0000
+Subject: [PATCH 069/157] [Backport][SME] aarch64: Make more use of sve_type in
+ ACLE code
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1f7f076ad6293cad19d35efdf726eb48cf78e3dd
+
+This patch makes some functions operate on sve_type, rather than just
+on type suffixes.  It also allows an overload to be resolved based on
+a mode and sve_type.  In this case the sve_type is used to derive the
+group size as well as a type suffix.
+
+This is needed for the SME2 intrinsics and the new tuple forms of
+svreinterpret.  No functional change intended on its own.
+
+gcc/
+ * config/aarch64/aarch64-sve-builtins.h
+ (function_resolver::lookup_form): Add an overload that takes
+ an sve_type rather than type and group suffixes.
+ (function_resolver::resolve_to): Likewise.
+ (function_resolver::infer_vector_or_tuple_type): Return an sve_type.
+ (function_resolver::infer_tuple_type): Likewise.
+ (function_resolver::require_matching_vector_type): Take an sve_type
+ rather than a type_suffix_index.
+ (function_resolver::require_derived_vector_type): Likewise.
+ * config/aarch64/aarch64-sve-builtins.cc (num_vectors_to_group):
+ New function.
+ (function_resolver::lookup_form): Add an overload that takes
+ an sve_type rather than type and group suffixes.
+ (function_resolver::resolve_to): Likewise.
+ (function_resolver::infer_vector_or_tuple_type): Return an sve_type.
+ (function_resolver::infer_tuple_type): Likewise.
+ (function_resolver::infer_vector_type): Update accordingly.
+ (function_resolver::require_matching_vector_type): Take an sve_type
+ rather than a type_suffix_index.
+ (function_resolver::require_derived_vector_type): Likewise.
+ * config/aarch64/aarch64-sve-builtins-shapes.cc (get_def::resolve)
+ (set_def::resolve, store_def::resolve, tbl_tuple_def::resolve): Update
+ calls accordingly. 
+--- + .../aarch64/aarch64-sve-builtins-shapes.cc | 16 +-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 111 +++++++++++++----- + gcc/config/aarch64/aarch64-sve-builtins.h | 12 +- + 3 files changed, 95 insertions(+), 44 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index 40aa418e0..f187b4cb2 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -1904,9 +1904,9 @@ struct get_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (2, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_integer_immediate (i + 1)) + return error_mark_node; + +@@ -2417,9 +2417,9 @@ struct set_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (3, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_integer_immediate (i + 1) + || !r.require_derived_vector_type (i + 2, i, type)) + return error_mark_node; +@@ -2592,11 +2592,11 @@ struct store_def : public overloaded_base<0> + gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); + + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs) + || !r.require_pointer_type (i) + || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) +- || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) ++ || !(type = r.infer_tuple_type (nargs - 1))) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); +@@ -2713,9 +2713,9 @@ struct tbl_tuple_def : public overloaded_base<0> + resolve (function_resolver &r) const OVERRIDE + { + unsigned int i, nargs; +- type_suffix_index type; ++ sve_type type; + if (!r.check_gp_argument (2, i, nargs) +- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !(type = r.infer_tuple_type (i)) + || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) + return error_mark_node; + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 1545fd78d..e98274f8a 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -659,6 +659,21 @@ find_type_suffix_for_scalar_type (const_tree type) + return NUM_TYPE_SUFFIXES; + } + ++/* Return the implicit group suffix for intrinsics that operate on NVECTORS ++ vectors. */ ++static group_suffix_index ++num_vectors_to_group (unsigned int nvectors) ++{ ++ switch (nvectors) ++ { ++ case 1: return GROUP_none; ++ case 2: return GROUP_x2; ++ case 3: return GROUP_x3; ++ case 4: return GROUP_x4; ++ } ++ gcc_unreachable (); ++} ++ + /* Return the vector type associated with TYPE. */ + static tree + get_vector_type (sve_type type) +@@ -1282,6 +1297,27 @@ function_resolver::lookup_form (mode_suffix_index mode, + return rfn ? rfn->decl : NULL_TREE; + } + ++/* Silently check whether there is an instance of the function that has the ++ mode suffix given by MODE and the type and group suffixes implied by TYPE. ++ If the overloaded function has an explicit first type suffix (like ++ conversions do), TYPE describes the implicit second type suffix. 
++ Otherwise, TYPE describes the only type suffix.
++
++ Return the decl of the function if it exists, otherwise return null. */
++tree
++function_resolver::lookup_form (mode_suffix_index mode, sve_type type)
++{
++ type_suffix_index type0 = type_suffix_ids[0];
++ type_suffix_index type1 = type_suffix_ids[1];
++ (type0 == NUM_TYPE_SUFFIXES ? type0 : type1) = type.type;
++
++ group_suffix_index group = group_suffix_id;
++ if (group == GROUP_none && type.num_vectors != vectors_per_tuple ())
++ group = num_vectors_to_group (type.num_vectors);
++
++ return lookup_form (mode, type0, type1, group);
++}
++
+ /* Resolve the function to one with the mode suffix given by MODE, the
+ type suffixes given by TYPE0 and TYPE1, and group suffix given by
+ GROUP. Return its function decl on success, otherwise report an
+@@ -1305,6 +1341,19 @@ function_resolver::resolve_to (mode_suffix_index mode,
+ return res;
+ }
+
++/* Resolve the function to one that has the suffixes associated with MODE
++ and TYPE; see lookup_form for how TYPE is interpreted. Return the
++ function decl on success, otherwise report an error and return
++ error_mark_node. */
++tree
++function_resolver::resolve_to (mode_suffix_index mode, sve_type type)
++{
++ if (tree res = lookup_form (mode, type))
++ return res;
++
++ return report_no_such_form (type);
++}
++
+ /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
+ Return the associated type suffix on success, otherwise report an
+ error and return NUM_TYPE_SUFFIXES. */
+@@ -1424,21 +1473,20 @@ function_resolver::infer_sve_type (unsigned int argno)
+
+ /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
+ vectors; NUM_VECTORS is 1 for the former. Return the associated type
+- suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
+- and return NUM_TYPE_SUFFIXES on failure. */
+-type_suffix_index
++ on success. Report an error on failure. */
++sve_type
+ function_resolver::infer_vector_or_tuple_type (unsigned int argno,
+ unsigned int num_vectors)
+ {
+ auto type = infer_sve_type (argno);
+ if (!type)
+- return NUM_TYPE_SUFFIXES;
++ return type;
+
+ if (type.num_vectors == num_vectors)
+- return type.type;
++ return type;
+
+ report_incorrect_num_vectors (argno, type, num_vectors);
+- return NUM_TYPE_SUFFIXES;
++ return {};
+ }
+
+ /* Require argument ARGNO to have some form of vector type. Return the
+@@ -1447,7 +1495,9 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno,
+ type_suffix_index
+ function_resolver::infer_vector_type (unsigned int argno)
+ {
+- return infer_vector_or_tuple_type (argno, 1);
++ if (auto type = infer_vector_or_tuple_type (argno, 1))
++ return type.type;
++ return NUM_TYPE_SUFFIXES;
+ }
+
+ /* Like infer_vector_type, but also require the type to be integral. */
+@@ -1512,10 +1562,9 @@ function_resolver::infer_sd_vector_type (unsigned int argno)
+
+ /* If the function operates on tuples of vectors, require argument ARGNO to be
+ a tuple with the appropriate number of vectors, otherwise require it to be
+- a single vector. Return the associated type suffix on success, using
+- TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES
++ a single vector. Return the associated type on success. Report an error
+ on failure. 
*/
+-type_suffix_index
++sve_type
+ function_resolver::infer_tuple_type (unsigned int argno)
+ {
+ return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
+@@ -1567,10 +1616,10 @@ function_resolver::require_vector_type (unsigned int argno,
+ bool
+ function_resolver::require_matching_vector_type (unsigned int argno,
+ unsigned int first_argno,
+- type_suffix_index type)
++ sve_type type)
+ {
+- type_suffix_index new_type = infer_vector_type (argno);
+- if (new_type == NUM_TYPE_SUFFIXES)
++ sve_type new_type = infer_sve_type (argno);
++ if (!new_type)
+ return false;
+
+ if (type != new_type)
+@@ -1613,15 +1662,13 @@ function_resolver::require_matching_vector_type (unsigned int argno,
+ bool function_resolver::
+ require_derived_vector_type (unsigned int argno,
+ unsigned int first_argno,
+- type_suffix_index first_type,
++ sve_type first_type,
+ type_class_index expected_tclass,
+ unsigned int expected_bits)
+ {
+ /* If the type needs to match FIRST_ARGNO exactly, use the preferred
+- error message for that case. The VECTOR_TYPE_P test excludes tuple
+- types, which we handle below instead. */
+- bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno));
+- if (both_vectors_p
++ error message for that case. */
++ if (first_type.num_vectors == 1
+ && expected_tclass == SAME_TYPE_CLASS
+ && expected_bits == SAME_SIZE)
+ {
+@@ -1631,17 +1678,18 @@ require_derived_vector_type (unsigned int argno,
+ }
+
+ /* Use FIRST_TYPE to get the expected type class and element size. */
++ auto &first_type_suffix = type_suffixes[first_type.type];
+ type_class_index orig_expected_tclass = expected_tclass;
+ if (expected_tclass == NUM_TYPE_CLASSES)
+- expected_tclass = type_suffixes[first_type].tclass;
++ expected_tclass = first_type_suffix.tclass;
+
+ unsigned int orig_expected_bits = expected_bits;
+ if (expected_bits == SAME_SIZE)
+- expected_bits = type_suffixes[first_type].element_bits;
++ expected_bits = first_type_suffix.element_bits;
+ else if (expected_bits == HALF_SIZE)
+- expected_bits = type_suffixes[first_type].element_bits / 2;
++ expected_bits = first_type_suffix.element_bits / 2;
+ else if (expected_bits == QUARTER_SIZE)
+- expected_bits = type_suffixes[first_type].element_bits / 4;
++ expected_bits = first_type_suffix.element_bits / 4;
+
+ /* If the expected type doesn't depend on FIRST_TYPE at all,
+ just check for the fixed choice of vector type. */
+@@ -1655,13 +1703,14 @@ require_derived_vector_type (unsigned int argno,
+
+ /* Require the argument to be some form of SVE vector type,
+ without being specific about the type of vector we want. */
+- type_suffix_index actual_type = infer_vector_type (argno);
+- if (actual_type == NUM_TYPE_SUFFIXES)
++ sve_type actual_type = infer_vector_type (argno);
++ if (!actual_type)
+ return false;
+
+ /* Exit now if we got the right type. */
+- bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass);
+- bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits);
++ auto &actual_type_suffix = type_suffixes[actual_type.type];
++ bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass);
++ bool size_ok_p = (actual_type_suffix.element_bits == expected_bits);
+ if (tclass_ok_p && size_ok_p)
+ return true;
+
+@@ -1701,7 +1750,9 @@ require_derived_vector_type (unsigned int argno,
+
+ /* If the arguments have consistent type classes, but a link between
+ the sizes has been broken, try to describe the error in those terms. 
*/
+- if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE)
++ if (first_type.num_vectors == 1
++ && tclass_ok_p
++ && orig_expected_bits == SAME_SIZE)
+ {
+ if (argno < first_argno)
+ {
+@@ -1718,11 +1769,11 @@ require_derived_vector_type (unsigned int argno,
+
+ /* Likewise in reverse: look for cases in which the sizes are consistent
+ but a link between the type classes has been broken. */
+- if (both_vectors_p
++ if (first_type.num_vectors == 1
+ && size_ok_p
+ && orig_expected_tclass == SAME_TYPE_CLASS
+- && type_suffixes[first_type].integer_p
+- && type_suffixes[actual_type].integer_p
++ && first_type_suffix.integer_p
++ && actual_type_suffix.integer_p
+ {
+ if (argno < first_argno)
+ {
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index f7d6cc084..a7cfff7c1 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -458,28 +458,28 @@ public:
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+ group_suffix_index = GROUP_none);
++ tree lookup_form (mode_suffix_index, sve_type);
+ tree resolve_to (mode_suffix_index,
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+ group_suffix_index = GROUP_none);
++ tree resolve_to (mode_suffix_index, sve_type);
+
+ type_suffix_index infer_integer_scalar_type (unsigned int);
+ type_suffix_index infer_pointer_type (unsigned int, bool = false);
+ sve_type infer_sve_type (unsigned int);
+- type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
++ sve_type infer_vector_or_tuple_type (unsigned int, unsigned int);
+ type_suffix_index infer_vector_type (unsigned int);
+ type_suffix_index infer_integer_vector_type (unsigned int);
+ type_suffix_index infer_unsigned_vector_type (unsigned int);
+ type_suffix_index infer_sd_vector_type (unsigned int);
+- type_suffix_index infer_tuple_type (unsigned int);
++ sve_type infer_tuple_type (unsigned int);
+
+ bool require_vector_or_scalar_type (unsigned int);
+
+ bool require_vector_type (unsigned int, vector_type_index);
+- bool require_matching_vector_type (unsigned int, unsigned int,
+- type_suffix_index);
+- bool require_derived_vector_type (unsigned int, unsigned int,
+- type_suffix_index,
++ bool require_matching_vector_type (unsigned int, unsigned int, sve_type);
++ bool require_derived_vector_type (unsigned int, unsigned int, sve_type,
+ type_class_index = SAME_TYPE_CLASS,
+ unsigned int = SAME_SIZE);
+
+--
+2.33.0
+
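Since the commit message is terse about what an sve_type buys, here is a hedged illustration in terms of the functions added above (variable names invented; sve_type's two-argument constructor in aarch64-sve-builtins.h takes exactly these values):

/* An svfloat32x2_t argument makes infer_sve_type return roughly:  */
sve_type t (TYPE_SUFFIX_f32, /* num_vectors = */ 2);
/* The new lookup_form overload then derives GROUP_x2 from
   t.num_vectors via num_vectors_to_group and uses t.type as the type
   suffix, so one inferred argument is enough to select both the type
   and the group suffix of an overloaded form.  */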
View file
_service:tar_scm:0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch
Added
@@ -0,0 +1,36 @@
+From 7675f45536691eeca7d8163020c9bfb127d5ee4f Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 12 Jun 2024 11:01:53 +0800
+Subject: [PATCH 168/188] LoongArch: Fix mode size comparision in
+ loongarch_expand_conditional_move
+
+We were comparing a mode size with word_mode, but word_mode is an enum
+value thus this does not really make any sense.  (Un)luckily E_DImode
+happens to be 8 so this seemed to work, but let's make it correct so it
+won't blow up when we add LA32 support or add another machine mode...
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.cc
+ (loongarch_expand_conditional_move): Compare mode size with
+ UNITS_PER_WORD instead of word_mode.
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 0c2c38f6f..77f83ab9e 100644
+--- a/gcc/config/loongarch/loongarch.cc
++++ b/gcc/config/loongarch/loongarch.cc
+@@ -5349,7 +5349,7 @@ loongarch_expand_conditional_move (rtx *operands)
+ loongarch_emit_float_compare (&code, &op0, &op1);
+ else
+ {
+- if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
++ if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
+ {
+ promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
+ REGNO (op0) == REGNO (operands[2]));
+--
+2.43.0
+
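The one-liner deserves a short gloss: GET_MODE_SIZE returns a size in bytes, while word_mode is an enum machine_mode value, so the two are not comparable quantities. A sketch of the before/after, spelling out the coincidence the commit message mentions:

/* Old: size-in-bytes < enum value.  This only "worked" because
   E_DImode happens to equal 8 on 64-bit LoongArch.
       if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
   New: size-in-bytes < bytes-per-word, which is what was meant:  */
if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
  ;  /* promote the comparison operands to word_mode */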
View file
_service:tar_scm:0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
Added
@@ -0,0 +1,106 @@
+From 1abb02c636eef4f9a5f55f243bc0c4d38ee1f849 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:22 +0000
+Subject: [PATCH 070/157] [Backport][SME] aarch64: Tweak error message for
+ (tuple,vector) pairs
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce2e22b7e02c7fbd1ab8145b632559b67ae9958
+
+SME2 adds more intrinsics that take a tuple of vectors followed
+by a single vector, with the two arguments expected to have the
+same element type.  Unlike with the existing svset* intrinsics,
+the size of the tuple is not fixed by the overloaded function name.
+
+This patch adds an error message that (hopefully) copes better
+with that combination.
+
+gcc/
+ * config/aarch64/aarch64-sve-builtins.cc
+ (function_resolver::require_derived_vector_type): Add a specific
+ error message for the case in which the caller wants a single
+ vector whose element type matches a previous tuple argument.
+
+gcc/testsuite/
+ * gcc.target/aarch64/sve/acle/general-c/set_1.c: Tweak expected
+ error message.
+ * gcc.target/aarch64/sve/acle/general-c/set_3.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/set_5.c: Likewise.
+---
+ gcc/config/aarch64/aarch64-sve-builtins.cc | 13 +++++++++++++
+ .../gcc.target/aarch64/sve/acle/general-c/set_1.c | 4 ++--
+ .../gcc.target/aarch64/sve/acle/general-c/set_3.c | 4 ++--
+ .../gcc.target/aarch64/sve/acle/general-c/set_5.c | 4 ++--
+ 4 files changed, 19 insertions(+), 6 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index e98274f8a..9224916a7 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -1707,6 +1707,19 @@ require_derived_vector_type (unsigned int argno,
+ if (!actual_type)
+ return false;
+
++ if (orig_expected_tclass == SAME_TYPE_CLASS
++ && orig_expected_bits == SAME_SIZE)
++ {
++ if (actual_type.type == first_type.type)
++ return true;
++
++ error_at (location, "passing %qT to argument %d of %qE, but"
++ " argument %d was a tuple of %qT",
++ get_vector_type (actual_type), argno + 1, fndecl,
++ first_argno + 1, get_vector_type (first_type.type));
++ return false;
++ }
++
+ /* Exit now if we got the right type. 
*/
+ auto &actual_type_suffix = type_suffixes[actual_type.type];
+ bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass);
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
+index f07c76102..f2a6da536 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
+@@ -16,8 +16,8 @@ f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x)
+ u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */
+ u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */
+ u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */
+- u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */
+- u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */
++ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */
++ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */
+ u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */
+ u8x2 = svset2 (u8x2, 0, u8);
+ f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
+index 543a1bea8..92b955f83 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
+@@ -17,8 +17,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4,
+ f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */
+ f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */
+ f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */
+- f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */
+- f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */
++ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */
++ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */
+ f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */
+ f16x3 = svset3 (f16x3, 0, f16);
+ f64 = svset3 (f16x3, 0, 
f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +index be911a731..f0696fb07 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +@@ -16,8 +16,8 @@ f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) + s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ + s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ + s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */ +- s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ +- s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ ++ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ ++ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ + s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ + s32x4 = svset4 (s32x4, 0, s32); + f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ +-- +2.33.0 +
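Seen from user code, the reworded diagnostic reads as follows; this sketch simply re-triggers the set_1.c case above (the wrapper function f is hypothetical):

#include <arm_sve.h>

svuint8x2_t
f (svuint8x2_t u8x2, svfloat64_t f64)
{
  /* Old: "passing 'svfloat64_t' instead of the expected 'svuint8_t' to
     argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1".
     New: "passing 'svfloat64_t' to argument 3 of 'svset2', but
     argument 1 was a tuple of 'svuint8_t'".  */
  return svset2 (u8x2, 0, f64);
}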
View file
_service:tar_scm:0169-LoongArch-Use-bstrins-for-value-1u-const.patch
Added
@@ -0,0 +1,135 @@
+From 7e34bede110bfa7b2f91dc657c41ed0e7b4b11f7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 9 Jun 2024 14:43:48 +0800
+Subject: [PATCH 169/188] LoongArch: Use bstrins for "value & (-1u << const)"
+
+A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and
+pair, and twice fast as a srli/slli pair.  When the src reg and the dst
+reg happens to be the same, the move instruction can be optimized away.
+
+gcc/ChangeLog:
+
+ * config/loongarch/predicates.md (high_bitmask_operand): New
+ predicate.
+ * config/loongarch/constraints.md (Yy): New constriant.
+ * config/loongarch/loongarch.md (and<mode>3_align): New
+ define_insn_and_split.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/loongarch/bstrins-1.c: New test.
+ * gcc.target/loongarch/bstrins-2.c: New test.
+---
+ gcc/config/loongarch/constraints.md | 5 +++++
+ gcc/config/loongarch/loongarch.md | 17 +++++++++++++++++
+ gcc/config/loongarch/predicates.md | 4 ++++
+ gcc/testsuite/gcc.target/loongarch/bstrins-1.c | 9 +++++++++
+ gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++++++++++++++
+ 5 files changed, 49 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+
+diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
+index cec5d8857..f3f5776da 100644
+--- a/gcc/config/loongarch/constraints.md
++++ b/gcc/config/loongarch/constraints.md
+@@ -94,6 +94,7 @@
+ ;; "A constant @code{move_operand} that can be safely loaded using
+ ;; @code{la}."
+ ;; "Yx"
++;; "Yy"
+;; "Z" -
+ ;; "ZC"
+ ;; "A memory operand whose address is formed by a base register and offset
+@@ -291,6 +292,10 @@
+ "@internal"
+ (match_operand 0 "low_bitmask_operand"))
+
++(define_constraint "Yy"
++ "@internal"
++ (match_operand 0 "high_bitmask_operand"))
++
+ (define_constraint "YI"
+ "@internal
+ A replicated vector const in which the replicated value is in the range
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 20494ce8a..55a759850 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1542,6 +1542,23 @@
+ [(set_attr "move_type" "pick_ins")
+ (set_attr "mode" "<MODE>")])
+
++(define_insn_and_split "and<mode>3_align"
++ [(set (match_operand:GPR 0 "register_operand" "=r")
++ (and:GPR (match_operand:GPR 1 "register_operand" "r")
++ (match_operand:GPR 2 "high_bitmask_operand" "Yy")))]
++ ""
++ "#"
++ ""
++ [(set (match_dup 0) (match_dup 1))
++ (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0))
++ (const_int 0))]
++{
++ int len;
++
++ len = low_bitmask_len (<MODE>mode, ~INTVAL (operands[2]));
++ operands[2] = GEN_INT (len);
++})
++
+ (define_insn_and_split "*bstrins_<mode>_for_mask"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (and:GPR (match_operand:GPR 1 "register_operand" "r")
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 1d9a30695..95be8a4fe 100644
+--- a/gcc/config/loongarch/predicates.md
++++ b/gcc/config/loongarch/predicates.md
+@@ -293,6 +293,10 @@
+ (and (match_code "const_int")
+ (match_test "low_bitmask_len (mode, INTVAL (op)) > 12")))
+
++(define_predicate "high_bitmask_operand"
++ (and (match_code "const_int")
++ (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0")))
++
+ (define_predicate "d_operand"
+ (and (match_code "reg")
+ (match_test "GP_REG_P (REGNO (op))")))
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c 
b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+new file mode 100644
+index 000000000..7cb3a9523
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */
++
++long
++x (long a)
++{
++ return a & -32;
++}
+diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+new file mode 100644
+index 000000000..9777f502e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]+,\\\$r0,4,0" } } */
++
++struct aligned_buffer {
++ _Alignas(32) char x[1024];
++};
++
++extern int f(char *);
++int g(void)
++{
++ struct aligned_buffer buf;
++ return f(buf.x);
++}
+--
+2.43.0
+
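To see what the split buys, take the bstrins-1.c test above: "a & -32" clears the low five bits, so the splitter emits a register move plus a bstrins that inserts zeros into bits 4..0, and the move is elided when source and destination registers coincide. The output below is a rough sketch (only the bstrins line is guaranteed by the scan-assembler pattern; the surrounding code is assumed):

/* long x (long a) { return a & -32; }  compiles to approximately:
       bstrins.d  $r4, $r0, 4, 0   # zero bits 4..0 of $r4, i.e. a &= ~31
       jr         $ra
   rather than the earlier two-instruction srli.d/slli.d sequence.  */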
View file
_service:tar_scm:0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
Added
@@ -0,0 +1,1236 @@
+From 95234ef07c47dda7ac6a13f75619580a6683118c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:22 +0000
+Subject: [PATCH 071/157] [Backport][SME] aarch64: Add tuple forms of
+ svreinterpret
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ce9dc263c2f6d455b2013fc58932beda2a4ae92
+
+SME2 adds a number of intrinsics that operate on tuples of 2 and 4
+vectors.  The ACLE therefore extends the existing svreinterpret
+intrinsics to handle tuples as well.
+
+gcc/
+ * config/aarch64/aarch64-sve-builtins-base.cc
+ (svreinterpret_impl::fold): Punt on tuple forms.
+ (svreinterpret_impl::expand): Use tuple_mode instead of vector_mode.
+ * config/aarch64/aarch64-sve-builtins-base.def (svreinterpret):
+ Extend to x1234 groups.
+ * config/aarch64/aarch64-sve-builtins-functions.h
+ (multi_vector_function::vectors_per_tuple): If the function has
+ a group suffix, get the number of vectors from there.
+ * config/aarch64/aarch64-sve-builtins-shapes.h (reinterpret): Declare.
+ * config/aarch64/aarch64-sve-builtins-shapes.cc (reinterpret_def)
+ (reinterpret): New function shape.
+ * config/aarch64/aarch64-sve-builtins.cc (function_groups): Handle
+ DEF_SVE_FUNCTION_GS.
+ * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_FUNCTION_GS): New
+ macro.
+ (DEF_SVE_FUNCTION): Forward to DEF_SVE_FUNCTION_GS by default.
+ * config/aarch64/aarch64-sve-builtins.h
+ (function_instance::tuple_mode): New member function.
+ (function_base::vectors_per_tuple): Take the function instance
+ as argument and get the number from the group suffix.
+ (function_instance::vectors_per_tuple): Update accordingly.
+ * config/aarch64/iterators.md (SVE_FULLx2, SVE_FULLx3, SVE_FULLx4)
+ (SVE_ALL_STRUCT): New mode iterators.
+ (SVE_STRUCT): Redefine in terms of SVE_FULL*.
+ * config/aarch64/aarch64-sve.md (@aarch64_sve_reinterpret<mode>)
+ (*aarch64_sve_reinterpret<mode>): Extend to SVE structure modes.
+
+gcc/testsuite/
+ * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_DUAL_XN):
+ New macro.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c: Add tests for
+ tuple forms.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c: Likewise. 
+--- + .../aarch64/aarch64-sve-builtins-base.cc | 5 +- + .../aarch64/aarch64-sve-builtins-base.def | 2 +- + .../aarch64/aarch64-sve-builtins-functions.h | 7 ++- + .../aarch64/aarch64-sve-builtins-shapes.cc | 28 +++++++++ + .../aarch64/aarch64-sve-builtins-shapes.h | 1 + + gcc/config/aarch64/aarch64-sve-builtins.cc | 8 ++- + gcc/config/aarch64/aarch64-sve-builtins.def | 8 ++- + gcc/config/aarch64/aarch64-sve-builtins.h | 20 +++++- + gcc/config/aarch64/aarch64-sve.md | 8 +-- + gcc/config/aarch64/iterators.md | 26 +++++--- + .../aarch64/sve/acle/asm/reinterpret_bf16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_f64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_s8.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u16.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u32.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u64.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/reinterpret_u8.c | 62 +++++++++++++++++++ + .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +++++ + 23 files changed, 851 insertions(+), 20 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +index c9bf13792..53f3f28f9 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -1928,6 +1928,9 @@ public: + gimple * + fold (gimple_folder &f) const OVERRIDE + { ++ if (f.vectors_per_tuple () > 1) ++ return NULL; ++ + /* Punt to rtl if the effect of the reinterpret on registers does not + conform to GCC's endianness model. 
*/ + if (!targetm.can_change_mode_class (f.vector_mode (0), +@@ -1944,7 +1947,7 @@ public: + rtx + expand (function_expander &e) const OVERRIDE + { +- machine_mode mode = e.vector_mode (0); ++ machine_mode mode = e.tuple_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); + } + }; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +index 3a58f76c3..756469959 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -248,7 +248,7 @@ DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) + DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) + DEF_SVE_FUNCTION (svrecps, binary, all_float, none) + DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) +-DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) ++DEF_SVE_FUNCTION_GS (svreinterpret, reinterpret, reinterpret, x1234, none) + DEF_SVE_FUNCTION (svrev, unary, all_data, none) + DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) + DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +index 9d346b6ff..94a6d1207 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +@@ -59,8 +59,13 @@ public: + : m_vectors_per_tuple (vectors_per_tuple) {} + + unsigned int +- vectors_per_tuple () const OVERRIDE ++ vectors_per_tuple (const function_instance &fi) const override + { ++ if (fi.group_suffix_id != GROUP_none) ++ { ++ gcc_checking_assert (m_vectors_per_tuple == 1); ++ return fi.group_suffix ().vectors_per_tuple; ++ } + return m_vectors_per_tuple; + } + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index f187b4cb2..95e40d8f3 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -2400,6 +2400,34 @@ struct reduction_wide_def : public overloaded_base<0> + }; + SHAPE (reduction_wide) + ++/* sv<t0>x<g>_t svfoo_t0_t1_g(sv<t1>x<g>_t) ++ ++ where the target type <t0> must be specified explicitly but the source ++ type <t1> can be inferred. 
*/
++struct reinterpret_def : public overloaded_base<1>
++{
++ bool explicit_group_suffix_p () const override { return false; }
++
++ void
++ build (function_builder &b, const function_group_info &group) const override
++ {
++ b.add_overloaded_functions (group, MODE_none);
++ build_all (b, "t0,t1", group, MODE_none);
++ }
++
++ tree
++ resolve (function_resolver &r) const override
++ {
++ sve_type type;
++ if (!r.check_num_arguments (1)
++ || !(type = r.infer_sve_type (0)))
++ return error_mark_node;
++
++ return r.resolve_to (r.mode_suffix_id, type);
++ }
++};
++SHAPE (reinterpret)
++
+ /* sv<t0>xN_t svfoo_t0(sv<t0>xN_t, uint64_t, sv<t0>_t)
+
+ where the second argument is an integer constant expression in the
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+index 3b0025f85..2b06152d4 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+@@ -133,6 +133,7 @@ namespace aarch64_sve
+ extern const function_shape *const rdffr;
+ extern const function_shape *const reduction;
+ extern const function_shape *const reduction_wide;
++ extern const function_shape *const reinterpret;
+ extern const function_shape *const set;
+ extern const function_shape *const setffr;
+ extern const function_shape *const shift_left_imm_long;
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index 9224916a7..c439f2e8a 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -494,6 +494,10 @@ static const group_suffix_index groups_none[] = {
+ GROUP_none, NUM_GROUP_SUFFIXES
+};
+
++static const group_suffix_index groups_x1234[] = {
++ GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES
++};
++
+ /* Used by functions that have no governing predicate. */
+ static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
+
+@@ -534,8 +538,8 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
+
+ /* A list of all SVE ACLE functions. 
*/ + static CONSTEXPR const function_group_info function_groups = { +-#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ +- { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_none, \ ++#define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ ++ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ + preds_##PREDS, REQUIRED_EXTENSIONS }, + #include "aarch64-sve-builtins.def" + }; +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def +index d9bf9c350..be10b5ea1 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins.def +@@ -33,8 +33,13 @@ + #define DEF_SVE_GROUP_SUFFIX(A, B, C) + #endif + ++#ifndef DEF_SVE_FUNCTION_GS ++#define DEF_SVE_FUNCTION_GS(A, B, C, D, E) ++#endif ++ + #ifndef DEF_SVE_FUNCTION +-#define DEF_SVE_FUNCTION(A, B, C, D) ++#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ ++ DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) + #endif + + DEF_SVE_MODE (n, none, none, none) +@@ -107,6 +112,7 @@ DEF_SVE_GROUP_SUFFIX (x4, 0, 4) + #include "aarch64-sve-builtins-sve2.def" + + #undef DEF_SVE_FUNCTION ++#undef DEF_SVE_FUNCTION_GS + #undef DEF_SVE_GROUP_SUFFIX + #undef DEF_SVE_TYPE_SUFFIX + #undef DEF_SVE_TYPE +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index a7cfff7c1..7132b6e77 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -364,6 +364,7 @@ public: + tree tuple_type (unsigned int) const; + unsigned int elements_per_vq (unsigned int i) const; + machine_mode vector_mode (unsigned int) const; ++ machine_mode tuple_mode (unsigned int) const; + machine_mode gp_mode (unsigned int) const; + + /* The properties of the function. */ +@@ -664,7 +665,7 @@ public: + + /* If the function operates on tuples of vectors, return the number + of vectors in the tuples, otherwise return 1. */ +- virtual unsigned int vectors_per_tuple () const { return 1; } ++ virtual unsigned int vectors_per_tuple (const function_instance &) const; + + /* If the function addresses memory, return the type of a single + scalar memory element. */ +@@ -836,7 +837,7 @@ function_instance::operator!= (const function_instance &other) const + inline unsigned int + function_instance::vectors_per_tuple () const + { +- return base->vectors_per_tuple (); ++ return base->vectors_per_tuple (*this); + } + + /* If the function addresses memory, return the type of a single +@@ -940,6 +941,15 @@ function_instance::vector_mode (unsigned int i) const + return type_suffix (i).vector_mode; + } + ++/* Return the mode of tuple_type (I). */ ++inline machine_mode ++function_instance::tuple_mode (unsigned int i) const ++{ ++ if (group_suffix ().vectors_per_tuple > 1) ++ return TYPE_MODE (tuple_type (i)); ++ return vector_mode (i); ++} ++ + /* Return the mode of the governing predicate to use when operating on + type suffix I. */ + inline machine_mode +@@ -966,6 +976,12 @@ function_base::call_properties (const function_instance &instance) const + return flags; + } + ++inline unsigned int ++function_base::vectors_per_tuple (const function_instance &instance) const ++{ ++ return instance.group_suffix ().vectors_per_tuple; ++} ++ + /* Return the mode of the result of a call. 
*/ + inline machine_mode + function_expander::result_mode () const +diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md +index b8cc47ef5..28b73d807 100644 +--- a/gcc/config/aarch64/aarch64-sve.md ++++ b/gcc/config/aarch64/aarch64-sve.md +@@ -784,8 +784,8 @@ + ;; This is equivalent to a subreg on little-endian targets but not for + ;; big-endian; see the comment at the head of the file for details. + (define_expand "@aarch64_sve_reinterpret<mode>" +- (set (match_operand:SVE_ALL 0 "register_operand") +- (unspec:SVE_ALL ++ (set (match_operand:SVE_ALL_STRUCT 0 "register_operand") ++ (unspec:SVE_ALL_STRUCT + (match_operand 1 "aarch64_any_register_operand") + UNSPEC_REINTERPRET)) + "TARGET_SVE" +@@ -802,8 +802,8 @@ + ;; A pattern for handling type punning on big-endian targets. We use a + ;; special predicate for operand 1 to reduce the number of patterns. + (define_insn_and_split "*aarch64_sve_reinterpret<mode>" +- (set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL ++ (set (match_operand:SVE_ALL_STRUCT 0 "register_operand" "=w") ++ (unspec:SVE_ALL_STRUCT + (match_operand 1 "aarch64_any_register_operand" "w") + UNSPEC_REINTERPRET)) + "TARGET_SVE" +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index a8a39b65a..8dd2035bc 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -451,14 +451,6 @@ + (define_mode_iterator VNx2DI_ONLY VNx2DI) + (define_mode_iterator VNx2DF_ONLY VNx2DF) + +-;; All SVE vector structure modes. +-(define_mode_iterator SVE_STRUCT VNx32QI VNx16HI VNx8SI VNx4DI +- VNx16BF VNx16HF VNx8SF VNx4DF +- VNx48QI VNx24HI VNx12SI VNx6DI +- VNx24BF VNx24HF VNx12SF VNx6DF +- VNx64QI VNx32HI VNx16SI VNx8DI +- VNx32BF VNx32HF VNx16SF VNx8DF) +- + ;; All fully-packed SVE vector modes. + (define_mode_iterator SVE_FULL VNx16QI VNx8HI VNx4SI VNx2DI + VNx8BF VNx8HF VNx4SF VNx2DF) +@@ -530,6 +522,24 @@ + VNx2DI + VNx2DF) + ++;; All SVE 2-vector modes. ++(define_mode_iterator SVE_FULLx2 VNx32QI VNx16HI VNx8SI VNx4DI ++ VNx16BF VNx16HF VNx8SF VNx4DF) ++ ++;; All SVE 3-vector modes. ++(define_mode_iterator SVE_FULLx3 VNx48QI VNx24HI VNx12SI VNx6DI ++ VNx24BF VNx24HF VNx12SF VNx6DF) ++ ++;; All SVE 4-vector modes. ++(define_mode_iterator SVE_FULLx4 VNx64QI VNx32HI VNx16SI VNx8DI ++ VNx32BF VNx32HF VNx16SF VNx8DF) ++ ++;; All SVE vector structure modes. ++(define_mode_iterator SVE_STRUCT SVE_FULLx2 SVE_FULLx3 SVE_FULLx4) ++ ++;; All SVE vector and structure modes. ++(define_mode_iterator SVE_ALL_STRUCT SVE_ALL SVE_STRUCT) ++ + ;; All SVE integer vector modes. 
+ (define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI + VNx8HI VNx4HI VNx2HI
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c +index 2d2c2a714..dd0daf2ef 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_bf16_u64_tied1, svbfloat16_t, svuint64_t, + TEST_DUAL_Z (reinterpret_bf16_u64_untied, svbfloat16_t, svuint64_t, + z0 = svreinterpret_bf16_u64 (z4), + z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_bf16_x2_tied1, svbfloat16x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_bf16_bf16_x2 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_bf16_f32_x2_untied, svbfloat16x2_t, svfloat32x2_t, z0, ++ svreinterpret_bf16_f32_x2 (z4), ++ svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_s64_x3_tied1, svbfloat16x3_t, svint64x3_t, ++ z0_res = svreinterpret_bf16_s64_x3 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_bf16_u8_x3_untied, svbfloat16x3_t, svuint8x3_t, z18, ++ svreinterpret_bf16_u8_x3 (z23), ++ svreinterpret_bf16 (z23)) ++ ++/* ++** reinterpret_bf16_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_u32_x4_tied1, svbfloat16x4_t, svuint32x4_t, ++ z0_res = svreinterpret_bf16_u32_x4 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_bf16_f64_x4_untied, svbfloat16x4_t, svfloat64x4_t, z28, ++ svreinterpret_bf16_f64_x4 (z4), ++ svreinterpret_bf16 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c +index 60705e628..9b6f8227d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_f16_u64_tied1, svfloat16_t, svuint64_t, + TEST_DUAL_Z (reinterpret_f16_u64_untied, svfloat16_t, svuint64_t, + z0 = svreinterpret_f16_u64 (z4), + z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_bf16_x2_tied1, svfloat16x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_f16_bf16_x2 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f16_f32_x2_untied, svfloat16x2_t, svfloat32x2_t, z0, ++ svreinterpret_f16_f32_x2 (z4), ++ svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_s64_x3_tied1, svfloat16x3_t, svint64x3_t, ++ z0_res = svreinterpret_f16_s64_x3 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f16_u8_x3_untied, svfloat16x3_t, svuint8x3_t, z18, ++ svreinterpret_f16_u8_x3 (z23), ++ svreinterpret_f16 (z23)) ++ ++/* ++** reinterpret_f16_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_u32_x4_tied1, svfloat16x4_t, svuint32x4_t, ++ z0_res = svreinterpret_f16_u32_x4 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f16_f64_x4_untied, svfloat16x4_t, svfloat64x4_t, z28, ++ svreinterpret_f16_f64_x4 (z4), ++ svreinterpret_f16 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c +index 06fc46f25..ce981fce9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_f32_u64_tied1, svfloat32_t, svuint64_t, + TEST_DUAL_Z (reinterpret_f32_u64_untied, svfloat32_t, svuint64_t, + z0 = svreinterpret_f32_u64 (z4), + z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_bf16_x2_tied1, svfloat32x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_f32_bf16_x2 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f32_f32_x2_untied, svfloat32x2_t, svfloat32x2_t, z0, ++ svreinterpret_f32_f32_x2 (z4), ++ svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_s64_x3_tied1, svfloat32x3_t, svint64x3_t, ++ z0_res = svreinterpret_f32_s64_x3 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f32_u8_x3_untied, svfloat32x3_t, svuint8x3_t, z18, ++ svreinterpret_f32_u8_x3 (z23), ++ svreinterpret_f32 (z23)) ++ ++/* ++** reinterpret_f32_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_u32_x4_tied1, svfloat32x4_t, svuint32x4_t, ++ z0_res = svreinterpret_f32_u32_x4 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f32_f64_x4_untied, svfloat32x4_t, svfloat64x4_t, z28, ++ svreinterpret_f32_f64_x4 (z4), ++ svreinterpret_f32 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c +index 003ee3fe2..4f51824ab 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_f64_u64_tied1, svfloat64_t, svuint64_t, + TEST_DUAL_Z (reinterpret_f64_u64_untied, svfloat64_t, svuint64_t, + z0 = svreinterpret_f64_u64 (z4), + z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_bf16_x2_tied1, svfloat64x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_f64_bf16_x2 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f64_f32_x2_untied, svfloat64x2_t, svfloat32x2_t, z0, ++ svreinterpret_f64_f32_x2 (z4), ++ svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_s64_x3_tied1, svfloat64x3_t, svint64x3_t, ++ z0_res = svreinterpret_f64_s64_x3 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f64_u8_x3_untied, svfloat64x3_t, svuint8x3_t, z18, ++ svreinterpret_f64_u8_x3 (z23), ++ svreinterpret_f64 (z23)) ++ ++/* ++** reinterpret_f64_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_u32_x4_tied1, svfloat64x4_t, svuint32x4_t, ++ z0_res = svreinterpret_f64_u32_x4 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_f64_f64_x4_untied, svfloat64x4_t, svfloat64x4_t, z28, ++ svreinterpret_f64_f64_x4 (z4), ++ svreinterpret_f64 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c +index d62817c2c..7e15f3e9b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s16_u64_tied1, svint16_t, svuint64_t, + TEST_DUAL_Z (reinterpret_s16_u64_untied, svint16_t, svuint64_t, + z0 = svreinterpret_s16_u64 (z4), + z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_bf16_x2_tied1, svint16x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_s16_bf16_x2 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s16_f32_x2_untied, svint16x2_t, svfloat32x2_t, z0, ++ svreinterpret_s16_f32_x2 (z4), ++ svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_s64_x3_tied1, svint16x3_t, svint64x3_t, ++ z0_res = svreinterpret_s16_s64_x3 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s16_u8_x3_untied, svint16x3_t, svuint8x3_t, z18, ++ svreinterpret_s16_u8_x3 (z23), ++ svreinterpret_s16 (z23)) ++ ++/* ++** reinterpret_s16_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_u32_x4_tied1, svint16x4_t, svuint32x4_t, ++ z0_res = svreinterpret_s16_u32_x4 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s16_f64_x4_untied, svint16x4_t, svfloat64x4_t, z28, ++ svreinterpret_s16_f64_x4 (z4), ++ svreinterpret_s16 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c +index e1068f244..60da8aef3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s32_u64_tied1, svint32_t, svuint64_t, + TEST_DUAL_Z (reinterpret_s32_u64_untied, svint32_t, svuint64_t, + z0 = svreinterpret_s32_u64 (z4), + z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_bf16_x2_tied1, svint32x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_s32_bf16_x2 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s32_f32_x2_untied, svint32x2_t, svfloat32x2_t, z0, ++ svreinterpret_s32_f32_x2 (z4), ++ svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_s64_x3_tied1, svint32x3_t, svint64x3_t, ++ z0_res = svreinterpret_s32_s64_x3 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s32_u8_x3_untied, svint32x3_t, svuint8x3_t, z18, ++ svreinterpret_s32_u8_x3 (z23), ++ svreinterpret_s32 (z23)) ++ ++/* ++** reinterpret_s32_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_u32_x4_tied1, svint32x4_t, svuint32x4_t, ++ z0_res = svreinterpret_s32_u32_x4 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s32_f64_x4_untied, svint32x4_t, svfloat64x4_t, z28, ++ svreinterpret_s32_f64_x4 (z4), ++ svreinterpret_s32 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c +index cada7533c..d705c60df 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s64_u64_tied1, svint64_t, svuint64_t, + TEST_DUAL_Z (reinterpret_s64_u64_untied, svint64_t, svuint64_t, + z0 = svreinterpret_s64_u64 (z4), + z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_bf16_x2_tied1, svint64x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_s64_bf16_x2 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s64_f32_x2_untied, svint64x2_t, svfloat32x2_t, z0, ++ svreinterpret_s64_f32_x2 (z4), ++ svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_s64_x3_tied1, svint64x3_t, svint64x3_t, ++ z0_res = svreinterpret_s64_s64_x3 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s64_u8_x3_untied, svint64x3_t, svuint8x3_t, z18, ++ svreinterpret_s64_u8_x3 (z23), ++ svreinterpret_s64 (z23)) ++ ++/* ++** reinterpret_s64_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_u32_x4_tied1, svint64x4_t, svuint32x4_t, ++ z0_res = svreinterpret_s64_u32_x4 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s64_f64_x4_untied, svint64x4_t, svfloat64x4_t, z28, ++ svreinterpret_s64_f64_x4 (z4), ++ svreinterpret_s64 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c +index 23a40d0ba..ab90a54d7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s8_u64_tied1, svint8_t, svuint64_t, + TEST_DUAL_Z (reinterpret_s8_u64_untied, svint8_t, svuint64_t, + z0 = svreinterpret_s8_u64 (z4), + z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_bf16_x2_tied1, svint8x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_s8_bf16_x2 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s8_f32_x2_untied, svint8x2_t, svfloat32x2_t, z0, ++ svreinterpret_s8_f32_x2 (z4), ++ svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_s64_x3_tied1, svint8x3_t, svint64x3_t, ++ z0_res = svreinterpret_s8_s64_x3 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s8_u8_x3_untied, svint8x3_t, svuint8x3_t, z18, ++ svreinterpret_s8_u8_x3 (z23), ++ svreinterpret_s8 (z23)) ++ ++/* ++** reinterpret_s8_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_u32_x4_tied1, svint8x4_t, svuint32x4_t, ++ z0_res = svreinterpret_s8_u32_x4 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_s8_f64_x4_untied, svint8x4_t, svfloat64x4_t, z28, ++ svreinterpret_s8_f64_x4 (z4), ++ svreinterpret_s8 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c +index 48e8ecaff..fcfc0eb9d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u16_u64_tied1, svuint16_t, svuint64_t, + TEST_DUAL_Z (reinterpret_u16_u64_untied, svuint16_t, svuint64_t, + z0 = svreinterpret_u16_u64 (z4), + z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_bf16_x2_tied1, svuint16x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_u16_bf16_x2 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u16_f32_x2_untied, svuint16x2_t, svfloat32x2_t, z0, ++ svreinterpret_u16_f32_x2 (z4), ++ svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_s64_x3_tied1, svuint16x3_t, svint64x3_t, ++ z0_res = svreinterpret_u16_s64_x3 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u16_u8_x3_untied, svuint16x3_t, svuint8x3_t, z18, ++ svreinterpret_u16_u8_x3 (z23), ++ svreinterpret_u16 (z23)) ++ ++/* ++** reinterpret_u16_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_u32_x4_tied1, svuint16x4_t, svuint32x4_t, ++ z0_res = svreinterpret_u16_u32_x4 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u16_f64_x4_untied, svuint16x4_t, svfloat64x4_t, z28, ++ svreinterpret_u16_f64_x4 (z4), ++ svreinterpret_u16 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c +index 1d4e85712..6d7e05857 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u32_u64_tied1, svuint32_t, svuint64_t, + TEST_DUAL_Z (reinterpret_u32_u64_untied, svuint32_t, svuint64_t, + z0 = svreinterpret_u32_u64 (z4), + z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_bf16_x2_tied1, svuint32x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_u32_bf16_x2 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u32_f32_x2_untied, svuint32x2_t, svfloat32x2_t, z0, ++ svreinterpret_u32_f32_x2 (z4), ++ svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_s64_x3_tied1, svuint32x3_t, svint64x3_t, ++ z0_res = svreinterpret_u32_s64_x3 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u32_u8_x3_untied, svuint32x3_t, svuint8x3_t, z18, ++ svreinterpret_u32_u8_x3 (z23), ++ svreinterpret_u32 (z23)) ++ ++/* ++** reinterpret_u32_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_u32_x4_tied1, svuint32x4_t, svuint32x4_t, ++ z0_res = svreinterpret_u32_u32_x4 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u32_f64_x4_untied, svuint32x4_t, svfloat64x4_t, z28, ++ svreinterpret_u32_f64_x4 (z4), ++ svreinterpret_u32 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c +index 07af69dce..55c0baefb 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u64_u64_tied1, svuint64_t, svuint64_t, + TEST_DUAL_Z (reinterpret_u64_u64_untied, svuint64_t, svuint64_t, + z0 = svreinterpret_u64_u64 (z4), + z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_bf16_x2_tied1, svuint64x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_u64_bf16_x2 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u64_f32_x2_untied, svuint64x2_t, svfloat32x2_t, z0, ++ svreinterpret_u64_f32_x2 (z4), ++ svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_s64_x3_tied1, svuint64x3_t, svint64x3_t, ++ z0_res = svreinterpret_u64_s64_x3 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u64_u8_x3_untied, svuint64x3_t, svuint8x3_t, z18, ++ svreinterpret_u64_u8_x3 (z23), ++ svreinterpret_u64 (z23)) ++ ++/* ++** reinterpret_u64_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_u32_x4_tied1, svuint64x4_t, svuint32x4_t, ++ z0_res = svreinterpret_u64_u32_x4 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u64_f64_x4_untied, svuint64x4_t, svfloat64x4_t, z28, ++ svreinterpret_u64_f64_x4 (z4), ++ svreinterpret_u64 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c +index a4c7f4c8d..f73021961 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c +@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u8_u64_tied1, svuint8_t, svuint64_t, + TEST_DUAL_Z (reinterpret_u8_u64_untied, svuint8_t, svuint64_t, + z0 = svreinterpret_u8_u64 (z4), + z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_bf16_x2_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_bf16_x2_tied1, svuint8x2_t, svbfloat16x2_t, ++ z0_res = svreinterpret_u8_bf16_x2 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_f32_x2_untied: ++** ( ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** | ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ) ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u8_f32_x2_untied, svuint8x2_t, svfloat32x2_t, z0, ++ svreinterpret_u8_f32_x2 (z4), ++ svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_s64_x3_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_s64_x3_tied1, svuint8x3_t, svint64x3_t, ++ z0_res = svreinterpret_u8_s64_x3 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_u8_x3_untied: ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u8_u8_x3_untied, svuint8x3_t, svuint8x3_t, z18, ++ svreinterpret_u8_u8_x3 (z23), ++ svreinterpret_u8 (z23)) ++ ++/* ++** reinterpret_u8_u32_x4_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_u32_x4_tied1, svuint8x4_t, svuint32x4_t, ++ z0_res = svreinterpret_u8_u32_x4 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_f64_x4_untied: ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** mov (z28|z29|z30|z31)\.d, z[4-7]\.d ++** ret ++*/ ++TEST_DUAL_XN (reinterpret_u8_f64_x4_untied, svuint8x4_t, svfloat64x4_t, z28, ++ svreinterpret_u8_f64_x4 (z4), ++ svreinterpret_u8 (z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +index fbf392b3e..2da61ff5c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +@@ -421,4 +421,18 @@ + return z0_res; \ + } + ++#define TEST_DUAL_XN(NAME, TTYPE1, TTYPE2, RES, CODE1, CODE2) \ ++ PROTO (NAME, void, ()) \ ++ { \ ++ register TTYPE1 z0 __asm ("z0"); \ ++ register TTYPE2 z4 __asm ("z4"); \ ++ register TTYPE1 z18 __asm ("z18"); \ ++ register TTYPE2 z23 __asm ("z23"); \ ++ register TTYPE1 z28 __asm ("z28"); \ ++ __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z18), \ ++ "=w" (z23), "=w" (z28)); \ ++ INVOKE (RES = CODE1, RES = CODE2); \ ++ __asm volatile ("" :: "w" (RES)); \ ++ } ++ + #endif +-- +2.33.0 +
View file
_service:tar_scm:0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch
Added
@@ -0,0 +1,158 @@ +From bdc189d43ef38ea53823120de8008f39ead0618d Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 15 Jun 2024 18:29:43 +0800 +Subject: [PATCH 170/188] LoongArch: Tweak IOR rtx_cost for bstrins + +Consider + + c &= 0xfff; + a &= ~0xfff; + b &= ~0xfff; + a |= c; + b |= c; + +This can be done with 2 bstrins instructions. But we need to recognize +it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff" +forward. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: + (loongarch_use_bstrins_for_ior_with_mask): Split the main logic + into ... + (loongarch_use_bstrins_for_ior_with_mask_1): ... here. + (loongarch_rtx_costs): Special case for IOR those can be + implemented with bstrins. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-3.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++----- + .../gcc.target/loongarch/bstrins-3.c | 16 ++++ + 2 files changed, 72 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 77f83ab9e..cd9fa98dc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3678,6 +3678,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) + return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); + } + ++static int ++loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode, ++ unsigned HOST_WIDE_INT mask1, ++ unsigned HOST_WIDE_INT mask2) ++{ ++ if (mask1 != ~mask2 || !mask1 || !mask2) ++ return 0; ++ ++ /* Try to avoid a right-shift. */ ++ if (low_bitmask_len (mode, mask1) != -1) ++ return -1; ++ ++ if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) ++ return 1; ++ ++ if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) ++ return -1; ++ ++ return 0; ++} ++ + /* Return the cost of moving between two registers of mode MODE. */ + + static int +@@ -3809,6 +3830,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + /* Fall through. */ + + case IOR: ++ { ++ rtx op[2] = {XEXP (x, 0), XEXP (x, 1)}; ++ if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND ++ && (mode == SImode || (TARGET_64BIT && mode == DImode))) ++ { ++ rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1); ++ if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1)) ++ { ++ unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0); ++ unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1); ++ if (loongarch_use_bstrins_for_ior_with_mask_1 (mode, ++ mask0, ++ mask1)) ++ { ++ /* A bstrins instruction */ ++ *total = COSTS_N_INSNS (1); ++ ++ /* A srai instruction */ ++ if (low_bitmask_len (mode, mask0) == -1 ++ && low_bitmask_len (mode, mask1) == -1) ++ *total += COSTS_N_INSNS (1); ++ ++ for (int i = 0; i < 2; i++) ++ *total += set_src_cost (XEXP (op[i], 0), mode, speed); ++ ++ return true; ++ } ++ } ++ } ++ } ++ ++ /* Fall through. */ + case XOR: + /* Double-word operations use two single-word operations. */ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), +@@ -5793,23 +5846,9 @@ bool loongarch_pre_reload_split (void) + int + loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) + { +- unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]); +- unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]); +- +- if (mask1 != ~mask2 || !mask1 || !mask2) +- return 0; +- +- /* Try to avoid a right-shift. */ +- if (low_bitmask_len (mode, mask1) != -1) +- return -1; +- +- if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) +- return 1; +- +- if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) +- return -1; +- +- return 0; ++ return loongarch_use_bstrins_for_ior_with_mask_1 (mode, ++ UINTVAL (op[2]), ++ UINTVAL (op[4])); + } + + /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c +new file mode 100644 +index 000000000..13762bdef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-rtl-final" } */ ++/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */ ++ ++struct X { ++ long a, b; ++}; ++ ++struct X ++test (long a, long b, long c) ++{ ++ c &= 0xfff; ++ a &= ~0xfff; ++ b &= ~0xfff; ++ return (struct X){.a = a | c, .b = b | c}; ++} +-- +2.43.0 +
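The new cost hook is easiest to see on a single pair from the commit message: rewriting the AND/AND/IOR triple as one bit-field insert only happens if forward propagation keeps "c & 0xfff" available, which the cheaper IOR cost now encourages. An illustrative sketch of the pattern (mirroring the bstrins-3.c test added above):

long
merge_low_bits (long a, long c)
{
  /* Keep bits 63:12 of a and insert bits 11:0 of c; with this patch
     LoongArch can emit a single bstrins.d for the whole expression.  */
  return (a & ~0xfffL) | (c & 0xfffL);
}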
View file
_service:tar_scm:0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch
Added
@@ -0,0 +1,90 @@ +From 11f813112629dbad432134f7b4c7c9a93551eb3c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Mon, 27 Nov 2023 13:38:16 +0000 +Subject: [PATCH 072/157] [Backport][SME] attribs: Use existing traits for + excl_hash_traits + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b33cf3a3a2025a4856f90fea8bd04884c2f6b31 + +excl_hash_traits can be defined more simply by reusing existing traits. + +gcc/ + * attribs.cc (excl_hash_traits): Delete. + (test_attribute_exclusions): Use pair_hash and nofree_string_hash + instead. +--- + gcc/attribs.cc | 45 +++------------------------------------------ + 1 file changed, 3 insertions(+), 42 deletions(-) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index b219f8780..16d05b1da 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -2555,47 +2555,6 @@ namespace selftest + + typedef std::pair<const char *, const char *> excl_pair; + +-struct excl_hash_traits: typed_noop_remove<excl_pair> +-{ +- typedef excl_pair value_type; +- typedef value_type compare_type; +- +- static hashval_t hash (const value_type &x) +- { +- hashval_t h1 = htab_hash_string (x.first); +- hashval_t h2 = htab_hash_string (x.second); +- return h1 ^ h2; +- } +- +- static bool equal (const value_type &x, const value_type &y) +- { +- return !strcmp (x.first, y.first) && !strcmp (x.second, y.second); +- } +- +- static void mark_deleted (value_type &x) +- { +- x = value_type (NULL, NULL); +- } +- +- static const bool empty_zero_p = false; +- +- static void mark_empty (value_type &x) +- { +- x = value_type ("", ""); +- } +- +- static bool is_deleted (const value_type &x) +- { +- return !x.first && !x.second; +- } +- +- static bool is_empty (const value_type &x) +- { +- return !*x.first && !*x.second; +- } +-}; +- +- + /* Self-test to verify that each attribute exclusion is symmetric, + meaning that if attribute A is encoded as incompatible with + attribute B then the opposite relationship is also encoded. +@@ -2605,13 +2564,15 @@ struct excl_hash_traits: typed_noop_remove<excl_pair> + static void + test_attribute_exclusions () + { ++ using excl_hash_traits = pair_hash<nofree_string_hash, nofree_string_hash>; ++ + /* Iterate over the array of attribute tables first (with TI0 as + the index) and over the array of attribute_spec in each table + (with SI0 as the index). */ + const size_t ntables = ARRAY_SIZE (attribute_tables); + + /* Set of pairs of mutually exclusive attributes. */ +- typedef hash_set<excl_pair, false, excl_hash_traits> exclusion_set; ++ typedef hash_set<excl_hash_traits> exclusion_set; + exclusion_set excl_set; + + for (size_t ti0 = 0; ti0 != ntables; ++ti0) +-- +2.33.0 +
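The replacement relies on traits that already exist in GCC's hashing headers: nofree_string_hash hashes borrowed C strings with htab_hash_string/strcmp semantics, and pair_hash composes the hash and equality of its two components, so the hand-rolled traits above collapse into one typedef. A rough sketch of the resulting usage (GCC-internal types from hash-traits.h and hash-set.h are assumed; the attribute pair is only an example of a known exclusion):

typedef pair_hash <nofree_string_hash, nofree_string_hash> excl_hash_traits;

hash_set <excl_hash_traits> excl_set;
/* Record one mutually-exclusive attribute pair and query it back.  */
excl_set.add (std::make_pair ("cold", "hot"));
bool seen = excl_set.contains (std::make_pair ("cold", "hot"));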
View file
_service:tar_scm:0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch
Added
@@ -0,0 +1,44 @@ +From 51c20768fde58093794ff0281c698b6738346313 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 16 Jun 2024 12:22:40 +0800 +Subject: [PATCH 171/188] LoongArch: NFC: Dedup and sort the comment in + loongarch_print_operand_reloc + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand_reloc): + Dedup and sort the comment describing modifiers. +--- + gcc/config/loongarch/loongarch.cc | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index cd9fa98dc..35524b5da 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6129,21 +6129,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), + 'z' for (eq:?I ...), 'n' for (ne:?I ...). + 't' Like 'T', but with the EQ/NE cases reversed +- 'F' Print the FPU branch condition for comparison OP. +- 'W' Print the inverse of the FPU branch condition for comparison OP. +- 'w' Print a LSX register. + 'u' Print a LASX register. +- 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), +- 'z' for (eq:?I ...), 'n' for (ne:?I ...). +- 't' Like 'T', but with the EQ/NE cases reversed +- 'Y' Print loongarch_fp_conditions[INTVAL (OP)] +- 'Z' Print OP and a comma for 8CC, otherwise print nothing. +- 'z' Print $0 if OP is zero, otherwise print OP normally. + 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI, + V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively. + 'V' Print exact log2 of CONST_INT OP element 0 of a replicated + CONST_VECTOR in decimal. + 'W' Print the inverse of the FPU branch condition for comparison OP. ++ 'w' Print a LSX register. + 'X' Print CONST_INT OP in hexadecimal format. + 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format. + 'Y' Print loongarch_fp_conditions[INTVAL (OP)] +-- +2.43.0 +
View file
_service:tar_scm:0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
Added
@@ -0,0 +1,2369 @@ +From 82d654912e3671055034e789a8f7110f6d87d447 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sat, 2 Dec 2023 13:49:52 +0000 +Subject: [PATCH 073/157] [Backport][SME] Allow target attributes in non-gnu + namespaces + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7fa24687aa3a683fd105ce5ff6b176f48dca3b6c + +Currently there are four static sources of attributes: + +- LANG_HOOKS_ATTRIBUTE_TABLE +- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE +- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE +- TARGET_ATTRIBUTE_TABLE + +All of the attributes in these tables go in the "gnu" namespace. +This means that they can use the traditional GNU __attribute__((...)) +syntax and the standard gnu::... syntax. + +Standard attributes are registered dynamically with a null namespace. +There are no supported attributes in other namespaces (clang, vendor +namespaces, etc.). + +This patch tries to generalise things by making the namespace +part of the attribute specification. + +It's usual for multiple attributes to be defined in the same namespace, +so rather than adding the namespace to each individual definition, +it seemed better to group attributes in the same namespace together. +This would also allow us to reuse the same table for clang attributes +that are written with the GNU syntax, or other similar situations +where the attribute can be accessed via multiple "spellings". + +The patch therefore adds a scoped_attribute_specs that contains +a namespace and a list of attributes in that namespace. + +It's still possible to have multiple scoped_attribute_specs +for the same namespace. E.g. it makes sense to keep the +C++-specific, C/C++-common, and format-related attributes in +separate tables, even though they're all GNU attributes. + +Current lists of attributes are terminated by a null name. +Rather than keep that for the new structure, it seemed neater +to use an array_slice. This also makes the tables slightly more +compact. + +In general, a target might want to support attributes in multiple +namespaces. Rather than have a separate hook for each possibility +(like the three langhooks above), it seemed better to make +TARGET_ATTRIBUTE_TABLE a table of tables. Specifically, it's +an array_slice of scoped_attribute_specs. + +We can do the same thing for langhooks, which allows the three hooks +above to be merged into a single LANG_HOOKS_ATTRIBUTE_TABLE. +It also allows the standard attributes to be registered statically +and checked by the usual attribs.cc checks. + +The patch adds a TARGET_GNU_ATTRIBUTES helper for the common case +in which a target wants a single table of gnu attributes. It can +only be used if the table is free of preprocessor directives. + +There are probably other things we need to do to make vendor namespaces +work smoothly. E.g. in principle it would be good to make exclusion +sets namespace-aware. But to some extent we have that with standard +vs. gnu attributes too. This patch is just supposed to be a first step. + +gcc/ + * attribs.h (scoped_attribute_specs): New structure. + (register_scoped_attributes): Take a reference to a + scoped_attribute_specs instead of separate namespace and array + parameters. + * plugin.h (register_scoped_attributes): Likewise. + * attribs.cc (register_scoped_attributes): Likewise. + (attribute_tables): Change into an array of scoped_attribute_specs + pointers. Reduce to 1 element for frontends and 1 element for targets. + (empty_attribute_table): Delete.
+ (check_attribute_tables): Update for changes to attribute_tables. + Use a hash_set to identify duplicates. + (handle_ignored_attributes_option): Update for above changes. + (init_attributes): Likewise. + (excl_pair): Delete. + (test_attribute_exclusions): Update for above changes. Don't + enforce symmetry for standard attributes in the top-level namespace. + * langhooks-def.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Likewise. + (LANG_HOOKS_INITIALIZER): Update accordingly. + (LANG_HOOKS_ATTRIBUTE_TABLE): Define to an empty constructor. + * langhooks.h (lang_hooks::common_attribute_table): Delete. + (lang_hooks::format_attribute_table): Likewise. + (lang_hooks::attribute_table): Redefine to an array of + scoped_attribute_specs pointers. + * target-def.h (TARGET_GNU_ATTRIBUTES): New macro. + * target.def (attribute_spec): Redefine to return an array of + scoped_attribute_specs pointers. + * tree-inline.cc (function_attribute_inlinable_p): Update accordingly. + * doc/tm.texi: Regenerate. + * config/aarch64/aarch64.cc (aarch64_attribute_table): Define using + TARGET_GNU_ATTRIBUTES. + * config/alpha/alpha.cc (vms_attribute_table): Likewise. + * config/avr/avr.cc (avr_attribute_table): Likewise. + * config/bfin/bfin.cc (bfin_attribute_table): Likewise. + * config/bpf/bpf.cc (bpf_attribute_table): Likewise. + * config/csky/csky.cc (csky_attribute_table): Likewise. + * config/epiphany/epiphany.cc (epiphany_attribute_table): Likewise. + * config/gcn/gcn.cc (gcn_attribute_table): Likewise. + * config/h8300/h8300.cc (h8300_attribute_table): Likewise. + * config/loongarch/loongarch.cc (loongarch_attribute_table): Likewise. + * config/m32c/m32c.cc (m32c_attribute_table): Likewise. + * config/m32r/m32r.cc (m32r_attribute_table): Likewise. + * config/m68k/m68k.cc (m68k_attribute_table): Likewise. + * config/mcore/mcore.cc (mcore_attribute_table): Likewise. + * config/microblaze/microblaze.cc (microblaze_attribute_table): + Likewise. + * config/mips/mips.cc (mips_attribute_table): Likewise. + * config/msp430/msp430.cc (msp430_attribute_table): Likewise. + * config/nds32/nds32.cc (nds32_attribute_table): Likewise. + * config/nvptx/nvptx.cc (nvptx_attribute_table): Likewise. + * config/riscv/riscv.cc (riscv_attribute_table): Likewise. + * config/rl78/rl78.cc (rl78_attribute_table): Likewise. + * config/rx/rx.cc (rx_attribute_table): Likewise. + * config/s390/s390.cc (s390_attribute_table): Likewise. + * config/sh/sh.cc (sh_attribute_table): Likewise. + * config/sparc/sparc.cc (sparc_attribute_table): Likewise. + * config/stormy16/stormy16.cc (xstormy16_attribute_table): Likewise. + * config/v850/v850.cc (v850_attribute_table): Likewise. + * config/visium/visium.cc (visium_attribute_table): Likewise. + * config/arc/arc.cc (arc_attribute_table): Likewise. Move further + down file. + * config/arm/arm.cc (arm_attribute_table): Update for above changes, + using... + (arm_gnu_attributes, arm_gnu_attribute_table): ...these new globals. + * config/i386/i386-options.h (ix86_attribute_table): Delete. + (ix86_gnu_attribute_table): Declare. + * config/i386/i386-options.cc (ix86_attribute_table): Replace with... + (ix86_gnu_attributes, ix86_gnu_attribute_table): ...these two globals. + * config/i386/i386.cc (ix86_attribute_table): Define as an array of + scoped_attribute_specs pointers. + * config/ia64/ia64.cc (ia64_attribute_table): Update for above changes, + using... + (ia64_gnu_attributes, ia64_gnu_attribute_table): ...these new globals. 
+ * config/rs6000/rs6000.cc (rs6000_attribute_table): Update for above + changes, using... + (rs6000_gnu_attributes, rs6000_gnu_attribute_table): ...these new + globals. + +gcc/ada/ + * gcc-interface/gigi.h (gnat_internal_attribute_table): Change + type to scoped_attribute_specs. + * gcc-interface/utils.cc (gnat_internal_attribute_table): Likewise, + using... + (gnat_internal_attributes): ...this as the underlying array. + * gcc-interface/misc.cc (gnat_attribute_table): New global. + (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. + +gcc/c-family/ + * c-common.h (c_common_attribute_table): Replace with... + (c_common_gnu_attribute_table): ...this. + (c_common_format_attribute_table): Change type to + scoped_attribute_specs. + * c-attribs.cc (c_common_attribute_table): Replace with... + (c_common_gnu_attributes, c_common_gnu_attribute_table): ...these + new globals. + (c_common_format_attribute_table): Change type to + scoped_attribute_specs, using... + (c_common_format_attributes): ...this as the underlying array. + +gcc/c/ + * c-tree.h (std_attribute_table): Declare. + * c-decl.cc (std_attribute_table): Change type to + scoped_attribute_specs, using... + (std_attributes): ...this as the underlying array. + (c_init_decl_processing): Remove call to register_scoped_attributes. + * c-objc-common.h (c_objc_attribute_table): New global. + (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. + (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + +gcc/cp/ + * cp-tree.h (cxx_attribute_table): Delete. + (cxx_gnu_attribute_table, std_attribute_table): Declare. + * cp-objcp-common.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + (cp_objcp_attribute_table): New table. + (LANG_HOOKS_ATTRIBUTE_TABLE): Redefine. + * tree.cc (cxx_attribute_table): Replace with... + (cxx_gnu_attributes, cxx_gnu_attribute_table): ...these globals. + (std_attribute_table): Change type to scoped_attribute_specs, using... + (std_attributes): ...this as the underlying array. + (init_tree): Remove call to register_scoped_attributes. + +gcc/d/ + * d-tree.h (d_langhook_attribute_table): Replace with... + (d_langhook_gnu_attribute_table): ...this. + (d_langhook_common_attribute_table): Change type to + scoped_attribute_specs. + * d-attribs.cc (d_langhook_common_attribute_table): Change type to + scoped_attribute_specs, using... + (d_langhook_common_attributes): ...this as the underlying array. + (d_langhook_attribute_table): Replace with... + (d_langhook_gnu_attributes, d_langhook_gnu_attribute_table): ...these + new globals. + (uda_attribute_p): Update accordingly, and update for new + targetm.attribute_table type. + * d-lang.cc (d_langhook_attribute_table): New global. + (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + +gcc/fortran/ + * f95-lang.cc: Include attribs.h. + (gfc_attribute_table): Change to an array of scoped_attribute_specs + pointers, using... + (gfc_gnu_attributes, gfc_gnu_attribute_table): ...these new globals. + +gcc/jit/ + * dummy-frontend.cc (jit_format_attribute_table): Change type to + scoped_attribute_specs, using... + (jit_format_attributes): ...this as the underlying array. + (jit_attribute_table): Change to an array of scoped_attribute_specs + pointers, using... + (jit_gnu_attributes, jit_gnu_attribute_table): ...these new globals + for the original array. Include the format attributes. + (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_ATTRIBUTE_TABLE): Define. 
+ +gcc/lto/ + * lto-lang.cc (lto_format_attribute_table): Change type to + scoped_attribute_specs, using... + (lto_format_attributes): ...this as the underlying array. + (lto_attribute_table): Change to an array of scoped_attribute_specs + pointers, using... + (lto_gnu_attributes, lto_gnu_attribute_table): ...these new globals + for the original array. Include the format attributes. + (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. + (LANG_HOOKS_ATTRIBUTE_TABLE): Define. +--- + gcc/ada/gcc-interface/gigi.h | 2 +- + gcc/ada/gcc-interface/misc.cc | 7 +- + gcc/ada/gcc-interface/utils.cc | 8 +- + gcc/attribs.cc | 221 ++++++++++++---------------- + gcc/attribs.h | 12 +- + gcc/c-family/c-attribs.cc | 20 ++- + gcc/c-family/c-common.h | 4 +- + gcc/c/c-decl.cc | 12 +- + gcc/c/c-objc-common.h | 14 +- + gcc/c/c-tree.h | 2 + + gcc/config/aarch64/aarch64.cc | 7 +- + gcc/config/alpha/alpha.cc | 7 +- + gcc/config/arc/arc.cc | 74 +++++----- + gcc/config/arm/arm.cc | 15 +- + gcc/config/avr/avr.cc | 7 +- + gcc/config/bfin/bfin.cc | 7 +- + gcc/config/bpf/bpf.cc | 9 +- + gcc/config/csky/csky.cc | 7 +- + gcc/config/epiphany/epiphany.cc | 7 +- + gcc/config/gcn/gcn.cc | 8 +- + gcc/config/h8300/h8300.cc | 7 +- + gcc/config/i386/i386-options.cc | 10 +- + gcc/config/i386/i386-options.h | 2 +- + gcc/config/i386/i386.cc | 5 + + gcc/config/ia64/ia64.cc | 15 +- + gcc/config/m32c/m32c.cc | 7 +- + gcc/config/m32r/m32r.cc | 7 +- + gcc/config/m68k/m68k.cc | 7 +- + gcc/config/mcore/mcore.cc | 7 +- + gcc/config/microblaze/microblaze.cc | 7 +- + gcc/config/mips/mips.cc | 7 +- + gcc/config/msp430/msp430.cc | 8 +- + gcc/config/nds32/nds32.cc | 9 +- + gcc/config/nvptx/nvptx.cc | 7 +- + gcc/config/riscv/riscv.cc | 9 +- + gcc/config/rl78/rl78.cc | 7 +- + gcc/config/rs6000/rs6000.cc | 13 +- + gcc/config/rx/rx.cc | 7 +- + gcc/config/s390/s390.cc | 9 +- + gcc/config/sh/sh.cc | 7 +- + gcc/config/sparc/sparc.cc | 7 +- + gcc/config/stormy16/stormy16.cc | 7 +- + gcc/config/v850/v850.cc | 7 +- + gcc/config/visium/visium.cc | 7 +- + gcc/cp/cp-objcp-common.h | 15 +- + gcc/cp/cp-tree.h | 3 +- + gcc/cp/tree.cc | 16 +- + gcc/d/d-attribs.cc | 35 ++--- + gcc/d/d-lang.cc | 8 +- + gcc/d/d-tree.h | 4 +- + gcc/doc/tm.texi | 33 ++++- + gcc/fortran/f95-lang.cc | 14 +- + gcc/jit/dummy-frontend.cc | 32 ++-- + gcc/langhooks-def.h | 6 +- + gcc/langhooks.h | 4 +- + gcc/lto/lto-lang.cc | 30 ++-- + gcc/plugin.h | 3 +- + gcc/target-def.h | 14 ++ + gcc/target.def | 35 ++++- + gcc/tree-inline.cc | 7 +- + 60 files changed, 491 insertions(+), 403 deletions(-) + +diff --git a/gcc/ada/gcc-interface/gigi.h b/gcc/ada/gcc-interface/gigi.h +index bd559d176..6ababfcbb 100644 +--- a/gcc/ada/gcc-interface/gigi.h ++++ b/gcc/ada/gcc-interface/gigi.h +@@ -349,7 +349,7 @@ struct attrib + }; + + /* Table of machine-independent internal attributes. */ +-extern const struct attribute_spec gnat_internal_attribute_table; ++extern const struct scoped_attribute_specs gnat_internal_attribute_table; + + /* Define the entries in the standard data array. */ + enum standard_datatypes +diff --git a/gcc/ada/gcc-interface/misc.cc b/gcc/ada/gcc-interface/misc.cc +index 2caa83ff8..8dd055772 100644 +--- a/gcc/ada/gcc-interface/misc.cc ++++ b/gcc/ada/gcc-interface/misc.cc +@@ -1339,6 +1339,11 @@ get_lang_specific (tree node) + return TYPE_LANG_SPECIFIC (node); + } + ++const struct scoped_attribute_specs *const gnat_attribute_table = ++{ ++ &gnat_internal_attribute_table ++}; ++ + /* Definitions for our language-specific hooks. 
+ 
+ #undef LANG_HOOKS_NAME
+@@ -1404,7 +1409,7 @@ get_lang_specific (tree node)
+ #undef LANG_HOOKS_GET_FIXED_POINT_TYPE_INFO
+ #define LANG_HOOKS_GET_FIXED_POINT_TYPE_INFO gnat_get_fixed_point_type_info
+ #undef LANG_HOOKS_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_ATTRIBUTE_TABLE gnat_internal_attribute_table
++#define LANG_HOOKS_ATTRIBUTE_TABLE gnat_attribute_table
+ #undef LANG_HOOKS_BUILTIN_FUNCTION
+ #define LANG_HOOKS_BUILTIN_FUNCTION gnat_builtin_function
+ #undef LANG_HOOKS_INIT_TS
+diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
+index 049cf74eb..ef8524fa9 100644
+--- a/gcc/ada/gcc-interface/utils.cc
++++ b/gcc/ada/gcc-interface/utils.cc
+@@ -134,7 +134,7 @@ static tree fake_attribute_handler (tree *, tree, tree, int, bool *);
+ 
+ /* Table of machine-independent internal attributes for Ada.  We support
+    this minimal set of attributes to accommodate the needs of builtins.  */
+-const struct attribute_spec gnat_internal_attribute_table[] =
++static const attribute_spec gnat_internal_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -207,9 +207,11 @@ const struct attribute_spec gnat_internal_attribute_table[] =
+     fake_attribute_handler, NULL },
+   { "format_arg", 1, 1, false, true, true, false,
+     fake_attribute_handler, NULL },
++};
+ 
+-  { NULL, 0, 0, false, false, false, false,
+-    NULL, NULL }
++const scoped_attribute_specs gnat_internal_attribute_table =
++{
++  "gnu", gnat_internal_attributes
+ };
+ 
+ /* Associates a GNAT tree node to a GCC tree node. It is used in
+diff --git a/gcc/attribs.cc b/gcc/attribs.cc
+index 16d05b1da..656ea739e 100644
+--- a/gcc/attribs.cc
++++ b/gcc/attribs.cc
+@@ -39,7 +39,7 @@ along with GCC; see the file COPYING3.  If not see
+ 
+ /* Table of the tables of attributes (common, language, format, machine)
+    searched.  */
+-static const struct attribute_spec *attribute_tables[4];
++static array_slice<const scoped_attribute_specs *const> attribute_tables[2];
+ 
+ /* Substring representation.  */
+ 
+@@ -102,13 +102,6 @@ static const struct attribute_spec *lookup_scoped_attribute_spec (const_tree,
+ 
+ static bool attributes_initialized = false;
+ 
+-/* Default empty table of attributes.  */
+-
+-static const struct attribute_spec empty_attribute_table[] =
+-{
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
+-
+ /* Return base name of the attribute.  Ie '__attr__' is turned into 'attr'.
+    To avoid need for copying, we simply return length of the string.  */
+ 
+@@ -118,21 +111,19 @@ extract_attribute_substring (struct substring *str)
+   canonicalize_attr_name (str->str, str->length);
+ }
+ 
+-/* Insert an array of attributes ATTRIBUTES into a namespace.  This
+-   array must be NULL terminated.  NS is the name of attribute
+-   namespace.  IGNORED_P is true iff all unknown attributes in this
+-   namespace should be ignored for the purposes of -Wattributes.  The
+-   function returns the namespace into which the attributes have been
+-   registered.  */
++/* Insert SPECS into its namespace.  IGNORED_P is true iff all unknown
++   attributes in this namespace should be ignored for the purposes of
++   -Wattributes.  The function returns the namespace into which the
++   attributes have been registered.  */
+ 
+ scoped_attributes *
+-register_scoped_attributes (const struct attribute_spec *attributes,
+-			    const char *ns, bool ignored_p /*=false*/)
++register_scoped_attributes (const scoped_attribute_specs &specs,
++			    bool ignored_p /*=false*/)
+ {
+   scoped_attributes *result = NULL;
+ 
+   /* See if we already have attributes in the namespace NS.  */
+-  result = find_attribute_namespace (ns);
++  result = find_attribute_namespace (specs.ns);
+ 
+   if (result == NULL)
+     {
+@@ -143,7 +134,7 @@ register_scoped_attributes (const struct attribute_spec *attributes,
+ 	attributes_table.create (64);
+ 
+       memset (&sa, 0, sizeof (sa));
+-      sa.ns = ns;
++      sa.ns = specs.ns;
+       sa.attributes.create (64);
+       sa.ignored_p = ignored_p;
+       result = attributes_table.safe_push (sa);
+@@ -153,10 +144,10 @@ register_scoped_attributes (const struct attribute_spec *attributes,
+       result->ignored_p |= ignored_p;
+ 
+   /* Really add the attributes to their namespace now.  */
+-  for (unsigned i = 0; attributes[i].name != NULL; ++i)
++  for (const attribute_spec &attribute : specs.attributes)
+     {
+-      result->attributes.safe_push (attributes[i]);
+-      register_scoped_attribute (&attributes[i], result);
++      result->attributes.safe_push (attribute);
++      register_scoped_attribute (&attribute, result);
+     }
+ 
+   gcc_assert (result != NULL);
+@@ -183,49 +174,40 @@ find_attribute_namespace (const char* ns)
+ static void
+ check_attribute_tables (void)
+ {
+-  for (size_t i = 0; i < ARRAY_SIZE (attribute_tables); i++)
+-    for (size_t j = 0; attribute_tables[i][j].name != NULL; j++)
+-      {
+-	/* The name must not begin and end with __.  */
+-	const char *name = attribute_tables[i][j].name;
+-	int len = strlen (name);
++  hash_set<pair_hash<nofree_string_hash, nofree_string_hash>> names;
+ 
+-	gcc_assert (!(name[0] == '_' && name[1] == '_'
+-		      && name[len - 1] == '_' && name[len - 2] == '_'));
++  for (auto scoped_array : attribute_tables)
++    for (auto scoped_attributes : scoped_array)
++      for (const attribute_spec &attribute : scoped_attributes->attributes)
++	{
++	  /* The name must not begin and end with __.  */
++	  const char *name = attribute.name;
++	  int len = strlen (name);
++
++	  gcc_assert (!(name[0] == '_' && name[1] == '_'
++			&& name[len - 1] == '_' && name[len - 2] == '_'));
+ 
+-	/* The minimum and maximum lengths must be consistent.  */
+-	gcc_assert (attribute_tables[i][j].min_length >= 0);
++	  /* The minimum and maximum lengths must be consistent.  */
++	  gcc_assert (attribute.min_length >= 0);
+ 
+-	gcc_assert (attribute_tables[i][j].max_length == -1
+-		    || (attribute_tables[i][j].max_length
+-			>= attribute_tables[i][j].min_length));
++	  gcc_assert (attribute.max_length == -1
++		      || attribute.max_length >= attribute.min_length);
+ 
+-	/* An attribute cannot require both a DECL and a TYPE.  */
+-	gcc_assert (!attribute_tables[i][j].decl_required
+-		    || !attribute_tables[i][j].type_required);
++	  /* An attribute cannot require both a DECL and a TYPE.  */
++	  gcc_assert (!attribute.decl_required
++		      || !attribute.type_required);
+ 
+ 	  /* If an attribute requires a function type, in particular
+ 	     it requires a type.  */
+-	gcc_assert (!attribute_tables[i][j].function_type_required
+-		    || attribute_tables[i][j].type_required);
+-      }
+-
+-  /* Check that each name occurs just once in each table.  */
+-  for (size_t i = 0; i < ARRAY_SIZE (attribute_tables); i++)
+-    for (size_t j = 0; attribute_tables[i][j].name != NULL; j++)
+-      for (size_t k = j + 1; attribute_tables[i][k].name != NULL; k++)
+-	gcc_assert (strcmp (attribute_tables[i][j].name,
+-			    attribute_tables[i][k].name));
+-
+-  /* Check that no name occurs in more than one table.  Names that
+-     begin with '*' are exempt, and may be overridden.  */
+-  for (size_t i = 0; i < ARRAY_SIZE (attribute_tables); i++)
+-    for (size_t j = i + 1; j < ARRAY_SIZE (attribute_tables); j++)
+-      for (size_t k = 0; attribute_tables[i][k].name != NULL; k++)
+-	for (size_t l = 0; attribute_tables[j][l].name != NULL; l++)
+-	  gcc_assert (attribute_tables[i][k].name[0] == '*'
+-		      || strcmp (attribute_tables[i][k].name,
+-				 attribute_tables[j][l].name));
++	  gcc_assert (!attribute.function_type_required
++		      || attribute.type_required);
++
++	  /* Check that no name occurs more than once.  Names that
++	     begin with '*' are exempt, and may be overridden.  */
++	  const char *ns = scoped_attributes->ns;
++	  if (name[0] != '*' && names.add ({ ns ? ns : "", name }))
++	    gcc_unreachable ();
++	}
+ }
+ 
+ /* Used to stash pointers to allocated memory so that we can free them at
+@@ -280,7 +262,7 @@ handle_ignored_attributes_option (vec<char *> *v)
+       canonicalize_attr_name (vendor_start, vendor_len);
+       /* We perform all this hijinks so that we don't have to copy OPT.  */
+       tree vendor_id = get_identifier_with_length (vendor_start, vendor_len);
+-      const char *attr;
++      array_slice<const attribute_spec> attrs;
+       /* In the "vendor::" case, we should ignore *any* attribute coming
+ 	 from this attribute namespace.  */
+       if (attr_len > 0)
+@@ -292,22 +274,23 @@ handle_ignored_attributes_option (vec<char *> *v)
+ 	    }
+ 	  canonicalize_attr_name (attr_start, attr_len);
+ 	  tree attr_id = get_identifier_with_length (attr_start, attr_len);
+-	  attr = IDENTIFIER_POINTER (attr_id);
++	  const char *attr = IDENTIFIER_POINTER (attr_id);
+ 	  /* If we've already seen this vendor::attr, ignore it.  Attempting to
+ 	     register it twice would lead to a crash.  */
+ 	  if (lookup_scoped_attribute_spec (vendor_id, attr_id))
+ 	    continue;
++	  /* Create a table with extra attributes which we will register.
++	     We can't free it here, so squirrel away the pointers.  */
++	  attribute_spec *table = new attribute_spec {
++	    attr, 0, -2, false, false, false, false, nullptr, nullptr
++	  };
++	  ignored_attributes_table.safe_push (table);
++	  attrs = { table, 1 };
+ 	}
+-      else
+-	attr = nullptr;
+-      /* Create a table with extra attributes which we will register.
+-	 We can't free it here, so squirrel away the pointers.  */
+-      attribute_spec *table = new attribute_spec[2];
+-      ignored_attributes_table.safe_push (table);
+-      table[0] = { attr, 0, -2, false, false, false, false, nullptr, nullptr };
+-      table[1] = { nullptr, 0, 0, false, false, false, false, nullptr,
+-		   nullptr };
+-      register_scoped_attributes (table, IDENTIFIER_POINTER (vendor_id), !attr);
++      const scoped_attribute_specs scoped_specs = {
++	IDENTIFIER_POINTER (vendor_id), attrs
++      };
++      register_scoped_attributes (scoped_specs, attrs.empty ());
+     }
+ }
+ 
+@@ -327,27 +310,18 @@ free_attr_data ()
+ void
+ init_attributes (void)
+ {
+-  size_t i;
+-
+   if (attributes_initialized)
+     return;
+ 
+-  attribute_tables[0] = lang_hooks.common_attribute_table;
+-  attribute_tables[1] = lang_hooks.attribute_table;
+-  attribute_tables[2] = lang_hooks.format_attribute_table;
+-  attribute_tables[3] = targetm.attribute_table;
+-
+-  /* Translate NULL pointers to pointers to the empty table.  */
+-  for (i = 0; i < ARRAY_SIZE (attribute_tables); i++)
+-    if (attribute_tables[i] == NULL)
+-      attribute_tables[i] = empty_attribute_table;
++  attribute_tables[0] = lang_hooks.attribute_table;
++  attribute_tables[1] = targetm.attribute_table;
+ 
+   if (flag_checking)
+     check_attribute_tables ();
+ 
+-  for (i = 0; i < ARRAY_SIZE (attribute_tables); ++i)
+-    /* Put all the GNU attributes into the "gnu" namespace.  */
+-    register_scoped_attributes (attribute_tables[i], "gnu");
++  for (auto scoped_array : attribute_tables)
++    for (auto scoped_attributes : scoped_array)
++      register_scoped_attributes (*scoped_attributes);
+ 
+   vec<char *> *ignored = (vec<char *> *) flag_ignored_attributes;
+   handle_ignored_attributes_option (ignored);
+@@ -2551,10 +2525,6 @@ attr_access::array_as_string (tree type) const
+ namespace selftest
+ {
+ 
+-/* Helper types to verify the consistency attribute exclusions.  */
+-
+-typedef std::pair<const char *, const char *> excl_pair;
+-
+ /* Self-test to verify that each attribute exclusion is symmetric,
+    meaning that if attribute A is encoded as incompatible with
+    attribute B then the opposite relationship is also encoded.
+@@ -2569,55 +2539,54 @@ test_attribute_exclusions ()
+   /* Iterate over the array of attribute tables first (with TI0 as
+      the index) and over the array of attribute_spec in each table
+      (with SI0 as the index).  */
+-  const size_t ntables = ARRAY_SIZE (attribute_tables);
++  hash_set<excl_hash_traits> excl_set;
+ 
+-  /* Set of pairs of mutually exclusive attributes.  */
+-  typedef hash_set<excl_hash_traits> exclusion_set;
+-  exclusion_set excl_set;
++  for (auto scoped_array : attribute_tables)
++    for (auto scoped_attributes : scoped_array)
++      for (const attribute_spec &attribute : scoped_attributes->attributes)
++	{
++	  const attribute_spec::exclusions *excl = attribute.exclude;
+ 
+-  for (size_t ti0 = 0; ti0 != ntables; ++ti0)
+-    for (size_t s0 = 0; attribute_tables[ti0][s0].name; ++s0)
+-      {
+-	const attribute_spec::exclusions *excl
+-	  = attribute_tables[ti0][s0].exclude;
++	  /* Skip each attribute that doesn't define exclusions.  */
++	  if (!excl)
++	    continue;
+ 
+-	/* Skip each attribute that doesn't define exclusions.  */
+-	if (!excl)
+-	  continue;
++	  /* Skip standard (non-GNU) attributes, since currently the
++	     exclusions are implicitly for GNU attributes only.
++	     Also, C++ likely and unlikely get rewritten to gnu::hot
++	     and gnu::cold, so symmetry isn't necessary there.  */
++	  if (!scoped_attributes->ns)
++	    continue;
+ 
+-	const char *attr_name = attribute_tables[ti0][s0].name;
++	  const char *attr_name = attribute.name;
+ 
+-	/* Iterate over the set of exclusions for every attribute
+-	   (with EI0 as the index) adding the exclusions defined
+-	   for each to the set.  */
+-	for (size_t ei0 = 0; excl[ei0].name; ++ei0)
+-	  {
+-	    const char *excl_name = excl[ei0].name;
++	  /* Iterate over the set of exclusions for every attribute
++	     (with EI0 as the index) adding the exclusions defined
++	     for each to the set.  */
++	  for (size_t ei0 = 0; excl[ei0].name; ++ei0)
++	    {
++	      const char *excl_name = excl[ei0].name;
+ 
+-	    if (!strcmp (attr_name, excl_name))
+-	      continue;
++	      if (!strcmp (attr_name, excl_name))
++		continue;
+ 
+-	    excl_set.add (excl_pair (attr_name, excl_name));
+-	  }
+-      }
++	      excl_set.add ({ attr_name, excl_name });
++	    }
++	}
+ 
+   /* Traverse the set of mutually exclusive pairs of attributes
+      and verify that they are symmetric.  */
+-  for (exclusion_set::iterator it = excl_set.begin ();
+-       it != excl_set.end ();
+-       ++it)
+-    {
+-      if (!excl_set.contains (excl_pair ((*it).second, (*it).first)))
+-	{
+-	  /* An exclusion for an attribute has been found that
+-	     doesn't have a corresponding exclusion in the opposite
+-	     direction.  */
+-	  char desc[120];
+-	  sprintf (desc, "'%s' attribute exclusion '%s' must be symmetric",
+-		   (*it).first, (*it).second);
+-	  fail (SELFTEST_LOCATION, desc);
+-	}
+-    }
++  for (auto excl_pair : excl_set)
++    if (!excl_set.contains ({ excl_pair.second, excl_pair.first }))
++      {
++	/* An exclusion for an attribute has been found that
++	   doesn't have a corresponding exclusion in the opposite
++	   direction.  */
++	char desc[120];
++	sprintf (desc, "'%s' attribute exclusion '%s' must be symmetric",
++		 excl_pair.first, excl_pair.second);
++	fail (SELFTEST_LOCATION, desc);
++      }
+ }
+ 
+ void
+diff --git a/gcc/attribs.h b/gcc/attribs.h
+index 5b6f63ede..0856f98fb 100644
+--- a/gcc/attribs.h
++++ b/gcc/attribs.h
+@@ -20,6 +20,13 @@ along with GCC; see the file COPYING3.  If not see
+ #ifndef GCC_ATTRIBS_H
+ #define GCC_ATTRIBS_H
+ 
++/* A set of attributes that belong to the same namespace, given by NS.  */
++struct scoped_attribute_specs
++{
++  const char *ns;
++  array_slice<const attribute_spec> attributes;
++};
++
+ extern const struct attribute_spec *lookup_attribute_spec (const_tree);
+ extern void free_attr_data ();
+ extern void init_attributes (void);
+@@ -42,9 +49,8 @@ extern tree make_attribute (const char *, const char *, tree);
+ extern bool attribute_ignored_p (tree);
+ extern bool attribute_ignored_p (const attribute_spec *const);
+ 
+-extern struct scoped_attributes* register_scoped_attributes (const struct attribute_spec *,
+-							     const char *,
+-							     bool = false);
++extern struct scoped_attributes *
++  register_scoped_attributes (const scoped_attribute_specs &, bool = false);
+ 
+ extern char *sorted_attr_string (tree);
+ extern bool common_function_versions (tree, tree);
+diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
+index 111a33f40..d5c0392b7 100644
+--- a/gcc/c-family/c-attribs.cc
++++ b/gcc/c-family/c-attribs.cc
+@@ -282,7 +282,7 @@ static const struct attribute_spec::exclusions attr_stack_protect_exclusions[] =
+ /* Table of machine-independent attributes common to all C-like languages.
+ 
+    Current list of processed common attributes: nonnull.  */
+-const struct attribute_spec c_common_attribute_table[] =
++const struct attribute_spec c_common_gnu_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -554,23 +554,31 @@ const struct attribute_spec c_common_attribute_table[] =
+   { "*dealloc", 1, 2, true, false, false, false,
+     handle_dealloc_attribute, NULL },
+   { "tainted_args", 0, 0, true, false, false, false,
+-    handle_tainted_args_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++    handle_tainted_args_attribute, NULL }
++};
++
++const struct scoped_attribute_specs c_common_gnu_attribute_table =
++{
++  "gnu", c_common_gnu_attributes
+ };
+ 
+ /* Give the specifications for the format attributes, used by C and all
+    descendants.
+ 
+    Current list of processed format attributes: format, format_arg.  */
+-const struct attribute_spec c_common_format_attribute_table[] =
++const struct attribute_spec c_common_format_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "format", 3, 3, false, true, true, false,
+     handle_format_attribute, NULL },
+   { "format_arg", 1, 1, false, true, true, false,
+-    handle_format_arg_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++    handle_format_arg_attribute, NULL }
++};
++
++const struct scoped_attribute_specs c_common_format_attribute_table =
++{
++  "gnu", c_common_format_attributes
+ };
+ 
+ /* Returns TRUE iff the attribute indicated by ATTR_ID takes a plain
+diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
+index 3d5b9c40e..d1503c5a7 100644
+--- a/gcc/c-family/c-common.h
++++ b/gcc/c-family/c-common.h
+@@ -819,8 +819,8 @@ enum conversion_safety {
+ extern struct visibility_flags visibility_options;
+ 
+ /* Attribute table common to the C front ends.  */
+-extern const struct attribute_spec c_common_attribute_table[];
+-extern const struct attribute_spec c_common_format_attribute_table[];
++extern const struct scoped_attribute_specs c_common_gnu_attribute_table;
++extern const struct scoped_attribute_specs c_common_format_attribute_table;
+ 
+ /* Pointer to function to lazily generate the VAR_DECL for __FUNCTION__ etc.
+    ID is the identifier to use, NAME is the string.
+diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
+index 619a20909..9d87a8cdb 100644
+--- a/gcc/c/c-decl.cc
++++ b/gcc/c/c-decl.cc
+@@ -4460,7 +4460,7 @@ handle_nodiscard_attribute (tree *node, tree name, tree /*args*/,
+   return NULL_TREE;
+ }
+ /* Table of supported standard (C2x) attributes.  */
+-const struct attribute_spec std_attribute_table[] =
++static const attribute_spec std_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -4471,8 +4471,12 @@ const struct attribute_spec std_attribute_table[] =
+   { "maybe_unused", 0, 0, false, false, false, false,
+     handle_unused_attribute, NULL },
+   { "nodiscard", 0, 1, false, false, false, false,
+-    handle_nodiscard_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++    handle_nodiscard_attribute, NULL }
++};
++
++const scoped_attribute_specs std_attribute_table =
++{
++  nullptr, std_attributes
+ };
+ 
+ /* Create the predefined scalar types of C,
+@@ -4488,8 +4492,6 @@ c_init_decl_processing (void)
+   /* Initialize reserved words for parser.  */
+   c_parse_init ();
+ 
+-  register_scoped_attributes (std_attribute_table, NULL);
+-
+   current_function_decl = NULL_TREE;
+ 
+   gcc_obstack_init (&parser_obstack);
+diff --git a/gcc/c/c-objc-common.h b/gcc/c/c-objc-common.h
+index 0b60df975..bc3dded23 100644
+--- a/gcc/c/c-objc-common.h
++++ b/gcc/c/c-objc-common.h
+@@ -70,11 +70,15 @@ along with GCC; see the file COPYING3.  If not see
+ #undef LANG_HOOKS_FINALIZE_EARLY_DEBUG
+ #define LANG_HOOKS_FINALIZE_EARLY_DEBUG c_common_finalize_early_debug
+ 
+-/* Attribute hooks.  */
+-#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE c_common_attribute_table
+-#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE c_common_format_attribute_table
++static const scoped_attribute_specs *const c_objc_attribute_table[] =
++{
++  &std_attribute_table,
++  &c_common_gnu_attribute_table,
++  &c_common_format_attribute_table
++};
++
++#undef LANG_HOOKS_ATTRIBUTE_TABLE
++#define LANG_HOOKS_ATTRIBUTE_TABLE c_objc_attribute_table
+ 
+ #undef LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN
+ #define LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN c_dump_tree
+diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
+index c70f0ba5a..654bd4094 100644
+--- a/gcc/c/c-tree.h
++++ b/gcc/c/c-tree.h
+@@ -835,6 +835,8 @@ set_c_expr_source_range (c_expr *expr,
+ /* In c-fold.cc */
+ extern vec<tree> incomplete_record_decls;
+ 
++extern const struct scoped_attribute_specs std_attribute_table;
++
+ #if CHECKING_P
+ namespace selftest {
+   extern void run_c_tests (void);
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 4194dfc70..114252a3c 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -2986,7 +2986,7 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
+ }
+ 
+ /* Table of machine attributes.  */
+-static const struct attribute_spec aarch64_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (aarch64_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -2997,9 +2997,8 @@ static const struct attribute_spec aarch64_attribute_table[] =
+      NULL },
+   { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
+   { "SVE type", 3, 3, false, true, false, true, NULL, NULL },
+-  { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL }
++});
+ 
+ /* An ISA extension in the co-processor and main instruction set space.  */
+ struct aarch64_option_extension
+diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
+index 66c17149d..7fb491918 100644
+--- a/gcc/config/alpha/alpha.cc
++++ b/gcc/config/alpha/alpha.cc
+@@ -7475,14 +7475,13 @@ common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
+   return NULL_TREE;
+ }
+ 
+-static const struct attribute_spec vms_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (vms_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { COMMON_OBJECT, 0, 1, true, false, false, false, common_object_handler,
+-    NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    NULL }
++});
+ 
+ void
+ vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
+diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
+index fbc17e684..1c6adcab4 100644
+--- a/gcc/config/arc/arc.cc
++++ b/gcc/config/arc/arc.cc
+@@ -230,44 +230,6 @@ static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
+ static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *);
+ static tree arc_handle_aux_attribute (tree *, tree, tree, int, bool *);
+ 
+-/* Initialized arc_attribute_table to NULL since arc doesnot have any
+-   machine specific supported attributes.  */
+-const struct attribute_spec arc_attribute_table[] =
+-{
+-  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+-       affects_type_identity, handler, exclude } */
+-  { "interrupt", 1, 1, true, false, false, true,
+-    arc_handle_interrupt_attribute, NULL },
+-  /* Function calls made to this symbol must be done indirectly, because
+-     it may lie outside of the 21/25 bit addressing range of a normal function
+-     call.  */
+-  { "long_call", 0, 0, false, true, true, false, NULL, NULL },
+-  /* Whereas these functions are always known to reside within the 25 bit
+-     addressing range of unconditionalized bl.  */
+-  { "medium_call", 0, 0, false, true, true, false, NULL, NULL },
+-  /* And these functions are always known to reside within the 21 bit
+-     addressing range of blcc.  */
+-  { "short_call", 0, 0, false, true, true, false, NULL, NULL },
+-  /* Function which are not having the prologue and epilogue generated
+-     by the compiler.  */
+-  { "naked", 0, 0, true, false, false, false, arc_handle_fndecl_attribute,
+-    NULL },
+-  /* Functions calls made using jli instruction.  The pointer in JLI
+-     table is found latter.  */
+-  { "jli_always", 0, 0, false, true, true, false, NULL, NULL },
+-  /* Functions calls made using jli instruction.  The pointer in JLI
+-     table is given as input parameter.  */
+-  { "jli_fixed", 1, 1, false, true, true, false, arc_handle_jli_attribute,
+-    NULL },
+-  /* Call a function using secure-mode.  */
+-  { "secure_call", 1, 1, false, true, true, false, arc_handle_secure_attribute,
+-    NULL },
+-  /* Bypass caches using .di flag.  */
+-  { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute,
+-    NULL },
+-  { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
+ static int arc_comp_type_attributes (const_tree, const_tree);
+ static void arc_file_start (void);
+ static void arc_internal_label (FILE *, const char *, unsigned long);
+@@ -819,6 +781,42 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
+ 
+ #include "target-def.h"
+ 
++TARGET_GNU_ATTRIBUTES (arc_attribute_table,
++{
++  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
++       affects_type_identity, handler, exclude } */
++  { "interrupt", 1, 1, true, false, false, true,
++    arc_handle_interrupt_attribute, NULL },
++  /* Function calls made to this symbol must be done indirectly, because
++     it may lie outside of the 21/25 bit addressing range of a normal function
++     call.  */
++  { "long_call", 0, 0, false, true, true, false, NULL, NULL },
++  /* Whereas these functions are always known to reside within the 25 bit
++     addressing range of unconditionalized bl.  */
++  { "medium_call", 0, 0, false, true, true, false, NULL, NULL },
++  /* And these functions are always known to reside within the 21 bit
++     addressing range of blcc.  */
++  { "short_call", 0, 0, false, true, true, false, NULL, NULL },
++  /* Function which are not having the prologue and epilogue generated
++     by the compiler.  */
++  { "naked", 0, 0, true, false, false, false, arc_handle_fndecl_attribute,
++    NULL },
++  /* Functions calls made using jli instruction.  The pointer in JLI
++     table is found latter.  */
++  { "jli_always", 0, 0, false, true, true, false, NULL, NULL },
++  /* Functions calls made using jli instruction.  The pointer in JLI
++     table is given as input parameter.  */
++  { "jli_fixed", 1, 1, false, true, true, false, arc_handle_jli_attribute,
++    NULL },
++  /* Call a function using secure-mode.  */
++  { "secure_call", 1, 1, false, true, true, false, arc_handle_secure_attribute,
++    NULL },
++  /* Bypass caches using .di flag.  */
++  { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute,
++    NULL },
++  { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL }
++});
++
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+ #undef TARGET_ASM_ALIGNED_SI_OP
+diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
+index c72e9c0b0..3bdc7e18e 100644
+--- a/gcc/config/arm/arm.cc
++++ b/gcc/config/arm/arm.cc
+@@ -329,7 +329,7 @@ static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
+ static const char *arm_identify_fpu_from_isa (sbitmap);
+ 
+ /* Table of machine attributes.  */
+-static const struct attribute_spec arm_attribute_table[] =
++static const attribute_spec arm_gnu_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -377,8 +377,17 @@ static const struct attribute_spec arm_attribute_table[] =
+     arm_handle_cmse_nonsecure_entry, NULL },
+   { "cmse_nonsecure_call", 0, 0, true, false, false, true,
+     arm_handle_cmse_nonsecure_call, NULL },
+-  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
++};
++
++static const scoped_attribute_specs arm_gnu_attribute_table =
++{
++  "gnu", arm_gnu_attributes
++};
++
++static const scoped_attribute_specs *const arm_attribute_table[] =
++{
++  &arm_gnu_attribute_table
+ };
+ 
+ /* Initialize the GCC target structure.  */
+diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
+index 1b5a95410..7b37278ca 100644
+--- a/gcc/config/avr/avr.cc
++++ b/gcc/config/avr/avr.cc
+@@ -9723,7 +9723,7 @@ avr_eval_addr_attrib (rtx x)
+ 
+ 
+ /* AVR attributes.  */
+-static const struct attribute_spec avr_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (avr_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -9748,9 +9748,8 @@ static const struct attribute_spec avr_attribute_table[] =
+   { "address", 1, 1, true, false, false, false,
+     avr_handle_addr_attribute, NULL },
+   { "absdata", 0, 0, true, false, false, false,
+-    avr_handle_absdata_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    avr_handle_absdata_attribute, NULL }
++});
+ 
+ 
+ /* Return true if we support address space AS for the architecture in effect
+diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc
+index b2a9142f5..fbc5c84d1 100644
+--- a/gcc/config/bfin/bfin.cc
++++ b/gcc/config/bfin/bfin.cc
+@@ -4895,7 +4895,7 @@ bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name),
+ }
+ 
+ /* Table of valid machine attributes.  */
+-static const struct attribute_spec bfin_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (bfin_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -4920,9 +4920,8 @@ static const struct attribute_spec bfin_attribute_table[] =
+     bfin_handle_l1_data_attribute, NULL },
+   { "l1_data_B", 0, 0, true, false, false, false,
+     bfin_handle_l1_data_attribute, NULL },
+-  { "l2", 0, 0, true, false, false, false, bfin_handle_l2_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  { "l2", 0, 0, true, false, false, false, bfin_handle_l2_attribute, NULL }
++});
+ 
+ /* Implementation of TARGET_ASM_INTEGER.  When using FD-PIC, we need to
+    tell the assembler to generate pointers to function descriptors in
+diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
+index 6a0e3bbca..0343af9c7 100644
+--- a/gcc/config/bpf/bpf.cc
++++ b/gcc/config/bpf/bpf.cc
+@@ -146,7 +146,7 @@ bpf_handle_preserve_access_index_attribute (tree *node, tree name,
+ 
+ /* Target-specific attributes.  */
+ 
+-static const struct attribute_spec bpf_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (bpf_attribute_table,
+ {
+   /* Syntax: { name, min_len, max_len, decl_required, type_required,
+ 	       function_type_required, affects_type_identity, handler,
+@@ -159,11 +159,8 @@ static const struct attribute_spec bpf_attribute_table[] =
+   /* CO-RE support: attribute to mark that all accesses to the declared
+      struct/union/array should be recorded.  */
+   { "preserve_access_index", 0, -1, false, true, false, true,
+-    bpf_handle_preserve_access_index_attribute, NULL },
+-
+-  /* The last attribute spec is set to be NULL.  */
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    bpf_handle_preserve_access_index_attribute, NULL }
++});
+ 
+ #undef TARGET_ATTRIBUTE_TABLE
+ #define TARGET_ATTRIBUTE_TABLE bpf_attribute_table
+diff --git a/gcc/config/csky/csky.cc b/gcc/config/csky/csky.cc
+index e315e09a8..b511fafe5 100644
+--- a/gcc/config/csky/csky.cc
++++ b/gcc/config/csky/csky.cc
+@@ -211,16 +211,15 @@ const int csky_dbx_regno[FIRST_PSEUDO_REGISTER] =
+ /* Table of machine attributes.  */
+ static tree csky_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+ static tree csky_handle_isr_attribute (tree *, tree, tree, int, bool *);
+-static const struct attribute_spec csky_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (csky_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "naked", 0, 0, true, false, false, false, csky_handle_fndecl_attribute, NULL },
+   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
+   { "interrupt", 0, 1, false, false, false, false, csky_handle_isr_attribute, NULL },
+-  { "isr", 0, 1, false, false, false, false, csky_handle_isr_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  { "isr", 0, 1, false, false, false, false, csky_handle_isr_attribute, NULL }
++});
+ 
+ /* A C structure for machine-specific, per-function data.
+    This is added to the cfun structure.  */
+diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
+index 62636b1ec..8a7c0a988 100644
+--- a/gcc/config/epiphany/epiphany.cc
++++ b/gcc/config/epiphany/epiphany.cc
+@@ -460,7 +460,7 @@ epiphany_init_reg_tables (void)
+    They unmask them while calling an interruptible
+    function, though.  */
+-static const struct attribute_spec epiphany_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (epiphany_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -470,9 +470,8 @@ static const struct attribute_spec epiphany_attribute_table[] =
+     epiphany_handle_forwarder_attribute, NULL },
+   { "long_call", 0, 0, false, true, true, false, NULL, NULL },
+   { "short_call", 0, 0, false, true, true, false, NULL, NULL },
+-  { "disinterrupt", 0, 0, false, true, true, true, NULL, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  { "disinterrupt", 0, 0, false, true, true, true, NULL, NULL }
++});
+ 
+ /* Handle an "interrupt" attribute; arguments as in
+    struct attribute_spec.handler.  */
+diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
+index e2cbdd1ac..0b049abcc 100644
+--- a/gcc/config/gcn/gcn.cc
++++ b/gcc/config/gcn/gcn.cc
+@@ -363,14 +363,12 @@ gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
+ 
+    Create target-specific __attribute__ types.  */
+ 
+-static const struct attribute_spec gcn_attribute_table[] = {
++TARGET_GNU_ATTRIBUTES (gcn_attribute_table, {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+        affects_type_identity } */
+   {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
+-   true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
+-  /* End element.  */
+-  {NULL, 0, 0, false, false, false, false, NULL, NULL}
+-};
++   true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL}
++});
+ 
+ /* }}}  */
+ /* {{{ Registers and modes.  */
+diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc
+index 78cf15f15..a0fa689de 100644
+--- a/gcc/config/h8300/h8300.cc
++++ b/gcc/config/h8300/h8300.cc
+@@ -4909,7 +4909,7 @@ h8300_insert_attributes (tree node, tree *attributes)
+    tiny_data: This variable lives in the tiny data area and can be
+    referenced with 16-bit absolute memory references.  */
+ 
+-static const struct attribute_spec h8300_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (h8300_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -4926,9 +4926,8 @@ static const struct attribute_spec h8300_attribute_table[] =
+   { "eightbit_data", 0, 0, true, false, false, false,
+     h8300_handle_eightbit_data_attribute, NULL },
+   { "tiny_data", 0, 0, true, false, false, false,
+-    h8300_handle_tiny_data_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    h8300_handle_tiny_data_attribute, NULL }
++});
+ 
+ 
+ /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 86932d719..991661fe4 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -3875,7 +3875,7 @@ handle_nodirect_extern_access_attribute (tree *pnode, tree name,
+ }
+ 
+ /* Table of valid machine attributes.  */
+-const struct attribute_spec ix86_attribute_table[] =
++static const attribute_spec ix86_gnu_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -3955,10 +3955,12 @@ const struct attribute_spec ix86_attribute_table[] =
+   { "cf_check", 0, 0, true, false, false, false,
+     ix86_handle_fndecl_attribute, NULL },
+   { "nodirect_extern_access", 0, 0, true, false, false, false,
+-    handle_nodirect_extern_access_attribute, NULL },
++    handle_nodirect_extern_access_attribute, NULL }
++};
+ 
+-  /* End element.  */
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++const scoped_attribute_specs ix86_gnu_attribute_table =
++{
++  "gnu", ix86_gnu_attributes
+ };
+ 
+ #include "gt-i386-options.h"
+diff --git a/gcc/config/i386/i386-options.h b/gcc/config/i386/i386-options.h
+index ce4034f62..a7bdb22c0 100644
+--- a/gcc/config/i386/i386-options.h
++++ b/gcc/config/i386/i386-options.h
+@@ -82,7 +82,7 @@ void ix86_function_specific_print (FILE *, int,
+ 				   struct cl_target_option *);
+ bool ix86_valid_target_attribute_p (tree, tree, tree, int);
+ 
+-extern const struct attribute_spec ix86_attribute_table[];
++extern const struct scoped_attribute_specs ix86_gnu_attribute_table;
+ 
+ 
+ #endif  /* GCC_I386_OPTIONS_H */
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index 83a0d8abb..ade965927 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -24293,6 +24293,11 @@ ix86_run_selftests (void)
+ 
+ #endif /* CHECKING_P */
+ 
++static const scoped_attribute_specs *const ix86_attribute_table[] =
++{
++  &ix86_gnu_attribute_table
++};
++
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_RETURN_IN_MEMORY
+ #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
+diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
+index f9fb681a3..b9ced1c46 100644
+--- a/gcc/config/ia64/ia64.cc
++++ b/gcc/config/ia64/ia64.cc
+@@ -357,7 +357,7 @@ static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
+ 
+ 
+ /* Table of valid machine attributes.  */
+-static const struct attribute_spec ia64_attribute_table[] =
++static const attribute_spec ia64_gnu_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -369,8 +369,17 @@ static const struct attribute_spec ia64_attribute_table[] =
+     ia64_vms_common_object_attribute, NULL },
+ #endif
+   { "version_id", 1, 1, true, false, false, false,
+-    ia64_handle_version_id_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++    ia64_handle_version_id_attribute, NULL }
++};
++
++static const scoped_attribute_specs ia64_gnu_attribute_table =
++{
++  "gnu", ia64_gnu_attributes
++};
++
++static const scoped_attribute_specs *const ia64_attribute_table[] =
++{
++  &ia64_gnu_attribute_table
+ };
+ 
+ /* Initialize the GCC target structure.  */
+diff --git a/gcc/config/m32c/m32c.cc b/gcc/config/m32c/m32c.cc
+index 11ca9a43a..a8f6523df 100644
+--- a/gcc/config/m32c/m32c.cc
++++ b/gcc/config/m32c/m32c.cc
+@@ -2996,7 +2996,7 @@ current_function_special_page_vector (rtx x)
+ 
+ #undef TARGET_ATTRIBUTE_TABLE
+ #define TARGET_ATTRIBUTE_TABLE m32c_attribute_table
+-static const struct attribute_spec m32c_attribute_table[] = {
++TARGET_GNU_ATTRIBUTES (m32c_attribute_table, {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "interrupt", 0, 0, false, false, false, false, interrupt_handler, NULL },
+@@ -3004,9 +3004,8 @@ static const struct attribute_spec m32c_attribute_table[] = {
+   { "fast_interrupt", 0, 0, false, false, false, false,
+     interrupt_handler, NULL },
+   { "function_vector", 1, 1, true, false, false, false,
+-    function_vector_handler, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    function_vector_handler, NULL }
++});
+ 
+ #undef TARGET_COMP_TYPE_ATTRIBUTES
+ #define TARGET_COMP_TYPE_ATTRIBUTES m32c_comp_type_attributes
+diff --git a/gcc/config/m32r/m32r.cc b/gcc/config/m32r/m32r.cc
+index bca768172..78a17f0a1 100644
+--- a/gcc/config/m32r/m32r.cc
++++ b/gcc/config/m32r/m32r.cc
+@@ -111,15 +111,14 @@ static HOST_WIDE_INT m32r_starting_frame_offset (void);
+ 
+ /* M32R specific attributes.  */
+ 
+-static const struct attribute_spec m32r_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (m32r_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "interrupt", 0, 0, true, false, false, false, NULL, NULL },
+   { "model", 1, 1, true, false, false, false, m32r_handle_model_attribute,
+-    NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    NULL }
++});
+ 
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ATTRIBUTE_TABLE
+diff --git a/gcc/config/m68k/m68k.cc b/gcc/config/m68k/m68k.cc
+index 62898dafe..effb6db8d 100644
+--- a/gcc/config/m68k/m68k.cc
++++ b/gcc/config/m68k/m68k.cc
+@@ -360,7 +360,7 @@ static void m68k_asm_final_postscan_insn (FILE *, rtx_insn *insn, rtx , int);
+ #undef TARGET_ASM_FINAL_POSTSCAN_INSN
+ #define TARGET_ASM_FINAL_POSTSCAN_INSN m68k_asm_final_postscan_insn
+ 
+-static const struct attribute_spec m68k_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (m68k_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -369,9 +369,8 @@ static const struct attribute_spec m68k_attribute_table[] =
+   { "interrupt_handler", 0, 0, true, false, false, false,
+     m68k_handle_fndecl_attribute, NULL },
+   { "interrupt_thread", 0, 0, true, false, false, false,
+-    m68k_handle_fndecl_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    m68k_handle_fndecl_attribute, NULL }
++});
+ 
+ struct gcc_target targetm = TARGET_INITIALIZER;
+ 
+diff --git a/gcc/config/mcore/mcore.cc b/gcc/config/mcore/mcore.cc
+index 28e707496..e497b0f44 100644
+--- a/gcc/config/mcore/mcore.cc
++++ b/gcc/config/mcore/mcore.cc
+@@ -150,16 +150,15 @@ static bool mcore_modes_tieable_p (machine_mode, machine_mode);
+ 
+ /* MCore specific attributes.  */
+-static const struct attribute_spec mcore_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (mcore_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
+   { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
+   { "naked", 0, 0, true, false, false, false,
+-    mcore_handle_naked_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    mcore_handle_naked_attribute, NULL }
++});
+ 
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_EXTERNAL_LIBCALL
+diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc
+index f32effecf..6b14d3e29 100644
+--- a/gcc/config/microblaze/microblaze.cc
++++ b/gcc/config/microblaze/microblaze.cc
+@@ -218,15 +218,14 @@ int break_handler;
+ int fast_interrupt;
+ int save_volatiles;
+ 
+-const struct attribute_spec microblaze_attribute_table[] = {
++TARGET_GNU_ATTRIBUTES (microblaze_attribute_table, {
+   /* name         min_len, max_len, decl_req, type_req, fn_type_req,
+      affects_type_identity, handler, exclude */
+   {"interrupt_handler", 0, 0, true, false, false, false, NULL, NULL },
+   {"break_handler", 0, 0, true, false, false, false, NULL, NULL },
+   {"fast_interrupt", 0, 0, true, false, false, false, NULL, NULL },
+-  {"save_volatiles", 0, 0, true, false, false, false, NULL, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  {"save_volatiles", 0, 0, true, false, false, false, NULL, NULL }
++});
+ 
+ static int microblaze_interrupt_function_p (tree);
+ 
+diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
+index 02d11ddbf..5474ca152 100644
+--- a/gcc/config/mips/mips.cc
++++ b/gcc/config/mips/mips.cc
+@@ -607,7 +607,7 @@ static tree mips_handle_use_shadow_register_set_attr (tree *, tree, tree, int,
+ 						      bool *);
+ 
+ /* The value of TARGET_ATTRIBUTE_TABLE.  */
+-static const struct attribute_spec mips_attribute_table[] = {
++TARGET_GNU_ATTRIBUTES (mips_attribute_table, {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "long_call", 0, 0, false, true, true, false, NULL, NULL },
+@@ -629,9 +629,8 @@ static const struct attribute_spec mips_attribute_table[] = {
+   { "use_shadow_register_set", 0, 1, false, true, true, false,
+     mips_handle_use_shadow_register_set_attr, NULL },
+   { "keep_interrupts_masked", 0, 0, false, true, true, false, NULL, NULL },
+-  { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL }
++});
+ 
+ /* A table describing all the processors GCC knows about; see
+    mips-cpus.def for details.  */
+diff --git a/gcc/config/msp430/msp430.cc b/gcc/config/msp430/msp430.cc
+index 7a378ceac..f58855978 100644
+--- a/gcc/config/msp430/msp430.cc
++++ b/gcc/config/msp430/msp430.cc
+@@ -2055,7 +2055,7 @@ static const struct attribute_spec::exclusions attr_either_exclusions[] =
+ #define TARGET_ATTRIBUTE_TABLE msp430_attribute_table
+ 
+ /* Table of MSP430-specific attributes.  */
+-const struct attribute_spec msp430_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (msp430_attribute_table,
+ {
+   /* { name, min_num_args, max_num_args, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -2073,10 +2073,8 @@ const struct attribute_spec msp430_attribute_table[] =
+   { ATTR_UPPER, 0, 0, true, false, false, false, msp430_section_attr,
+     attr_upper_exclusions },
+   { ATTR_EITHER, 0, 0, true, false, false, false, msp430_section_attr,
+-    attr_either_exclusions },
+-
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-  };
++    attr_either_exclusions }
++  });
+ 
+ #undef TARGET_HANDLE_GENERIC_ATTRIBUTE
+ #define TARGET_HANDLE_GENERIC_ATTRIBUTE msp430_handle_generic_attribute
+diff --git a/gcc/config/nds32/nds32.cc b/gcc/config/nds32/nds32.cc
+index 27530495f..519b11e4c 100644
+--- a/gcc/config/nds32/nds32.cc
++++ b/gcc/config/nds32/nds32.cc
+@@ -288,7 +288,7 @@ static const int nds32_reg_alloc_order_for_speed[] =
+ };
+ 
+ /* Defining target-specific uses of __attribute__.  */
+-static const struct attribute_spec nds32_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (nds32_attribute_table,
+ {
+   /* Syntax: { name, min_len, max_len, decl_required, type_required,
+ 	       function_type_required, affects_type_identity, handler,
+@@ -326,11 +326,8 @@ static const struct attribute_spec nds32_attribute_table[] =
+ 
+   /* FOR BACKWARD COMPATIBILITY,
+      this attribute also tells no prologue/epilogue.  */
+-  { "no_prologue", 0, 0, false, false, false, false, NULL, NULL },
+-
+-  /* The last attribute spec is set to be NULL.  */
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  { "no_prologue", 0, 0, false, false, false, false, NULL, NULL }
++});
+ 
+ 
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
+index 7f2103ba6..9a3e418f4 100644
+--- a/gcc/config/nvptx/nvptx.cc
++++ b/gcc/config/nvptx/nvptx.cc
+@@ -5817,16 +5817,15 @@ nvptx_handle_shared_attribute (tree *node, tree name, tree ARG_UNUSED (args),
+ }
+ 
+ /* Table of valid machine attributes.  */
+-static const struct attribute_spec nvptx_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (nvptx_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "kernel", 0, 0, true, false, false, false, nvptx_handle_kernel_attribute,
+     NULL },
+   { "shared", 0, 0, true, false, false, false, nvptx_handle_shared_attribute,
+-    NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    NULL }
++});
+ 
+ /* Limit vector alignments to BIGGEST_ALIGNMENT.  */
+ 
+diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
+index 9cf79beba..f5a27bdc9 100644
+--- a/gcc/config/riscv/riscv.cc
++++ b/gcc/config/riscv/riscv.cc
+@@ -336,7 +336,7 @@ static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+ static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
+ 
+ /* Defining target-specific uses of __attribute__.  */
+-static const struct attribute_spec riscv_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (riscv_attribute_table,
+ {
+   /* Syntax: { name, min_len, max_len, decl_required, type_required,
+ 	       function_type_required, affects_type_identity, handler,
+@@ -347,11 +347,8 @@ static const struct attribute_spec riscv_attribute_table[] =
+     riscv_handle_fndecl_attribute, NULL },
+   /* This attribute generates prologue/epilogue for interrupt handlers.  */
+   { "interrupt", 0, 1, false, true, true, false,
+-    riscv_handle_type_attribute, NULL },
+-
+-  /* The last attribute spec is set to be NULL.  */
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    riscv_handle_type_attribute, NULL }
++});
+ 
+ /* Order for the CLOBBERs/USEs of gpr_save.  */
+ static const unsigned gpr_save_reg_order[] = {
+diff --git a/gcc/config/rl78/rl78.cc b/gcc/config/rl78/rl78.cc
+index b3727c0a8..97386c7ea 100644
+--- a/gcc/config/rl78/rl78.cc
++++ b/gcc/config/rl78/rl78.cc
+@@ -898,7 +898,7 @@ rl78_handle_vector_attribute (tree * node,
+ #define TARGET_ATTRIBUTE_TABLE rl78_attribute_table
+ 
+ /* Table of RL78-specific attributes.  */
+-const struct attribute_spec rl78_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (rl78_attribute_table,
+ {
+   /* Name, min_len, max_len, decl_req, type_req, fn_type_req,
+      affects_type_identity, handler, exclude.  */
+@@ -911,9 +911,8 @@ const struct attribute_spec rl78_attribute_table[] =
+   { "saddr", 0, 0, true, false, false, false,
+     rl78_handle_saddr_attribute, NULL },
+   { "vector", 1, -1, true, false, false, false,
+-    rl78_handle_vector_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    rl78_handle_vector_attribute, NULL }
++});
+ 
+ 
+ 
+diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
+index 55d4ce751..46e3d1a12 100644
+--- a/gcc/config/rs6000/rs6000.cc
++++ b/gcc/config/rs6000/rs6000.cc
+@@ -1276,7 +1276,7 @@ static const char alt_reg_names[][8] =
+ 
+ /* Table of valid machine attributes.  */
+ 
+-static const struct attribute_spec rs6000_attribute_table[] =
++static const attribute_spec rs6000_gnu_attributes[] =
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -1293,7 +1293,16 @@ static const struct attribute_spec rs6000_attribute_table[] =
+ #ifdef SUBTARGET_ATTRIBUTE_TABLE
+   SUBTARGET_ATTRIBUTE_TABLE,
+ #endif
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
++};
++
++static const scoped_attribute_specs rs6000_gnu_attribute_table =
++{
++  "gnu", rs6000_gnu_attributes
++};
++
++static const scoped_attribute_specs *const rs6000_attribute_table[] =
++{
++  &rs6000_gnu_attribute_table
+ };
+ 
+ #ifndef TARGET_PROFILE_KERNEL
+diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc
+index 412a3a354..2f1178b00 100644
+--- a/gcc/config/rx/rx.cc
++++ b/gcc/config/rx/rx.cc
+@@ -2759,7 +2759,7 @@ rx_handle_vector_attribute (tree * node,
+ }
+ 
+ /* Table of RX specific attributes.  */
+-const struct attribute_spec rx_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (rx_attribute_table,
+ {
+   /* Name, min_len, max_len, decl_req, type_req, fn_type_req,
+      affects_type_identity, handler, exclude.  */
+@@ -2770,9 +2770,8 @@ const struct attribute_spec rx_attribute_table[] =
+   { "naked", 0, 0, true, false, false, false,
+     rx_handle_func_attribute, NULL },
+   { "vector", 1, -1, true, false, false, false,
+-    rx_handle_vector_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    rx_handle_vector_attribute, NULL }
++});
+ 
+ /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE.  */
+ 
+diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
+index f1599a5c5..dcdf7dad0 100644
+--- a/gcc/config/s390/s390.cc
++++ b/gcc/config/s390/s390.cc
+@@ -1247,7 +1247,7 @@ s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
+   return NULL_TREE;
+ }
+ 
+-static const struct attribute_spec s390_attribute_table[] = {
++TARGET_GNU_ATTRIBUTES (s390_attribute_table, {
+   { "hotpatch", 2, 2, true, false, false, false,
+     s390_handle_hotpatch_attribute, NULL },
+   { "s390_vector_bool", 0, 0, false, true, false, true,
+@@ -1263,11 +1263,8 @@ static const struct attribute_spec s390_attribute_table[] = {
+   { "function_return_reg", 1, 1, true, false, false, false,
+     s390_handle_string_attribute, NULL },
+   { "function_return_mem", 1, 1, true, false, false, false,
+-    s390_handle_string_attribute, NULL },
+-
+-  /* End element.  */
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    s390_handle_string_attribute, NULL }
++});
+ 
+ /* Return the alignment for LABEL.  We default to the -falign-labels
+    value except for the literal pool base label.  */
+diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
+index 74d61c43b..5717b7ab8 100644
+--- a/gcc/config/sh/sh.cc
++++ b/gcc/config/sh/sh.cc
+@@ -328,7 +328,7 @@ static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
+ static bool sh_modes_tieable_p (machine_mode, machine_mode);
+ static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
+ 
+-static const struct attribute_spec sh_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (sh_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -347,9 +347,8 @@ static const struct attribute_spec sh_attribute_table[] =
+   { "resbank", 0, 0, true, false, false, false,
+     sh_handle_resbank_handler_attribute, NULL },
+   { "function_vector", 1, 1, true, false, false, false,
+-    sh2a_handle_function_vector_handler_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    sh2a_handle_function_vector_handler_attribute, NULL }
++});
+ 
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ATTRIBUTE_TABLE
+diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc
+index 27db12e6b..61bf302db 100644
+--- a/gcc/config/sparc/sparc.cc
++++ b/gcc/config/sparc/sparc.cc
+@@ -719,13 +719,12 @@ static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
+ 
+ #ifdef SUBTARGET_ATTRIBUTE_TABLE
+ /* Table of valid machine attributes.  */
+-static const struct attribute_spec sparc_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (sparc_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        do_diagnostic, handler, exclude } */
+-  SUBTARGET_ATTRIBUTE_TABLE,
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++  SUBTARGET_ATTRIBUTE_TABLE
++});
+ #endif
+ 
+ char sparc_hard_reg_printed[8];
+diff --git a/gcc/config/stormy16/stormy16.cc b/gcc/config/stormy16/stormy16.cc
+index fabf09ab9..3adc0212a 100644
+--- a/gcc/config/stormy16/stormy16.cc
++++ b/gcc/config/stormy16/stormy16.cc
+@@ -2202,7 +2202,7 @@ static tree xstormy16_handle_interrupt_attribute
+ static tree xstormy16_handle_below100_attribute
+   (tree *, tree, tree, int, bool *);
+ 
+-static const struct attribute_spec xstormy16_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (xstormy16_attribute_table,
+ {
+   /* name, min_len, max_len, decl_req, type_req, fn_type_req,
+      affects_type_identity, handler, exclude.  */
+@@ -2211,9 +2211,8 @@ static const struct attribute_spec xstormy16_attribute_table[] =
+   { "BELOW100", 0, 0, false, false, false, false,
+     xstormy16_handle_below100_attribute, NULL },
+   { "below100", 0, 0, false, false, false, false,
+-    xstormy16_handle_below100_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    xstormy16_handle_below100_attribute, NULL }
++});
+ 
+ /* Handle an "interrupt" attribute;
+    arguments as in struct attribute_spec.handler.  */
+diff --git a/gcc/config/v850/v850.cc b/gcc/config/v850/v850.cc
+index c7d432990..b7bbfb810 100644
+--- a/gcc/config/v850/v850.cc
++++ b/gcc/config/v850/v850.cc
+@@ -3114,7 +3114,7 @@ v850_adjust_insn_length (rtx_insn *insn, int length)
+ 
+ /* V850 specific attributes.  */
+ 
+-static const struct attribute_spec v850_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (v850_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+@@ -3127,9 +3127,8 @@ static const struct attribute_spec v850_attribute_table[] =
+   { "tda", 0, 0, true, false, false, false,
+     v850_handle_data_area_attribute, NULL },
+   { "zda", 0, 0, true, false, false, false,
+-    v850_handle_data_area_attribute, NULL },
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+-};
++    v850_handle_data_area_attribute, NULL }
++});
+ 
+ static void
+ v850_option_override (void)
+diff --git a/gcc/config/visium/visium.cc b/gcc/config/visium/visium.cc
+index 35b46ced9..b572603bb 100644
+--- a/gcc/config/visium/visium.cc
++++ b/gcc/config/visium/visium.cc
+@@ -145,14 +145,13 @@ static inline bool current_function_has_lr_slot (void);
+ 
+ /* Supported attributes:
+    interrupt -- specifies this function is an interrupt handler.  */
+-static const struct attribute_spec visium_attribute_table[] =
++TARGET_GNU_ATTRIBUTES (visium_attribute_table,
+ {
+   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+        affects_type_identity, handler, exclude } */
+   { "interrupt", 0, 0, true, false, false, false, visium_handle_interrupt_attr,
+-    NULL},
+-  { NULL, 0, 0, false, false, false, false, NULL, NULL },
+-};
++    NULL}
++});
+ 
+ static struct machine_function *visium_init_machine_status (void);
+ 
+diff --git a/gcc/cp/cp-objcp-common.h b/gcc/cp/cp-objcp-common.h
+index 3c04e5c02..ff0d0883a 100644
+--- a/gcc/cp/cp-objcp-common.h
++++ b/gcc/cp/cp-objcp-common.h
+@@ -123,13 +123,16 @@ extern tree cxx_simulate_record_decl (location_t, const char *,
+ #undef LANG_HOOKS_FINALIZE_EARLY_DEBUG
+ #define LANG_HOOKS_FINALIZE_EARLY_DEBUG c_common_finalize_early_debug
+ 
+-/* Attribute hooks.  */
*/ +-#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE +-#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE c_common_attribute_table +-#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE +-#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE c_common_format_attribute_table ++static const scoped_attribute_specs *const cp_objcp_attribute_table = ++{ ++ &std_attribute_table, ++ &cxx_gnu_attribute_table, ++ &c_common_gnu_attribute_table, ++ &c_common_format_attribute_table ++}; ++ + #undef LANG_HOOKS_ATTRIBUTE_TABLE +-#define LANG_HOOKS_ATTRIBUTE_TABLE cxx_attribute_table ++#define LANG_HOOKS_ATTRIBUTE_TABLE cp_objcp_attribute_table + + #undef LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P + #define LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P cp_var_mod_type_p +diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h +index 64b3196d1..52d19faa3 100644 +--- a/gcc/cp/cp-tree.h ++++ b/gcc/cp/cp-tree.h +@@ -7897,7 +7897,8 @@ extern tree maybe_dummy_object (tree, tree *); + extern bool is_dummy_object (const_tree); + extern bool is_byte_access_type (tree); + extern bool is_byte_access_type_not_plain_char (tree); +-extern const struct attribute_spec cxx_attribute_table; ++extern const struct scoped_attribute_specs cxx_gnu_attribute_table; ++extern const struct scoped_attribute_specs std_attribute_table; + extern tree make_ptrmem_cst (tree, tree); + extern tree cp_build_type_attribute_variant (tree, tree); + extern tree cp_build_reference_type (tree, bool); +diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc +index a7933ad2c..6cfc7a2d7 100644 +--- a/gcc/cp/tree.cc ++++ b/gcc/cp/tree.cc +@@ -5004,7 +5004,7 @@ handle_likeliness_attribute (tree *node, tree name, tree args, + } + + /* Table of valid C++ attributes. */ +-const struct attribute_spec cxx_attribute_table = ++static const attribute_spec cxx_gnu_attributes = + { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ +@@ -5012,11 +5012,15 @@ const struct attribute_spec cxx_attribute_table = + handle_init_priority_attribute, NULL }, + { "abi_tag", 1, -1, false, false, false, true, + handle_abi_tag_attribute, NULL }, +- { NULL, 0, 0, false, false, false, false, NULL, NULL } ++}; ++ ++const scoped_attribute_specs cxx_gnu_attribute_table = ++{ ++ "gnu", cxx_gnu_attributes + }; + + /* Table of C++ standard attributes. */ +-const struct attribute_spec std_attribute_table = ++static const attribute_spec std_attributes = + { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ +@@ -5031,10 +5035,11 @@ const struct attribute_spec std_attribute_table = + { "unlikely", 0, 0, false, false, false, false, + handle_likeliness_attribute, attr_cold_hot_exclusions }, + { "noreturn", 0, 0, true, false, false, false, +- handle_noreturn_attribute, attr_noreturn_exclusions }, +- { NULL, 0, 0, false, false, false, false, NULL, NULL } ++ handle_noreturn_attribute, attr_noreturn_exclusions } + }; + ++const scoped_attribute_specs std_attribute_table = { nullptr, std_attributes }; ++ + /* Handle an "init_priority" attribute; arguments as in + struct attribute_spec.handler. 
*/
+ static tree
+@@ -5617,7 +5622,6 @@ void
+ init_tree (void)
+ {
+ list_hash_table = hash_table<list_hasher>::create_ggc (61);
+- register_scoped_attributes (std_attribute_table, NULL);
+ }
+
+ /* Returns the kind of special function that DECL (a FUNCTION_DECL)
+diff --git a/gcc/d/d-attribs.cc b/gcc/d/d-attribs.cc
+index c271de0c7..e402c0c11 100644
+--- a/gcc/d/d-attribs.cc
++++ b/gcc/d/d-attribs.cc
+@@ -157,7 +157,7 @@ extern const struct attribute_spec::exclusions attr_cold_hot_exclusions[] =
+
+ /* Table of machine-independent attributes.
+ For internal use (marking of built-ins) only. */
+-const attribute_spec d_langhook_common_attribute_table[] =
++static const attribute_spec d_langhook_common_attributes[] =
+ {
+ ATTR_SPEC ("noreturn", 0, 0, true, false, false, false,
+ handle_noreturn_attribute, attr_noreturn_exclusions),
+@@ -183,11 +183,15 @@ const attribute_spec d_langhook_common_attribute_table[] =
+ handle_type_generic_attribute, NULL),
+ ATTR_SPEC ("fn spec", 1, 1, false, true, true, false,
+ handle_fnspec_attribute, NULL),
+- ATTR_SPEC (NULL, 0, 0, false, false, false, false, NULL, NULL),
++};
++
++const scoped_attribute_specs d_langhook_common_attribute_table =
++{
++ "gnu", d_langhook_common_attributes
+ };
+
+ /* Table of D language attributes exposed by `gcc.attribute' UDAs. */
+-const attribute_spec d_langhook_attribute_table[] =
++static const attribute_spec d_langhook_gnu_attributes[] =
+ {
+ ATTR_SPEC ("noinline", 0, 0, true, false, false, false,
+ d_handle_noinline_attribute, attr_noinline_exclusions),
+@@ -223,9 +227,12 @@ const attribute_spec d_langhook_attribute_table[] =
+ d_handle_restrict_attribute, NULL),
+ ATTR_SPEC ("used", 0, 0, true, false, false, false,
+ d_handle_used_attribute, NULL),
+- ATTR_SPEC (NULL, 0, 0, false, false, false, false, NULL, NULL),
+ };
+
++const scoped_attribute_specs d_langhook_gnu_attribute_table =
++{
++ "gnu", d_langhook_gnu_attributes
++};
+
+ /* Insert the type attribute ATTRNAME with value VALUE into TYPE.
+ Returns a new variant of the original type declaration. */
+@@ -270,20 +277,14 @@ uda_attribute_p (const char *name)
+
+ /* Search both our language, and target attribute tables.
+ Common and format attributes are kept internal. */
+- for (const attribute_spec *p = d_langhook_attribute_table; p->name; p++)
+- {
+- if (get_identifier (p->name) == ident)
+- return true;
+- }
++ for (const attribute_spec &p : d_langhook_gnu_attributes)
++ if (get_identifier (p.name) == ident)
++ return true;
+
+- if (targetm.attribute_table)
+- {
+- for (const attribute_spec *p = targetm.attribute_table; p->name; p++)
+- {
+- if (get_identifier (p->name) == ident)
+- return true;
+- }
+- }
++ for (auto scoped_attributes : targetm.attribute_table)
++ for (const attribute_spec &p : scoped_attributes->attributes)
++ if (get_identifier (p.name) == ident)
++ return true;
+
+ return false;
+ }
+diff --git a/gcc/d/d-lang.cc b/gcc/d/d-lang.cc
+index f078f24fc..da9d6d4a2 100644
+--- a/gcc/d/d-lang.cc
++++ b/gcc/d/d-lang.cc
+@@ -1938,6 +1938,12 @@ d_enum_underlying_base_type (const_tree type)
+ return TREE_TYPE (type);
+ }
+
++const scoped_attribute_specs *const d_langhook_attribute_table[] =
++{
++ &d_langhook_gnu_attribute_table,
++ &d_langhook_common_attribute_table,
++};
++
+ /* Definitions for our language-specific hooks.
*/
+
+ #undef LANG_HOOKS_NAME
+@@ -1949,7 +1955,6 @@ d_enum_underlying_base_type (const_tree type)
+ #undef LANG_HOOKS_HANDLE_OPTION
+ #undef LANG_HOOKS_POST_OPTIONS
+ #undef LANG_HOOKS_PARSE_FILE
+-#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE
+ #undef LANG_HOOKS_ATTRIBUTE_TABLE
+ #undef LANG_HOOKS_GET_ALIAS_SET
+ #undef LANG_HOOKS_TYPES_COMPATIBLE_P
+@@ -1981,7 +1986,6 @@ d_enum_underlying_base_type (const_tree type)
+ #define LANG_HOOKS_HANDLE_OPTION d_handle_option
+ #define LANG_HOOKS_POST_OPTIONS d_post_options
+ #define LANG_HOOKS_PARSE_FILE d_parse_file
+-#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE d_langhook_common_attribute_table
+ #define LANG_HOOKS_ATTRIBUTE_TABLE d_langhook_attribute_table
+ #define LANG_HOOKS_GET_ALIAS_SET d_get_alias_set
+ #define LANG_HOOKS_TYPES_COMPATIBLE_P d_types_compatible_p
+diff --git a/gcc/d/d-tree.h b/gcc/d/d-tree.h
+index aedbdd80a..d4245b63b 100644
+--- a/gcc/d/d-tree.h
++++ b/gcc/d/d-tree.h
+@@ -496,8 +496,8 @@ extern tree insert_decl_attribute (tree, const char *, tree = NULL_TREE);
+ extern void apply_user_attributes (Dsymbol *, tree);
+
+ /* In d-builtins.cc. */
+-extern const attribute_spec d_langhook_attribute_table[];
+-extern const attribute_spec d_langhook_common_attribute_table[];
++extern const struct scoped_attribute_specs d_langhook_gnu_attribute_table;
++extern const struct scoped_attribute_specs d_langhook_common_attribute_table;
+ extern Type *build_frontend_type (tree);
+
+ extern tree d_builtin_function (tree);
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 4f93facf7..95d96ce1b 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -10427,12 +10427,33 @@ Target-specific attributes may be defined for functions, data and types.
+ These are described using the following target hooks; they also need to
+ be documented in @file{extend.texi}.
+
+-@deftypevr {Target Hook} {const struct attribute_spec *} TARGET_ATTRIBUTE_TABLE
+-If defined, this target hook points to an array of @samp{struct
+-attribute_spec} (defined in @file{tree-core.h}) specifying the machine
+-specific attributes for this target and some of the restrictions on the
+-entities to which these attributes are applied and the arguments they
+-take.
++@deftypevr {Target Hook} {array_slice<const struct scoped_attribute_specs *const>} TARGET_ATTRIBUTE_TABLE
++If defined, this target hook provides an array of
++@samp{scoped_attribute_spec}s (defined in @file{attribs.h}) that specify the
++machine-specific attributes for this target. The information includes some
++of the restrictions on the entities to which these attributes are applied
++and the arguments that the attributes take.
++
++In C and C++, these attributes are associated with two syntaxes:
++the traditional GNU @code{__attribute__} syntax and the standard
++@samp{[[]]} syntax. Attributes that support the GNU syntax must be
++placed in the @code{gnu} namespace. Such attributes can then also be
++written @samp{[[gnu::@dots{}]]}. Attributes that use only the standard
++syntax should be placed in whichever namespace the attribute specification
++requires. For example, a target might choose to support vendor-specific
++@samp{[[]]} attributes that the vendor places in their own namespace.
++
++Targets that only define attributes in the @code{gnu} namespace
++can use the following shorthand to define the table:
++
++@smallexample
++TARGET_GNU_ATTRIBUTES (@var{cpu_attribute_table}, @{
++ @{ "@var{attribute1}", @dots{} @},
++ @{ "@var{attribute2}", @dots{} @},
++ @dots{},
++ @{ "@var{attributen}", @dots{} @},
++@});
++@end smallexample
+ @end deftypevr
+
+ @deftypefn {Target Hook} bool TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P (const_tree @var{name})
+diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc
+index 468a0b7e3..27ffc7511 100644
+--- a/gcc/fortran/f95-lang.cc
++++ b/gcc/fortran/f95-lang.cc
+@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "cpp.h"
+ #include "trans-types.h"
+ #include "trans-const.h"
++#include "attribs.h"
+
+ /* Language-dependent contents of an identifier. */
+
+@@ -87,7 +88,7 @@ gfc_handle_omp_declare_target_attribute (tree *, tree, tree, int, bool *)
+ }
+
+ /* Table of valid Fortran attributes. */
+-static const struct attribute_spec gfc_attribute_table[] =
++static const attribute_spec gfc_gnu_attributes[] =
+ {
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+ affects_type_identity, handler, exclude } */
+@@ -97,7 +98,16 @@ static const struct attribute_spec gfc_attribute_table[] =
+ gfc_handle_omp_declare_target_attribute, NULL },
+ { "oacc function", 0, -1, true, false, false, false,
+ gfc_handle_omp_declare_target_attribute, NULL },
+- { NULL, 0, 0, false, false, false, false, NULL, NULL }
++};
++
++static const scoped_attribute_specs gfc_gnu_attribute_table =
++{
++ "gnu", gfc_gnu_attributes
++};
++
++static const scoped_attribute_specs *const gfc_attribute_table[] =
++{
++ &gfc_gnu_attribute_table
+ };
+
+ #undef LANG_HOOKS_NAME
+diff --git a/gcc/jit/dummy-frontend.cc b/gcc/jit/dummy-frontend.cc
+index 84ff359bf..5f9f5336c 100644
+--- a/gcc/jit/dummy-frontend.cc
++++ b/gcc/jit/dummy-frontend.cc
+@@ -87,7 +87,7 @@ static const struct attribute_spec::exclusions attr_const_pure_exclusions[] =
+ };
+
+ /* Table of machine-independent attributes supported in libgccjit. */
+-const struct attribute_spec jit_attribute_table[] =
++static const attribute_spec jit_gnu_attributes[] =
+ {
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+ affects_type_identity, handler, exclude } */
+@@ -128,22 +128,36 @@ const struct attribute_spec jit_attribute_table[] =
+ /* For internal use only. The leading '*' both prevents its usage in
+ source code and signals that it may be overridden by machine tables. */
+ { "*tm regparm", 0, 0, false, true, true, false,
+- ignore_attribute, NULL },
+- { NULL, 0, 0, false, false, false, false, NULL, NULL }
++ ignore_attribute, NULL }
++};
++
++static const scoped_attribute_specs jit_gnu_attribute_table =
++{
++ "gnu", jit_gnu_attributes
+ };
+
+ /* Give the specifications for the format attributes, used by C and all
+ descendants.
*/
+
+-const struct attribute_spec jit_format_attribute_table[] =
++static const attribute_spec jit_format_attributes[] =
+ {
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+ affects_type_identity, handler, exclude } */
+ { "format", 3, 3, false, true, true, false,
+ handle_format_attribute, NULL },
+ { "format_arg", 1, 1, false, true, true, false,
+- handle_format_arg_attribute, NULL },
+- { NULL, 0, 0, false, false, false, false, NULL, NULL }
++ handle_format_arg_attribute, NULL }
++};
++
++static const scoped_attribute_specs jit_format_attribute_table =
++{
++ "gnu", jit_format_attributes
++};
++
++static const scoped_attribute_specs *const jit_attribute_table[] =
++{
++ &jit_gnu_attribute_table,
++ &jit_format_attribute_table
+ };
+
+ /* Attribute handlers. */
+@@ -722,10 +736,8 @@ jit_langhook_getdecls (void)
+ #define LANG_HOOKS_GETDECLS jit_langhook_getdecls
+
+ /* Attribute hooks. */
+-#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE jit_attribute_table
+-#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE jit_format_attribute_table
++#undef LANG_HOOKS_ATTRIBUTE_TABLE
++#define LANG_HOOKS_ATTRIBUTE_TABLE jit_attribute_table
+
+ #undef LANG_HOOKS_DEEP_UNSHARING
+ #define LANG_HOOKS_DEEP_UNSHARING true
+diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h
+index e22639517..11998e40f 100644
+--- a/gcc/langhooks-def.h
++++ b/gcc/langhooks-def.h
+@@ -151,9 +151,7 @@ extern void lhd_finalize_early_debug (void);
+ #define LANG_HOOKS_FINALIZE_EARLY_DEBUG lhd_finalize_early_debug
+
+ /* Attribute hooks. */
+-#define LANG_HOOKS_ATTRIBUTE_TABLE NULL
+-#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE NULL
+-#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE NULL
++#define LANG_HOOKS_ATTRIBUTE_TABLE {}
+
+ /* Tree inlining hooks. */
+ #define LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P \
+@@ -365,8 +363,6 @@ extern void lhd_end_section (void);
+ LANG_HOOKS_PRINT_ERROR_FUNCTION, \
+ LANG_HOOKS_TO_TARGET_CHARSET, \
+ LANG_HOOKS_ATTRIBUTE_TABLE, \
+- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE, \
+- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE, \
+ LANG_HOOKS_TREE_INLINING_INITIALIZER, \
+ LANG_HOOKS_TREE_DUMP_INITIALIZER, \
+ LANG_HOOKS_DECLS, \
+diff --git a/gcc/langhooks.h b/gcc/langhooks.h
+index 4731f089a..5954f58e8 100644
+--- a/gcc/langhooks.h
++++ b/gcc/langhooks.h
+@@ -530,9 +530,7 @@ struct lang_hooks
+ table of attributes specific to the language, a table of
+ attributes common to two or more languages (to allow easy
+ sharing), and a table of attributes for checking formats. */
+- const struct attribute_spec *attribute_table;
+- const struct attribute_spec *common_attribute_table;
+- const struct attribute_spec *format_attribute_table;
++ array_slice<const struct scoped_attribute_specs *const> attribute_table;
+
+ struct lang_hooks_for_tree_inlining tree_inlining;
+
+diff --git a/gcc/lto/lto-lang.cc b/gcc/lto/lto-lang.cc
+index 8d58d924d..601e92e86 100644
+--- a/gcc/lto/lto-lang.cc
++++ b/gcc/lto/lto-lang.cc
+@@ -94,7 +94,7 @@ static const struct attribute_spec::exclusions attr_const_pure_exclusions[] =
+ };
+
+ /* Table of machine-independent attributes supported in GIMPLE. */
+-const struct attribute_spec lto_attribute_table[] =
++static const attribute_spec lto_gnu_attributes[] =
+ {
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+ affects_type_identity, handler, exclude } */
+@@ -135,14 +135,18 @@ const struct attribute_spec lto_attribute_table[] =
+ /* For internal use only.
The leading '*' both prevents its usage in
+ source code and signals that it may be overridden by machine tables. */
+ { "*tm regparm", 0, 0, false, true, true, false,
+- ignore_attribute, NULL },
+- { NULL, 0, 0, false, false, false, false, NULL, NULL }
++ ignore_attribute, NULL }
++};
++
++static const scoped_attribute_specs lto_gnu_attribute_table =
++{
++ "gnu", lto_gnu_attributes
+ };
+
+ /* Give the specifications for the format attributes, used by C and all
+ descendants. */
+
+-const struct attribute_spec lto_format_attribute_table[] =
++static const attribute_spec lto_format_attributes[] =
+ {
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+ affects_type_identity, handler, exclude } */
+@@ -150,7 +154,17 @@ const struct attribute_spec lto_format_attribute_table[] =
+ handle_format_attribute, NULL },
+ { "format_arg", 1, 1, false, true, true, false,
+ handle_format_arg_attribute, NULL },
+- { NULL, 0, 0, false, false, false, false, NULL, NULL }
++};
++
++static const scoped_attribute_specs lto_format_attribute_table =
++{
++ "gnu", lto_format_attributes
++};
++
++static const scoped_attribute_specs *const lto_attribute_table[] =
++{
++ &lto_gnu_attribute_table,
++ &lto_format_attribute_table
+ };
+
+ enum built_in_attribute
+@@ -1453,10 +1467,8 @@ static void lto_init_ts (void)
+ #define LANG_HOOKS_EH_PERSONALITY lto_eh_personality
+
+ /* Attribute hooks. */
+-#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE lto_attribute_table
+-#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE
+-#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE lto_format_attribute_table
++#undef LANG_HOOKS_ATTRIBUTE_TABLE
++#define LANG_HOOKS_ATTRIBUTE_TABLE lto_attribute_table
+
+ #undef LANG_HOOKS_BEGIN_SECTION
+ #define LANG_HOOKS_BEGIN_SECTION lto_obj_begin_section
+diff --git a/gcc/plugin.h b/gcc/plugin.h
+index ff999c405..e29651d35 100644
+--- a/gcc/plugin.h
++++ b/gcc/plugin.h
+@@ -198,8 +198,7 @@ invoke_plugin_callbacks (int event ATTRIBUTE_UNUSED,
+
+ extern void register_attribute (const struct attribute_spec *attr);
+ /* The default argument for the third parameter is given in attribs.h. */
+-extern struct scoped_attributes* register_scoped_attributes (const struct attribute_spec *,
+- const char *,
++extern struct scoped_attributes* register_scoped_attributes (const struct scoped_attribute_specs &,
+ bool);
+
+ #endif /* PLUGIN_H */
+diff --git a/gcc/target-def.h b/gcc/target-def.h
+index f81f8fe3b..70fb393f3 100644
+--- a/gcc/target-def.h
++++ b/gcc/target-def.h
+@@ -114,6 +114,20 @@
+ #define TARGET_FUNCTION_INCOMING_ARG TARGET_FUNCTION_ARG
+ #endif
+
++/* Declare a target attribute table called NAME that only has GNU attributes.
++ There should be no null trailing element. E.g.:
++
++ TARGET_GNU_ATTRIBUTES (aarch64_attribute_table,
++ {
++ { "aarch64_vector_pcs", ... },
++ ...
++ }); */
++
++#define TARGET_GNU_ATTRIBUTES(NAME, ...) \
++ static const attribute_spec NAME##_2[] = __VA_ARGS__; \
++ static const scoped_attribute_specs NAME##_1 = { "gnu", NAME##_2 }; \
++ static const scoped_attribute_specs *const NAME[] = { &NAME##_1 }
+
+ #include "target-hooks-def.h"
+
+ #include "hooks.h"
+diff --git a/gcc/target.def b/gcc/target.def
+index 60096c60c..6cdc09fc2 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -2199,15 +2199,36 @@ merging.",
+ merge_type_attributes)
+
+ /* Table of machine attributes and functions to handle them.
+- Ignored if NULL.
++ Ignored if empty.
*/
+ DEFHOOKPOD
+ (attribute_table,
+- "If defined, this target hook points to an array of @samp{struct\n\
+-attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\
+-specific attributes for this target and some of the restrictions on the\n\
+-entities to which these attributes are applied and the arguments they\n\
+-take.",
+- const struct attribute_spec *, NULL)
++ "If defined, this target hook provides an array of\n\
++@samp{scoped_attribute_spec}s (defined in @file{attribs.h}) that specify the\n\
++machine-specific attributes for this target. The information includes some\n\
++of the restrictions on the entities to which these attributes are applied\n\
++and the arguments that the attributes take.\n\
++\n\
++In C and C++, these attributes are associated with two syntaxes:\n\
++the traditional GNU @code{__attribute__} syntax and the standard\n\
++@samp{[[]]} syntax. Attributes that support the GNU syntax must be\n\
++placed in the @code{gnu} namespace. Such attributes can then also be\n\
++written @samp{[[gnu::@dots{}]]}. Attributes that use only the standard\n\
++syntax should be placed in whichever namespace the attribute specification\n\
++requires. For example, a target might choose to support vendor-specific\n\
++@samp{[[]]} attributes that the vendor places in their own namespace.\n\
++\n\
++Targets that only define attributes in the @code{gnu} namespace\n\
++can use the following shorthand to define the table:\n\
++\n\
++@smallexample\n\
++TARGET_GNU_ATTRIBUTES (@var{cpu_attribute_table}, @{\n\
++ @{ \"@var{attribute1}\", @dots{} @},\n\
++ @{ \"@var{attribute2}\", @dots{} @},\n\
++ @dots{},\n\
++ @{ \"@var{attributen}\", @dots{} @},\n\
++@});\n\
++@end smallexample",
++ array_slice<const struct scoped_attribute_specs *const>, {})
+
+ /* Return true iff attribute NAME expects a plain identifier as its first
+ argument. */
+diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
+index f50dbbc52..67879c2c8 100644
+--- a/gcc/tree-inline.cc
++++ b/gcc/tree-inline.cc
+@@ -4105,17 +4105,16 @@ inline_forbidden_p (tree fndecl)
+ static bool
+ function_attribute_inlinable_p (const_tree fndecl)
+ {
+- if (targetm.attribute_table)
++ for (auto scoped_attributes : targetm.attribute_table)
+ {
+ const_tree a;
+
+ for (a = DECL_ATTRIBUTES (fndecl); a; a = TREE_CHAIN (a))
+ {
+ const_tree name = get_attribute_name (a);
+- int i;
+
+- for (i = 0; targetm.attribute_table[i].name != NULL; i++)
+- if (is_attribute_p (targetm.attribute_table[i].name, name))
++ for (const attribute_spec &attribute : scoped_attributes->attributes)
++ if (is_attribute_p (attribute.name, name))
+ return targetm.function_attribute_inlinable_p (fndecl);
+ }
+ }
+--
+2.33.0
+
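Taken together, the hunks above replace the old NULL-terminated attribute_spec arrays with fixed-length arrays grouped into namespace-scoped tables, exposed to both the target hook and the language hook as an array_slice. A minimal before/after sketch for a hypothetical backend (the foo_* names are illustrative, not taken from the patch):

    /* Old style: a bare attribute_spec array, implicitly in the "gnu"
       namespace, terminated by a NULL sentinel entry.  */
    static const struct attribute_spec foo_attribute_table[] =
    {
      { "interrupt", 0, 0, true, false, false, false,
        foo_handle_interrupt_attribute, NULL },
      { NULL, 0, 0, false, false, false, false, NULL, NULL }
    };

    /* New style: TARGET_GNU_ATTRIBUTES drops the sentinel, wraps the
       entries in a scoped_attribute_specs for the "gnu" namespace, and
       defines foo_attribute_table as a one-element array of table
       pointers, matching the array_slice-valued hook.  */
    TARGET_GNU_ATTRIBUTES (foo_attribute_table,
    {
      { "interrupt", 0, 0, true, false, false, false,
        foo_handle_interrupt_attribute, NULL }
    });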
View file
_service:tar_scm:0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch
Added
@@ -0,0 +1,45 @@ +From 9503e64bf304d44947791d9ff17d65a6905e59ce Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 28 Jun 2024 15:04:26 +0800 +Subject: PATCH 172/188 LoongArch: Fix explicit-relocs-{extreme-,}tls-desc.c + tests. + +After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX. +Cause these two tests to fail. To guarantee that these two tests pass, +add the compilation option '-fno-late-combine-instructions'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: + Add compilation options '-fno-late-combine-instructions'. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise. +--- + .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +- + gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +index 3797556e1..e9eb0d6f7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +index f66903091..fed478458 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +-- +2.43.0 +
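For context, the merge described above is a pass-ordering effect rather than wrong code: the late-combine pass folds an address add feeding a load or store into a single indexed ldx/stx instruction, so scan-assembler patterns written against the add + ld/st pair stop matching. A minimal sketch of the adjusted test shape (hypothetical body; the real tests scan for specific TLS descriptor sequences):

    /* { dg-do compile } */
    /* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -fno-late-combine-instructions" } */

    /* Disabling late combine keeps the address add and the following load
       as separate instructions instead of one indexed ldx, preserving the
       sequence the assembler scans were written against.  */
    __thread int counter;

    int
    read_counter (void)
    {
      return counter;
    }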
View file
_service:tar_scm:0173-Backport-SME-aarch64-Fix-plugin-header-install.patch
Added
@@ -0,0 +1,64 @@ +From b1025ef48bff0622e54822dc0974f38748e9109f Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Thu, 22 Dec 2022 11:15:47 +0100 +Subject: PATCH 074/157 BackportSME aarch64: Fix plugin header install + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b30e9bc211fede06cf85b54e466012540bef14d + +The r13-2943-g11a113d501ff64 made aarch64.h include +aarch64-option-extensions.def, but that file isn't installed +for building plugins. + +On Wed, Dec 21, 2022 at 09:56:33AM +0000, Richard Sandiford wrote: +> Should this (and aarch64-fusion-pairs.def and aarch64-tuning-flags.def) +> be in TM_H instead? The first two OPTIONS_H_EXTRA entries seem to be +> for aarch64-opt.h (included via aarch64.opt). +> +> I guess TM_H should also have aarch64-arches.def, since it's included +> for aarch64_feature. + +gcc/Makefile.in has +TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H) +and +OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA) +which means that adding something into TM_H when it is already in +OPTIONS_H_EXTRA is a unnecessary. +It is true that aarch64-fusion-pairs.def (included by aarch64-protos.h) +and aarch64-tuning-flags.def (ditto) and aarch64-option-extensions.def +(included by aarch64.h) aren't needed for options.h, so I think the +right patch would be following. + +2022-12-22 Jakub Jelinek <jakub@redhat.com> + + * config/aarch64/t-aarch64 (TM_H): Don't add aarch64-cores.def, + add aarch64-fusion-pairs.def, aarch64-tuning-flags.def and + aarch64-option-extensions.def. + (OPTIONS_H_EXTRA): Don't add aarch64-fusion-pairs.def nor + aarch64-tuning-flags.def. +--- + gcc/config/aarch64/t-aarch64 | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 +index ba74abc0a..6a21a248f 100644 +--- a/gcc/config/aarch64/t-aarch64 ++++ b/gcc/config/aarch64/t-aarch64 +@@ -18,11 +18,11 @@ + # along with GCC; see the file COPYING3. If not see + # <http://www.gnu.org/licenses/>. + +-TM_H += $(srcdir)/config/aarch64/aarch64-cores.def ++TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ ++ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ ++ $(srcdir)/config/aarch64/aarch64-option-extensions.def + OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ +- $(srcdir)/config/aarch64/aarch64-arches.def \ +- $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ +- $(srcdir)/config/aarch64/aarch64-tuning-flags.def ++ $(srcdir)/config/aarch64/aarch64-arches.def + + $(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true + s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \ +-- +2.33.0 +
View file
_service:tar_scm:0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch
Added
@@ -0,0 +1,70 @@ +From 727b1a2cff9cecd904545895bbf39a89fbf1ea4f Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 28 Jun 2024 15:09:48 +0800 +Subject: PATCH 173/188 LoongArch: Define loongarch_insn_cost and set the + cost of movcf2gr and movgr2cf. + +The following two FAIL items have been fixed: + +FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movcf2fr\\t\\\\\$f0-9+,\\\\\$fcc +FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movfr2gr\\\\.s\\t\\\\\$r4 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_insn_cost): + New function. + (TARGET_INSN_COST): New macro. +--- + gcc/config/loongarch/loongarch.cc | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 35524b5da..958e82b86 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4369,6 +4369,33 @@ loongarch_address_cost (rtx addr, machine_mode mode, + return loongarch_address_insns (addr, mode, false); + } + ++/* Implement TARGET_INSN_COST. */ ++ ++static int ++loongarch_insn_cost (rtx_insn *insn, bool speed) ++{ ++ rtx x = PATTERN (insn); ++ int cost = pattern_cost (x, speed); ++ ++ /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr. */ ++ if (GET_CODE (x) == SET ++ && GET_MODE (XEXP (x, 0)) == FCCmode) ++ { ++ rtx dest, src; ++ dest = XEXP (x, 0); ++ src = XEXP (x, 1); ++ ++ if (REG_P (dest) && REG_P (src)) ++ { ++ if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src))) ++ cost = loongarch_cost->movcf2gr; ++ else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src))) ++ cost = loongarch_cost->movgr2cf; ++ } ++ } ++ return cost; ++} ++ + /* Return one word of double-word value OP, taking into account the fixed + endianness of certain registers. HIGH_P is true to select the high part, + false to select the low part. */ +@@ -11089,6 +11116,8 @@ loongarch_asm_code_end (void) + #define TARGET_RTX_COSTS loongarch_rtx_costs + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST loongarch_address_cost ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST loongarch_insn_cost + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST + #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + loongarch_builtin_vectorization_cost +-- +2.43.0 +
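For context on what the cost tweak protects: a floating-point comparison leaves its result in an FCC register, and moving it to a general register can be done either directly (movcf2gr) or via an FPR (movcf2fr followed by movfr2gr). Raising the reported cost of the direct FCC/GPR moves on LA464 keeps the RTL passes from collapsing the two-instruction route. A hypothetical fragment that exercises the sequence (not the actual movcf2gr-via-fr.c test):

    /* Compiled at -O2 with LA464 tuning, the comparison result is
       materialised in an FCC register; with the raised movcf2gr cost the
       compiler moves it to a GPR via an FPR, i.e. movcf2fr then movfr2gr,
       which is what the previously failing scan patterns look for.  */
    int
    fcc_to_gpr (float a, float b)
    {
      return a < b;
    }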
View file
_service:tar_scm:0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
Added
@@ -0,0 +1,1178 @@ +From 70b732b4518dd0e44b9e6bfaaad78492b8db8f29 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:23 +0000 +Subject: PATCH 075/157 BackportSME aarch64: Add + arm_streaming(_compatible) attributes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2c9a54b4238308b127c3b60b01a591363131e7db + +This patch adds support for recognising the SME arm::streaming +and arm::streaming_compatible attributes. These attributes +respectively describe whether the processor is definitely in +"streaming mode" (PSTATE.SM==1), whether the processor is +definitely not in streaming mode (PSTATE.SM==0), or whether +we don't know at compile time either way. + +As far as the compiler is concerned, this effectively creates three +ISA submodes: streaming mode enables things that are not available +in non-streaming mode, non-streaming mode enables things that not +available in streaming mode, and streaming-compatible mode has to stick +to the common subset. This means that some instructions are conditional +on PSTATE.SM==1 and some are conditional on PSTATE.SM==0. + +I wondered about recording the streaming state in a new variable. +However, the set of available instructions is also influenced by +PSTATE.ZA (added later), so I think it makes sense to view this +as an instance of a more general mechanism. Also, keeping the +PSTATE.SM state in the same flag variable as the other ISA +features makes it possible to sum up the requirements of an +ACLE function in a single value. + +The patch therefore adds a new set of feature flags called "ISA modes". +Unlike the other two sets of flags (optional features and architecture- +level features), these ISA modes are not controlled directly by +command-line parameters or "target" attributes. + +arm::streaming and arm::streaming_compatible are function type attributes +rather than function declaration attributes. This means that we need +to find somewhere to copy the type information across to a function's +target options. The patch does this in aarch64_set_current_function. + +We also need to record which ISA mode a callee expects/requires +to be active on entry. (The same mode is then active on return.) +The patch extends the current UNSPEC_CALLEE_ABI cookie to include +this information, as well as the PCS variant that it recorded +previously. + +The attributes can also be written __arm_streaming and +__arm_streaming_compatible. This has two advantages: it triggers +an error on compilers that don't understand the attributes, and it +eases use on C, where ... attributes were only added in C23. + +gcc/ + * config/aarch64/aarch64-isa-modes.def: New file. + * config/aarch64/aarch64.h: Include it in the feature enumerations. + (AARCH64_FL_SM_STATE, AARCH64_FL_ISA_MODES): New constants. + (AARCH64_FL_DEFAULT_ISA_MODE): Likewise. + (AARCH64_ISA_MODE): New macro. + (CUMULATIVE_ARGS): Add an isa_mode field. + * config/aarch64/aarch64-protos.h (aarch64_gen_callee_cookie): Declare. + (aarch64_tlsdesc_abi_id): Return an arm_pcs. + * config/aarch64/aarch64.cc (attr_streaming_exclusions) + (aarch64_gnu_attributes, aarch64_gnu_attribute_table) + (aarch64_arm_attributes, aarch64_arm_attribute_table): New tables. + (aarch64_attribute_table): Redefine to include the gnu and arm + attributes. + (aarch64_fntype_pstate_sm, aarch64_fntype_isa_mode): New functions. + (aarch64_fndecl_pstate_sm, aarch64_fndecl_isa_mode): Likewise. + (aarch64_gen_callee_cookie, aarch64_callee_abi): Likewise. 
+ (aarch64_insn_callee_cookie, aarch64_insn_callee_abi): Use them. + (aarch64_function_arg, aarch64_output_mi_thunk): Likewise. + (aarch64_init_cumulative_args): Initialize the isa_mode field. + (aarch64_output_mi_thunk): Use aarch64_gen_callee_cookie to get + the ABI cookie. + (aarch64_override_options): Add the ISA mode to the feature set. + (aarch64_temporary_target::copy_from_fndecl): Likewise. + (aarch64_fndecl_options, aarch64_handle_attr_arch): Likewise. + (aarch64_set_current_function): Maintain the correct ISA mode. + (aarch64_tlsdesc_abi_id): Return an arm_pcs. + (aarch64_comp_type_attributes): Handle arm::streaming and + arm::streaming_compatible. + * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): + Define __arm_streaming and __arm_streaming_compatible. + * config/aarch64/aarch64.md (tlsdesc_small_<mode>): Use + aarch64_gen_callee_cookie to get the ABI cookie. + * config/aarch64/t-aarch64 (TM_H): Add all feature-related .def files. + +gcc/testsuite/ + * gcc.target/aarch64/sme/aarch64-sme.exp: New harness. + * gcc.target/aarch64/sme/streaming_mode_1.c: New test. + * gcc.target/aarch64/sme/streaming_mode_2.c: Likewise. + * gcc.target/aarch64/sme/keyword_macros_1.c: Likewise. + * g++.target/aarch64/sme/aarch64-sme.exp: New harness. + * g++.target/aarch64/sme/streaming_mode_1.C: New test. + * g++.target/aarch64/sme/streaming_mode_2.C: Likewise. + * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. + * gcc.target/aarch64/auto-init-1.c: Only expect the call insn + to contain 1 (const_int 0), not 2. +--- + gcc/config/aarch64/aarch64-c.cc | 14 ++ + gcc/config/aarch64/aarch64-isa-modes.def | 35 +++ + gcc/config/aarch64/aarch64-protos.h | 3 +- + gcc/config/aarch64/aarch64.cc | 233 +++++++++++++++--- + gcc/config/aarch64/aarch64.h | 24 +- + gcc/config/aarch64/aarch64.md | 3 +- + gcc/config/aarch64/t-aarch64 | 5 +- + .../g++.target/aarch64/sme/aarch64-sme.exp | 40 +++ + .../g++.target/aarch64/sme/keyword_macros_1.C | 4 + + .../g++.target/aarch64/sme/streaming_mode_1.C | 142 +++++++++++ + .../g++.target/aarch64/sme/streaming_mode_2.C | 25 ++ + .../gcc.target/aarch64/auto-init-1.c | 3 +- + .../gcc.target/aarch64/sme/aarch64-sme.exp | 40 +++ + .../gcc.target/aarch64/sme/keyword_macros_1.c | 4 + + .../gcc.target/aarch64/sme/streaming_mode_1.c | 130 ++++++++++ + .../gcc.target/aarch64/sme/streaming_mode_2.c | 25 ++ + 16 files changed, 685 insertions(+), 45 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-isa-modes.def + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 4085ad840..397745fbd 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -72,6 +72,20 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); + + builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); ++ ++ /* Define keyword attributes like 
__arm_streaming as macros that expand ++ to the associated ... attribute. Use __extension__ in the attribute ++ for C, since the ... syntax was only added in C23. */ ++#define DEFINE_ARM_KEYWORD_MACRO(NAME) \ ++ builtin_define_with_value ("__arm_" NAME, \ ++ lang_GNU_CXX () \ ++ ? "arm::" NAME "" \ ++ : "__extension__ arm::" NAME "", 0); ++ ++ DEFINE_ARM_KEYWORD_MACRO ("streaming"); ++ DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible"); ++ ++#undef DEFINE_ARM_KEYWORD_MACRO + } + + /* Undefine/redefine macros that depend on the current backend state and may +diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def +new file mode 100644 +index 000000000..5915c98a8 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-isa-modes.def +@@ -0,0 +1,35 @@ ++/* Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* This file defines a set of "ISA modes"; in other words, it defines ++ various bits of runtime state that control the set of available ++ instructions or that affect the semantics of instructions in some way. ++ ++ Before using #include to read this file, define a macro: ++ ++ DEF_AARCH64_ISA_MODE(NAME) ++ ++ where NAME is the name of the mode. */ ++ ++/* Indicates that PSTATE.SM is known to be 1 or 0 respectively. These ++ modes are mutually exclusive. If neither mode is active then the state ++ of PSTATE.SM is not known at compile time. 
*/ ++DEF_AARCH64_ISA_MODE(SM_ON) ++DEF_AARCH64_ISA_MODE(SM_OFF) ++ ++#undef DEF_AARCH64_ISA_MODE +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 14a568140..9b03410dc 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -772,6 +772,7 @@ bool aarch64_constant_address_p (rtx); + bool aarch64_emit_approx_div (rtx, rtx, rtx); + bool aarch64_emit_approx_sqrt (rtx, rtx, bool); + tree aarch64_vector_load_decl (tree); ++rtx aarch64_gen_callee_cookie (aarch64_feature_flags, arm_pcs); + void aarch64_expand_call (rtx, rtx, rtx, bool); + bool aarch64_expand_cpymem (rtx *); + bool aarch64_expand_setmem (rtx *); +@@ -851,7 +852,7 @@ bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); + bool aarch64_use_return_insn_p (void); + const char *aarch64_output_casesi (rtx *); + +-unsigned int aarch64_tlsdesc_abi_id (); ++arm_pcs aarch64_tlsdesc_abi_id (); + enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); + enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); + enum reg_class aarch64_regno_regclass (unsigned); +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 114252a3c..904166b21 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -2985,8 +2985,18 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, + gcc_unreachable (); + } + ++/* Mutually-exclusive function type attributes for controlling PSTATE.SM. */ ++static const struct attribute_spec::exclusions attr_streaming_exclusions = ++{ ++ /* Attribute name exclusion applies to: ++ function, type, variable */ ++ { "streaming", false, true, false }, ++ { "streaming_compatible", false, true, false }, ++ { NULL, false, false, false } ++}; ++ + /* Table of machine attributes. */ +-TARGET_GNU_ATTRIBUTES (aarch64_attribute_table, ++static const attribute_spec aarch64_gnu_attributes = + { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ +@@ -2998,7 +3008,31 @@ TARGET_GNU_ATTRIBUTES (aarch64_attribute_table, + { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }, + { "SVE type", 3, 3, false, true, false, true, NULL, NULL }, + { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL } +-}); ++}; ++ ++static const scoped_attribute_specs aarch64_gnu_attribute_table = ++{ ++ "gnu", aarch64_gnu_attributes ++}; ++ ++static const attribute_spec aarch64_arm_attributes = ++{ ++ { "streaming", 0, 0, false, true, true, true, ++ NULL, attr_streaming_exclusions }, ++ { "streaming_compatible", 0, 0, false, true, true, true, ++ NULL, attr_streaming_exclusions }, ++}; ++ ++static const scoped_attribute_specs aarch64_arm_attribute_table = ++{ ++ "arm", aarch64_arm_attributes ++}; ++ ++static const scoped_attribute_specs *const aarch64_attribute_table = ++{ ++ &aarch64_gnu_attribute_table, ++ &aarch64_arm_attribute_table ++}; + + /* An ISA extension in the co-processor and main instruction set space. */ + struct aarch64_option_extension +@@ -4301,6 +4335,48 @@ aarch64_fntype_abi (const_tree fntype) + return default_function_abi; + } + ++/* Return the state of PSTATE.SM on entry to functions of type FNTYPE. 
*/ ++ ++static aarch64_feature_flags ++aarch64_fntype_pstate_sm (const_tree fntype) ++{ ++ if (lookup_attribute ("arm", "streaming", TYPE_ATTRIBUTES (fntype))) ++ return AARCH64_FL_SM_ON; ++ ++ if (lookup_attribute ("arm", "streaming_compatible", ++ TYPE_ATTRIBUTES (fntype))) ++ return 0; ++ ++ return AARCH64_FL_SM_OFF; ++} ++ ++/* Return the ISA mode on entry to functions of type FNTYPE. */ ++ ++static aarch64_feature_flags ++aarch64_fntype_isa_mode (const_tree fntype) ++{ ++ return aarch64_fntype_pstate_sm (fntype); ++} ++ ++/* Return the state of PSTATE.SM when compiling the body of ++ function FNDECL. This might be different from the state of ++ PSTATE.SM on entry. */ ++ ++static aarch64_feature_flags ++aarch64_fndecl_pstate_sm (const_tree fndecl) ++{ ++ return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); ++} ++ ++/* Return the ISA mode that should be used to compile the body of ++ function FNDECL. */ ++ ++static aarch64_feature_flags ++aarch64_fndecl_isa_mode (const_tree fndecl) ++{ ++ return aarch64_fndecl_pstate_sm (fndecl); ++} ++ + /* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ + + static bool +@@ -4363,17 +4439,46 @@ aarch64_reg_save_mode (unsigned int regno) + gcc_unreachable (); + } + +-/* Implement TARGET_INSN_CALLEE_ABI. */ ++/* Given the ISA mode on entry to a callee and the ABI of the callee, ++ return the CONST_INT that should be placed in an UNSPEC_CALLEE_ABI rtx. */ + +-const predefined_function_abi & +-aarch64_insn_callee_abi (const rtx_insn *insn) ++rtx ++aarch64_gen_callee_cookie (aarch64_feature_flags isa_mode, arm_pcs pcs_variant) ++{ ++ return gen_int_mode ((unsigned int) isa_mode ++ | (unsigned int) pcs_variant << AARCH64_NUM_ISA_MODES, ++ DImode); ++} ++ ++/* COOKIE is a CONST_INT from an UNSPEC_CALLEE_ABI rtx. Return the ++ callee's ABI. */ ++ ++static const predefined_function_abi & ++aarch64_callee_abi (rtx cookie) ++{ ++ return function_abisUINTVAL (cookie) >> AARCH64_NUM_ISA_MODES; ++} ++ ++/* INSN is a call instruction. Return the CONST_INT stored in its ++ UNSPEC_CALLEE_ABI rtx. */ ++ ++static rtx ++aarch64_insn_callee_cookie (const rtx_insn *insn) + { + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == PARALLEL); + rtx unspec = XVECEXP (pat, 0, 1); + gcc_assert (GET_CODE (unspec) == UNSPEC + && XINT (unspec, 1) == UNSPEC_CALLEE_ABI); +- return function_abisINTVAL (XVECEXP (unspec, 0, 0)); ++ return XVECEXP (unspec, 0, 0); ++} ++ ++/* Implement TARGET_INSN_CALLEE_ABI. */ ++ ++const predefined_function_abi & ++aarch64_insn_callee_abi (const rtx_insn *insn) ++{ ++ return aarch64_callee_abi (aarch64_insn_callee_cookie (insn)); + } + + /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. 
The callee only saves +@@ -8117,7 +8222,7 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) + || pcum->pcs_variant == ARM_PCS_SVE); + + if (arg.end_marker_p ()) +- return gen_int_mode (pcum->pcs_variant, DImode); ++ return aarch64_gen_callee_cookie (pcum->isa_mode, pcum->pcs_variant); + + aarch64_layout_arg (pcum_v, arg); + return pcum->aapcs_reg; +@@ -8138,9 +8243,15 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + pcum->aapcs_nextnvrn = 0; + pcum->aapcs_nextnprn = 0; + if (fntype) +- pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); ++ { ++ pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); ++ pcum->isa_mode = aarch64_fntype_isa_mode (fntype); ++ } + else +- pcum->pcs_variant = ARM_PCS_AAPCS64; ++ { ++ pcum->pcs_variant = ARM_PCS_AAPCS64; ++ pcum->isa_mode = AARCH64_FL_DEFAULT_ISA_MODE; ++ } + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_arg_processed = false; + pcum->aapcs_stack_words = 0; +@@ -10627,7 +10738,9 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); +- rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode); ++ auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function)); ++ auto pcs_variant = arm_pcs (fndecl_abi (function).id ()); ++ rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi)); + SIBLING_CALL_P (insn) = 1; + +@@ -18618,6 +18731,7 @@ aarch64_override_options (void) + SUBTARGET_OVERRIDE_OPTIONS; + #endif + ++ auto isa_mode = AARCH64_FL_DEFAULT_ISA_MODE; + if (cpu && arch) + { + /* If both -mcpu and -march are specified, warn if they are not +@@ -18630,25 +18744,25 @@ aarch64_override_options (void) + } + + selected_arch = arch->arch; +- aarch64_set_asm_isa_flags (arch_isa); ++ aarch64_set_asm_isa_flags (arch_isa | isa_mode); + } + else if (cpu) + { + selected_arch = cpu->arch; +- aarch64_set_asm_isa_flags (cpu_isa); ++ aarch64_set_asm_isa_flags (cpu_isa | isa_mode); + } + else if (arch) + { + cpu = &all_coresarch->ident; + selected_arch = arch->arch; +- aarch64_set_asm_isa_flags (arch_isa); ++ aarch64_set_asm_isa_flags (arch_isa | isa_mode); + } + else + { + /* No -mcpu or -march specified, so use the default CPU. */ + cpu = &all_coresTARGET_CPU_DEFAULT; + selected_arch = cpu->arch; +- aarch64_set_asm_isa_flags (cpu->flags); ++ aarch64_set_asm_isa_flags (cpu->flags | isa_mode); + } + + selected_tune = tune ? tune->ident : cpu->ident; +@@ -18821,6 +18935,21 @@ aarch64_save_restore_target_globals (tree new_tree) + TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); + } + ++/* Return the target_option_node for FNDECL, or the current options ++ if FNDECL is null. */ ++ ++static tree ++aarch64_fndecl_options (tree fndecl) ++{ ++ if (!fndecl) ++ return target_option_current_node; ++ ++ if (tree options = DECL_FUNCTION_SPECIFIC_TARGET (fndecl)) ++ return options; ++ ++ return target_option_default_node; ++} ++ + /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions + like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET + of the function, if such exists. This function may be called multiple +@@ -18830,25 +18959,24 @@ aarch64_save_restore_target_globals (tree new_tree) + static void + aarch64_set_current_function (tree fndecl) + { +- if (!fndecl || fndecl == aarch64_previous_fndecl) +- return; +- +- tree old_tree = (aarch64_previous_fndecl +- ? 
DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl) +- : NULL_TREE); +- +- tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ tree old_tree = aarch64_fndecl_options (aarch64_previous_fndecl); ++ tree new_tree = aarch64_fndecl_options (fndecl); + +- /* If current function has no attributes but the previous one did, +- use the default node. */ +- if (!new_tree && old_tree) +- new_tree = target_option_default_node; ++ auto new_isa_mode = (fndecl ++ ? aarch64_fndecl_isa_mode (fndecl) ++ : AARCH64_FL_DEFAULT_ISA_MODE); ++ auto isa_flags = TREE_TARGET_OPTION (new_tree)->x_aarch64_isa_flags; + + /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to + the default have been handled by aarch64_save_restore_target_globals from + aarch64_pragma_target_parse. */ +- if (old_tree == new_tree) +- return; ++ if (old_tree == new_tree ++ && (!fndecl || aarch64_previous_fndecl) ++ && (isa_flags & AARCH64_FL_ISA_MODES) == new_isa_mode) ++ { ++ gcc_assert (AARCH64_ISA_MODE == new_isa_mode); ++ return; ++ } + + aarch64_previous_fndecl = fndecl; + +@@ -18856,7 +18984,28 @@ aarch64_set_current_function (tree fndecl) + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (new_tree)); + ++ /* The ISA mode can vary based on function type attributes and ++ function declaration attributes. Make sure that the target ++ options correctly reflect these attributes. */ ++ if ((isa_flags & AARCH64_FL_ISA_MODES) != new_isa_mode) ++ { ++ auto base_flags = (aarch64_asm_isa_flags & ~AARCH64_FL_ISA_MODES); ++ aarch64_set_asm_isa_flags (base_flags | new_isa_mode); ++ ++ aarch64_override_options_internal (&global_options); ++ new_tree = build_target_option_node (&global_options, ++ &global_options_set); ++ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_tree; ++ ++ tree new_optimize = build_optimization_node (&global_options, ++ &global_options_set); ++ if (new_optimize != optimization_default_node) ++ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; ++ } ++ + aarch64_save_restore_target_globals (new_tree); ++ ++ gcc_assert (AARCH64_ISA_MODE == new_isa_mode); + } + + /* Enum describing the various ways we can handle attributes. +@@ -18906,7 +19055,7 @@ aarch64_handle_attr_arch (const char *str) + { + gcc_assert (tmp_arch); + selected_arch = tmp_arch->arch; +- aarch64_set_asm_isa_flags (tmp_flags); ++ aarch64_set_asm_isa_flags (tmp_flags | AARCH64_ISA_MODE); + return true; + } + +@@ -18947,7 +19096,7 @@ aarch64_handle_attr_cpu (const char *str) + gcc_assert (tmp_cpu); + selected_tune = tmp_cpu->ident; + selected_arch = tmp_cpu->arch; +- aarch64_set_asm_isa_flags (tmp_flags); ++ aarch64_set_asm_isa_flags (tmp_flags | AARCH64_ISA_MODE); + return true; + } + +@@ -19047,7 +19196,7 @@ aarch64_handle_attr_isa_flags (char *str) + features if the user wants to handpick specific features. */ + if (strncmp ("+nothing", str, 8) == 0) + { +- isa_flags = 0; ++ isa_flags = AARCH64_ISA_MODE; + str += 8; + } + +@@ -19552,7 +19701,7 @@ aarch64_can_inline_p (tree caller, tree callee) + /* Return the ID of the TLDESC ABI, initializing the descriptor if hasn't + been already. */ + +-unsigned int ++arm_pcs + aarch64_tlsdesc_abi_id () + { + predefined_function_abi &tlsdesc_abi = function_abisARM_PCS_TLSDESC; +@@ -19566,7 +19715,7 @@ aarch64_tlsdesc_abi_id () + SET_HARD_REG_BIT (full_reg_clobbers, regno); + tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers); + } +- return tlsdesc_abi.id (); ++ return ARM_PCS_TLSDESC; + } + + /* Return true if SYMBOL_REF X binds locally. 
*/ +@@ -27270,22 +27419,26 @@ aarch64_simd_clone_usable (struct cgraph_node *node) + static int + aarch64_comp_type_attributes (const_tree type1, const_tree type2) + { +- auto check_attr = &(const char *name) { +- tree attr1 = lookup_attribute (name, TYPE_ATTRIBUTES (type1)); +- tree attr2 = lookup_attribute (name, TYPE_ATTRIBUTES (type2)); ++ auto check_attr = &(const char *ns, const char *name) { ++ tree attr1 = lookup_attribute (ns, name, TYPE_ATTRIBUTES (type1)); ++ tree attr2 = lookup_attribute (ns, name, TYPE_ATTRIBUTES (type2)); + if (!attr1 && !attr2) + return true; + + return attr1 && attr2 && attribute_value_equal (attr1, attr2); + }; + +- if (!check_attr ("aarch64_vector_pcs")) ++ if (!check_attr ("gnu", "aarch64_vector_pcs")) ++ return 0; ++ if (!check_attr ("gnu", "Advanced SIMD type")) ++ return 0; ++ if (!check_attr ("gnu", "SVE type")) + return 0; +- if (!check_attr ("Advanced SIMD type")) ++ if (!check_attr ("gnu", "SVE sizeless type")) + return 0; +- if (!check_attr ("SVE type")) ++ if (!check_attr ("arm", "streaming")) + return 0; +- if (!check_attr ("SVE sizeless type")) ++ if (!check_attr ("arm", "streaming_compatible")) + return 0; + return 1; + } +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 19b82b4f3..84215c8c3 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -157,10 +157,13 @@ + + #ifndef USED_FOR_TARGET + +-/* Define an enum of all features (architectures and extensions). */ ++/* Define an enum of all features (ISA modes, architectures and extensions). ++ The ISA modes must come first. */ + enum class aarch64_feature : unsigned char { ++#define DEF_AARCH64_ISA_MODE(IDENT) IDENT, + #define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) IDENT, + #define AARCH64_ARCH(A, B, IDENT, D, E) IDENT, ++#include "aarch64-isa-modes.def" + #include "aarch64-option-extensions.def" + #include "aarch64-arches.def" + }; +@@ -169,16 +172,34 @@ enum class aarch64_feature : unsigned char { + #define HANDLE(IDENT) \ + constexpr auto AARCH64_FL_##IDENT \ + = aarch64_feature_flags (1) << int (aarch64_feature::IDENT); ++#define DEF_AARCH64_ISA_MODE(IDENT) HANDLE (IDENT) + #define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) HANDLE (IDENT) + #define AARCH64_ARCH(A, B, IDENT, D, E) HANDLE (IDENT) ++#include "aarch64-isa-modes.def" + #include "aarch64-option-extensions.def" + #include "aarch64-arches.def" + #undef HANDLE + ++constexpr auto AARCH64_FL_SM_STATE = AARCH64_FL_SM_ON | AARCH64_FL_SM_OFF; ++ ++constexpr unsigned int AARCH64_NUM_ISA_MODES = (0 ++#define DEF_AARCH64_ISA_MODE(IDENT) + 1 ++#include "aarch64-isa-modes.def" ++); ++ ++/* The mask of all ISA modes. */ ++constexpr auto AARCH64_FL_ISA_MODES ++ = (aarch64_feature_flags (1) << AARCH64_NUM_ISA_MODES) - 1; ++ ++/* The default ISA mode, for functions with no attributes that specify ++ something to the contrary. */ ++constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; ++ + #endif + + /* Macros to test ISA flags. */ + ++#define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) + #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) + #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) + #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) +@@ -904,6 +925,7 @@ enum arm_pcs + typedef struct + { + enum arm_pcs pcs_variant; ++ aarch64_feature_flags isa_mode; + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_ncrn; /* Next Core register number. 
*/ + int aapcs_nextncrn; /* Next next core register number. */ +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index c0977a3da..29a665e45 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -7017,7 +7017,8 @@ + { + if (TARGET_SVE) + { +- rtx abi = gen_int_mode (aarch64_tlsdesc_abi_id (), DImode); ++ rtx abi = aarch64_gen_callee_cookie (AARCH64_ISA_MODE, ++ aarch64_tlsdesc_abi_id ()); + rtx_insn *call + = emit_call_insn (gen_tlsdesc_small_sve_<mode> (operands0, abi)); + RTL_CONST_CALL_P (call) = 1; +diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 +index 6a21a248f..10cd8f093 100644 +--- a/gcc/config/aarch64/t-aarch64 ++++ b/gcc/config/aarch64/t-aarch64 +@@ -20,7 +20,10 @@ + + TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ + $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ +- $(srcdir)/config/aarch64/aarch64-option-extensions.def ++ $(srcdir)/config/aarch64/aarch64-option-extensions.def \ ++ $(srcdir)/config/aarch64/aarch64-cores.def \ ++ $(srcdir)/config/aarch64/aarch64-isa-modes.def \ ++ $(srcdir)/config/aarch64/aarch64-arches.def + OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ + $(srcdir)/config/aarch64/aarch64-arches.def + +diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp +new file mode 100644 +index 000000000..72fcd0bd9 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp +@@ -0,0 +1,40 @@ ++# Specific regression driver for AArch64 SME. ++# Copyright (C) 2009-2023 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {!istarget aarch64*-*-* } { ++ return ++} ++ ++# Load support procs. ++load_lib g++-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++aarch64-with-arch-dg-options "" { ++ # Main loop. ++ dg-runtest lsort glob -nocomplain $srcdir/$subdir/*.\cCS\ \ ++ "" "" ++} ++ ++# All done. 
++dg-finish +diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C +new file mode 100644 +index 000000000..032485adf +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C +@@ -0,0 +1,4 @@ ++/* { dg-options "-std=c++11 -pedantic-errors" } */ ++ ++void f1 () __arm_streaming; ++void f2 () __arm_streaming_compatible; +diff --git a/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C +new file mode 100644 +index 000000000..c3de726e7 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C +@@ -0,0 +1,142 @@ ++// { dg-options "" } ++ ++void sc_a () arm::streaming_compatible; ++void sc_a (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } ++ ++void sc_b (); ++void sc_b () arm::streaming_compatible; // { dg-error "ambiguating new declaration" } ++ ++void sc_c () arm::streaming_compatible; ++void sc_c () {} // Inherits attribute from declaration (confusingly). ++ ++void sc_d (); ++void sc_d () arm::streaming_compatible {} // { dg-error "ambiguating new declaration" } ++ ++void sc_e () arm::streaming_compatible {} ++void sc_e (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } ++ ++void sc_f () {} ++void sc_f () arm::streaming_compatible; // { dg-error "ambiguating new declaration" } ++ ++extern void (*sc_g) (); ++extern void (*sc_g) () arm::streaming_compatible; // { dg-error "conflicting declaration" } ++ ++extern void (*sc_h) () arm::streaming_compatible; ++extern void (*sc_h) (); // { dg-error "conflicting declaration" } ++ ++//---------------------------------------------------------------------------- ++ ++void s_a () arm::streaming; ++void s_a (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } ++ ++void s_b (); ++void s_b () arm::streaming; // { dg-error "ambiguating new declaration" } ++ ++void s_c () arm::streaming; ++void s_c () {} // Inherits attribute from declaration (confusingly). 
++
++void s_d ();
++void s_d () [[arm::streaming]] {} // { dg-error "ambiguating new declaration" }
++
++void s_e () [[arm::streaming]] {}
++void s_e (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } }
++
++void s_f () {}
++void s_f () [[arm::streaming]]; // { dg-error "ambiguating new declaration" }
++
++extern void (*s_g) ();
++extern void (*s_g) () [[arm::streaming]]; // { dg-error "conflicting declaration" }
++
++extern void (*s_h) () [[arm::streaming]];
++extern void (*s_h) (); // { dg-error "conflicting declaration" }
++
++//----------------------------------------------------------------------------
++
++void mixed_a () [[arm::streaming]];
++void mixed_a () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" }
++
++void mixed_b () [[arm::streaming_compatible]];
++void mixed_b () [[arm::streaming]]; // { dg-error "ambiguating new declaration" }
++
++void mixed_c () [[arm::streaming]];
++void mixed_c () [[arm::streaming_compatible]] {} // { dg-error "ambiguating new declaration" }
++
++void mixed_d () [[arm::streaming_compatible]];
++void mixed_d () [[arm::streaming]] {} // { dg-error "ambiguating new declaration" }
++
++void mixed_e () [[arm::streaming]] {}
++void mixed_e () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" }
++
++void mixed_f () [[arm::streaming_compatible]] {}
++void mixed_f () [[arm::streaming]]; // { dg-error "ambiguating new declaration" }
++
++extern void (*mixed_g) () [[arm::streaming_compatible]];
++extern void (*mixed_g) () [[arm::streaming]]; // { dg-error "conflicting declaration" }
++
++extern void (*mixed_h) () [[arm::streaming]];
++extern void (*mixed_h) () [[arm::streaming_compatible]]; // { dg-error "conflicting declaration" }
++
++//----------------------------------------------------------------------------
++
++void contradiction_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++void contradiction_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++
++int [[arm::streaming_compatible]] int_attr; // { dg-warning "attribute ignored" }
++void [[arm::streaming_compatible]] ret_attr (); // { dg-warning "attribute ignored" }
++void *[[arm::streaming]] ptr_attr; // { dg-warning "only applies to function types" }
++
++typedef void s_callback () [[arm::streaming]];
++typedef void sc_callback () [[arm::streaming_compatible]];
++
++typedef void contradiction_callback_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++typedef void contradiction_callback_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++
++void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++
++struct s {
++  void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++  void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++};
++
++//----------------------------------------------------------------------------
++
++void keyword_ok_1 () __arm_streaming;
++void keyword_ok_1 () __arm_streaming;
++
++void keyword_ok_2 () __arm_streaming;
++void keyword_ok_2 () [[arm::streaming]];
++
++void keyword_ok_3 () [[arm::streaming]];
++void keyword_ok_3 () __arm_streaming;
++
++void keyword_ok_4 () __arm_streaming [[arm::streaming]];
++
++void keyword_ok_5 () __arm_streaming_compatible;
++void keyword_ok_5 () [[arm::streaming_compatible]];
++
++//----------------------------------------------------------------------------
++
++void keyword_contradiction_1 () __arm_streaming;
++void keyword_contradiction_1 (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } }
++
++void keyword_contradiction_2 ();
++void keyword_contradiction_2 () __arm_streaming; // { dg-error "ambiguating new declaration" }
++
++void keyword_contradiction_3 () __arm_streaming;
++void keyword_contradiction_3 () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" }
++
++void keyword_contradiction_4 () [[arm::streaming_compatible]];
++void keyword_contradiction_4 () __arm_streaming; // { dg-error "ambiguating new declaration" }
++
++//----------------------------------------------------------------------------
++
++struct s1
++{
++  virtual void f () [[arm::streaming]];
++};
++
++struct s2 : public s1
++{
++  void f () override; // { dg-error "conflicting type attributes" }
++};
+diff --git a/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C
+new file mode 100644
+index 000000000..f2dd2db9b
+--- /dev/null
++++ b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C
+@@ -0,0 +1,25 @@
++// { dg-options "" }
++
++void sc_fn () [[arm::streaming_compatible]];
++void s_fn () [[arm::streaming]];
++void ns_fn ();
++
++void (*sc_fn_ptr) () [[arm::streaming_compatible]];
++void (*s_fn_ptr) () [[arm::streaming]];
++void (*ns_fn_ptr) ();
++
++void
++f ()
++{
++  sc_fn_ptr = sc_fn;
++  sc_fn_ptr = s_fn; // { dg-error "invalid conversion" }
++  sc_fn_ptr = ns_fn; // { dg-error "invalid conversion" }
++
++  s_fn_ptr = sc_fn; // { dg-error "invalid conversion" }
++  s_fn_ptr = s_fn;
++  s_fn_ptr = ns_fn; // { dg-error "invalid conversion" }
++
++  ns_fn_ptr = sc_fn; // { dg-error "invalid conversion" }
++  ns_fn_ptr = s_fn; // { dg-error "invalid conversion" }
++  ns_fn_ptr = ns_fn;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/auto-init-1.c b/gcc/testsuite/gcc.target/aarch64/auto-init-1.c
+index 0fa470880..45bb02561 100644
+--- a/gcc/testsuite/gcc.target/aarch64/auto-init-1.c
++++ b/gcc/testsuite/gcc.target/aarch64/auto-init-1.c
+@@ -29,4 +29,5 @@ void foo()
+   return;
+ }
+ 
+-/* { dg-final { scan-rtl-dump-times "const_int 0" 11 "expand" } } */
++/* Includes 1 for the call instruction and 1 for a nop.  */
++/* { dg-final { scan-rtl-dump-times "const_int 0" 10 "expand" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
+new file mode 100644
+index 000000000..c990e5924
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
+@@ -0,0 +1,40 @@
++# Specific regression driver for AArch64 SME.
++# Copyright (C) 2009-2023 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++# General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.  */
++
++# GCC testsuite that uses the `dg.exp' driver.
++
++# Exit immediately if this isn't an AArch64 target.
++if {![istarget aarch64*-*-*] } {
++    return
++}
++
++# Load support procs.
++load_lib gcc-dg.exp
++
++# Initialize `dg'.
++dg-init
++
++aarch64-with-arch-dg-options "" {
++    # Main loop.
++    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
++	"" ""
++}
++
++# All done.
++dg-finish
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
+new file mode 100644
+index 000000000..8f1b83676
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
+@@ -0,0 +1,4 @@
++/* { dg-options "-std=c90 -pedantic-errors" } */
++
++void f1 () __arm_streaming;
++void f2 () __arm_streaming_compatible;
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c
+new file mode 100644
+index 000000000..8874b05b8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c
+@@ -0,0 +1,130 @@
++// { dg-options "" }
++
++void sc_a () [[arm::streaming_compatible]];
++void sc_a (); // { dg-error "conflicting types" }
++
++void sc_b ();
++void sc_b () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++void sc_c () [[arm::streaming_compatible]];
++void sc_c () {} // Inherits attribute from declaration (confusingly).
++
++void sc_d ();
++void sc_d () [[arm::streaming_compatible]] {} // { dg-error "conflicting types" }
++
++void sc_e () [[arm::streaming_compatible]] {}
++void sc_e (); // { dg-error "conflicting types" }
++
++void sc_f () {}
++void sc_f () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++extern void (*sc_g) ();
++extern void (*sc_g) () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++extern void (*sc_h) () [[arm::streaming_compatible]];
++extern void (*sc_h) (); // { dg-error "conflicting types" }
++
++//----------------------------------------------------------------------------
++
++void s_a () [[arm::streaming]];
++void s_a (); // { dg-error "conflicting types" }
++
++void s_b ();
++void s_b () [[arm::streaming]]; // { dg-error "conflicting types" }
++
++void s_c () [[arm::streaming]];
++void s_c () {} // Inherits attribute from declaration (confusingly).
++
++void s_d ();
++void s_d () [[arm::streaming]] {} // { dg-error "conflicting types" }
++
++void s_e () [[arm::streaming]] {}
++void s_e (); // { dg-error "conflicting types" }
++
++void s_f () {}
++void s_f () [[arm::streaming]]; // { dg-error "conflicting types" }
++
++extern void (*s_g) ();
++extern void (*s_g) () [[arm::streaming]]; // { dg-error "conflicting types" }
++
++extern void (*s_h) () [[arm::streaming]];
++extern void (*s_h) (); // { dg-error "conflicting types" }
++
++//----------------------------------------------------------------------------
++
++void mixed_a () [[arm::streaming]];
++void mixed_a () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++void mixed_b () [[arm::streaming_compatible]];
++void mixed_b () [[arm::streaming]]; // { dg-error "conflicting types" }
++
++void mixed_c () [[arm::streaming]];
++void mixed_c () [[arm::streaming_compatible]] {} // { dg-error "conflicting types" }
++
++void mixed_d () [[arm::streaming_compatible]];
++void mixed_d () [[arm::streaming]] {} // { dg-error "conflicting types" }
++
++void mixed_e () [[arm::streaming]] {}
++void mixed_e () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++void mixed_f () [[arm::streaming_compatible]] {}
++void mixed_f () [[arm::streaming]]; // { dg-error "conflicting types" }
++
++extern void (*mixed_g) () [[arm::streaming_compatible]];
++extern void (*mixed_g) () [[arm::streaming]]; // { dg-error "conflicting types" }
++
++extern void (*mixed_h) () [[arm::streaming]];
++extern void (*mixed_h) () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++//----------------------------------------------------------------------------
++
++void contradiction_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++void contradiction_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++
++int [[arm::streaming_compatible]] int_attr; // { dg-warning "only applies to function types" }
++void [[arm::streaming_compatible]] ret_attr (); // { dg-warning "only applies to function types" }
++void *[[arm::streaming]] ptr_attr; // { dg-warning "only applies to function types" }
++
++typedef void s_callback () [[arm::streaming]];
++typedef void sc_callback () [[arm::streaming_compatible]];
++
++typedef void contradiction_callback_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++typedef void contradiction_callback_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++
++void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++
++struct s {
++  void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" }
++  void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" }
++};
++
++//----------------------------------------------------------------------------
++
++void keyword_ok_1 () __arm_streaming;
++void keyword_ok_1 () __arm_streaming;
++
++void keyword_ok_2 () __arm_streaming;
++void keyword_ok_2 () [[arm::streaming]];
++
++void keyword_ok_3 () [[arm::streaming]];
++void keyword_ok_3 () __arm_streaming;
++
++void keyword_ok_4 () __arm_streaming [[arm::streaming]];
++
++void keyword_ok_5 () __arm_streaming_compatible;
++void keyword_ok_5 () [[arm::streaming_compatible]];
++
++//----------------------------------------------------------------------------
++
++void keyword_contradiction_1 () __arm_streaming;
++void keyword_contradiction_1 (); // { dg-error "conflicting types" }
++
++void keyword_contradiction_2 ();
++void keyword_contradiction_2 () __arm_streaming; // { dg-error "conflicting types" }
++
++void keyword_contradiction_3 () __arm_streaming;
++void keyword_contradiction_3 () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
++
++void keyword_contradiction_4 () [[arm::streaming_compatible]];
++void keyword_contradiction_4 () __arm_streaming; // { dg-error "conflicting types" }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c
+new file mode 100644
+index 000000000..e8be0f821
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c
+@@ -0,0 +1,25 @@
++// { dg-options "" }
++
++void sc_fn () [[arm::streaming_compatible]];
++void s_fn () [[arm::streaming]];
++void ns_fn ();
++
++void (*sc_fn_ptr) () [[arm::streaming_compatible]];
++void (*s_fn_ptr) () [[arm::streaming]];
++void (*ns_fn_ptr) ();
++
++void
++f ()
++{
++  sc_fn_ptr = sc_fn;
++  sc_fn_ptr = s_fn; // { dg-error "incompatible pointer type" }
++  sc_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" }
++
++  s_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" }
++  s_fn_ptr = s_fn;
++  s_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" }
++
++  ns_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" }
++  ns_fn_ptr = s_fn; // { dg-error "incompatible pointer type" }
++  ns_fn_ptr = ns_fn;
++}
+--
+2.33.0
+
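The streaming_mode tests above pin down the key design point: the streaming property is part of the function type, so redeclarations that add or drop it, and assignments between differently-typed function pointers, are rejected. A minimal usage sketch, not part of the patch (names are illustrative; requires a compiler with these attributes and a -march that includes SME):

void step (void) __arm_streaming;              /* runs in streaming mode */
void helper (void) __arm_streaming_compatible; /* callable in either mode */
void plain (void);                             /* non-streaming only */

void (*cb) (void) __arm_streaming_compatible;

void
wire_up (void)
{
  cb = helper;   /* OK: same streaming variant on both sides.  */
  /* cb = step; or cb = plain; would be rejected, exactly as
     streaming_mode_2.c expects ("incompatible pointer type").  */
}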
View file
_service:tar_scm:0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch
Added
@@ -0,0 +1,73 @@ +From b6b917847a25afbaba9983e80b62b69ed3ce3983 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jul 2024 10:37:26 +0800 +Subject: PATCH 174/188 LoongArch: TFmode is not allowed to be stored in the + float register. + + PR target/115752 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Replace + UNITS_PER_FPVALUE with UNITS_PER_HWFPVALUE. + * config/loongarch/loongarch.h (UNITS_PER_FPVALUE): Delete. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr115752.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/loongarch.h | 7 ------- + gcc/testsuite/gcc.target/loongarch/pr115752.c | 8 ++++++++ + 3 files changed, 9 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr115752.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 958e82b86..b78512e0e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6760,7 +6760,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +- return size <= UNITS_PER_FPVALUE; ++ return size <= UNITS_PER_HWFPVALUE; + + /* Allow integer modes that fit into a single register. We need + to put integers into FPRs when using instructions like CVT +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 6743d2684..a23dabde1 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -146,13 +146,6 @@ along with GCC; see the file COPYING3. If not see + #define UNITS_PER_HWFPVALUE \ + (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG) + +-/* The largest size of value that can be held in floating-point +- registers. */ +-#define UNITS_PER_FPVALUE \ +- (TARGET_SOFT_FLOAT ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \ +- : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) +- + /* The number of bytes in a double. */ + #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) + +diff --git a/gcc/testsuite/gcc.target/loongarch/pr115752.c b/gcc/testsuite/gcc.target/loongarch/pr115752.c +new file mode 100644 +index 000000000..df4bae524 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr115752.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++long double ++test (long double xx) ++{ ++ __asm ("" :: "f"(xx)); /* { dg-error "inconsistent operand constraints in an 'asm'" } */ ++ return xx + 1; ++} +-- +2.43.0 +
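The underlying mismatch is simple: on LoongArch64, long double is IEEE binary128 (TFmode, 16 bytes), while a hardware float register holds UNITS_PER_HWFPVALUE == 8 bytes, so an "f" constraint on a long double operand can never be satisfied and must be diagnosed instead of being let through by the stale UNITS_PER_FPVALUE limit. A quick illustrative check, not part of the patch:

/* Illustrative only; on loongarch64-linux this prints 16 and 8,
   which is why TFmode cannot live in one float register.  */
#include <stdio.h>

int
main (void)
{
  printf ("long double: %zu bytes\n", sizeof (long double));
  printf ("double:      %zu bytes\n", sizeof (double));
  return 0;
}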
View file
_service:tar_scm:0175-Backport-SME-aarch64-Add-sme.patch
Added
@@ -0,0 +1,330 @@ +From c097d9ffc7dd8f90f78eb3b994f3691f4c8f812d Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:23 +0000 +Subject: PATCH 076/157 BackportSME aarch64: Add +sme + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7e04bd1fadf3410c3d24b56f650a52ff53d01a3c + +This patch adds the +sme ISA feature and requires it to be present +when compiling arm_streaming code. (arm_streaming_compatible code +does not necessarily assume the presence of SME. It just has to +work when SME is present and streaming mode is enabled.) + +gcc/ + * doc/invoke.texi: Document SME. + * doc/sourcebuild.texi: Document aarch64_sve. + * config/aarch64/aarch64-option-extensions.def (sme): Define. + * config/aarch64/aarch64.h (AARCH64_ISA_SME): New macro. + (TARGET_SME): Likewise. + * config/aarch64/aarch64.cc (aarch64_override_options_internal): + Ensure that SME is present when compiling streaming code. + +gcc/testsuite/ + * lib/target-supports.exp (check_effective_target_aarch64_sme): New + target test. + * gcc.target/aarch64/sme/aarch64-sme.exp: Force SME to be enabled + if it isn't by default. + * g++.target/aarch64/sme/aarch64-sme.exp: Likewise. + * gcc.target/aarch64/sme/streaming_mode_3.c: New test. +--- + .../aarch64/aarch64-option-extensions.def | 2 + + gcc/config/aarch64/aarch64.cc | 33 ++++++++++ + gcc/config/aarch64/aarch64.h | 5 ++ + gcc/doc/invoke.texi | 2 + + gcc/doc/sourcebuild.texi | 2 + + .../g++.target/aarch64/sme/aarch64-sme.exp | 10 ++- + .../gcc.target/aarch64/sme/aarch64-sme.exp | 10 ++- + .../gcc.target/aarch64/sme/streaming_mode_3.c | 63 +++++++++++++++++++ + .../gcc.target/aarch64/sme/streaming_mode_4.c | 22 +++++++ + gcc/testsuite/lib/target-supports.exp | 12 ++++ + 10 files changed, 157 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c + +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index bdf4baf30..faee64a79 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -149,4 +149,6 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") + + AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") + ++AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") ++ + #undef AARCH64_OPT_EXTENSION +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 904166b21..8f8395201 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -11648,6 +11648,23 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + return true; + } + ++/* Implement TARGET_START_CALL_ARGS. */ ++ ++static void ++aarch64_start_call_args (cumulative_args_t ca_v) ++{ ++ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); ++ ++ if (!TARGET_SME && (ca->isa_mode & AARCH64_FL_SM_ON)) ++ { ++ error ("calling a streaming function requires the ISA extension %qs", ++ "sme"); ++ inform (input_location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %<target%>" ++ " attribute or pragma", "sme"); ++ } ++} ++ + /* This function is used by the call expanders of the machine description. + RESULT is the register in which the result is returned. It's NULL for + "call" and "sibcall". 
+@@ -18194,6 +18211,19 @@ aarch64_override_options_internal (struct gcc_options *opts) + && !fixed_regsR18_REGNUM) + error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); + ++ if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) ++ && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME)) ++ { ++ error ("streaming functions require the ISA extension %qs", "sme"); ++ inform (input_location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %<target%>" ++ " attribute or pragma", "sme"); ++ opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY; ++ auto new_flags = (opts->x_aarch64_asm_isa_flags ++ | feature_deps::SME ().enable); ++ aarch64_set_asm_isa_flags (opts, new_flags); ++ } ++ + initialize_aarch64_code_model (opts); + initialize_aarch64_tls_size (opts); + +@@ -28159,6 +28189,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_FUNCTION_VALUE_REGNO_P + #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p + ++#undef TARGET_START_CALL_ARGS ++#define TARGET_START_CALL_ARGS aarch64_start_call_args ++ + #undef TARGET_GIMPLE_FOLD_BUILTIN + #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 84215c8c3..dd2de4e88 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -214,6 +214,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) + #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) + #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) ++#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME) + #define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) +@@ -292,6 +293,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ + #define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) + ++/* SME instructions, enabled through +sme. Note that this does not ++ imply anything about the state of PSTATE.SM. */ ++#define TARGET_SME (AARCH64_ISA_SME) ++ + /* ARMv8.3-A features. */ + #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 53709b246..2420b05d9 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19478,6 +19478,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy}, + Enable the Flag Manipulation instructions Extension. + @item pauth + Enable the Pointer Authentication Extension. ++@item sme ++Enable the Scalable Matrix Extension. + + @end table + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 454fae11a..80936a0eb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2277,6 +2277,8 @@ AArch64 target which generates instruction sequences for big endian. + @item aarch64_small_fpic + Binutils installed on test system supports relocation types required by -fpic + for AArch64 small memory model. ++@item aarch64_sme ++AArch64 target that generates instructions for SME. + @item aarch64_sve_hw + AArch64 target that is able to generate and execute SVE code (regardless of + whether it does so by default). 
+diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
+index 72fcd0bd9..1c3e69cde 100644
+--- a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
++++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
+@@ -30,10 +30,16 @@ load_lib g++-dg.exp
+ # Initialize `dg'.
+ dg-init
+ 
+-aarch64-with-arch-dg-options "" {
++if { [check_effective_target_aarch64_sme] } {
++    set sme_flags ""
++} else {
++    set sme_flags "-march=armv9-a+sme"
++}
++
++aarch64-with-arch-dg-options $sme_flags {
+     # Main loop.
+     dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+-	"" ""
++	"" $sme_flags
+ }
+ 
+ # All done.
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
+index c990e5924..011310e80 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
+@@ -30,10 +30,16 @@ load_lib gcc-dg.exp
+ # Initialize `dg'.
+ dg-init
+ 
+-aarch64-with-arch-dg-options "" {
++if { [check_effective_target_aarch64_sme] } {
++    set sme_flags ""
++} else {
++    set sme_flags "-march=armv9-a+sme"
++}
++
++aarch64-with-arch-dg-options $sme_flags {
+     # Main loop.
+     dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+-	"" ""
++	"" $sme_flags
+ }
+ 
+ # All done.
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
+new file mode 100644
+index 000000000..45ec92321
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
+@@ -0,0 +1,63 @@
++// { dg-options "" }
++
++#pragma GCC target "+nosme"
++
++void sc_a () [[arm::streaming_compatible]] {}
++void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++void ns_a () {}
++
++void sc_b () [[arm::streaming_compatible]] {}
++void ns_b () {}
++void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++
++void sc_c () [[arm::streaming_compatible]] {}
++void sc_d () [[arm::streaming_compatible]] {}
++
++void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++
++void ns_c () {}
++void ns_d () {}
++
++void sc_e () [[arm::streaming_compatible]];
++void s_e () [[arm::streaming]];
++void ns_e ();
++
++#pragma GCC target "+sme"
++
++void sc_f () [[arm::streaming_compatible]] {}
++void s_f () [[arm::streaming]] {}
++void ns_f () {}
++
++void sc_g () [[arm::streaming_compatible]] {}
++void ns_g () {}
++void s_g () [[arm::streaming]] {}
++
++void sc_h () [[arm::streaming_compatible]] {}
++void sc_i () [[arm::streaming_compatible]] {}
++
++void s_h () [[arm::streaming]] {}
++void s_i () [[arm::streaming]] {}
++
++void ns_h () {}
++void ns_i () {}
++
++void sc_j () [[arm::streaming_compatible]];
++void s_j () [[arm::streaming]];
++void ns_j ();
++
++#pragma GCC target "+sme"
++
++void sc_k () [[arm::streaming_compatible]] {}
++
++#pragma GCC target "+nosme"
++#pragma GCC target "+sme"
++
++void s_k () [[arm::streaming]] {}
++
++#pragma GCC target "+nosme"
++#pragma GCC target "+sme"
++
++void ns_k () {}
++
++#pragma GCC target "+nosme"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
+new file mode 100644
+index 000000000..50e92f2e1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
+@@ -0,0 +1,22 @@
++// { dg-options "-mgeneral-regs-only" }
++
++void sc_a () [[arm::streaming_compatible]] {}
++void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++void ns_a () {}
++
++void sc_b () [[arm::streaming_compatible]] {}
++void ns_b () {}
++void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++
++void sc_c () [[arm::streaming_compatible]] {}
++void sc_d () [[arm::streaming_compatible]] {}
++
++void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
++
++void ns_c () {}
++void ns_d () {}
++
++void sc_e () [[arm::streaming_compatible]];
++void s_e () [[arm::streaming]];
++void ns_e ();
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index bd89d4f52..e2a9ef5fa 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -3887,6 +3887,18 @@ proc aarch64_sve_bits { } {
+     }
+ }
+ 
++# Return 1 if this is an AArch64 target that generates instructions for SME.
++proc check_effective_target_aarch64_sme { } {
++    if { ![istarget aarch64*-*-*] } {
++	return 0
++    }
++    return [check_no_compiler_messages aarch64_sme assembly {
++	#if !defined (__ARM_FEATURE_SME)
++	#error FOO
++	#endif
++    }]
++}
++
+ # Return 1 if this is a compiler supporting ARC atomic operations
+ proc check_effective_target_arc_atomic { } {
+     return [check_no_compiler_messages arc_atomic assembly {
+--
+2.33.0
+
View file
_service:tar_scm:0175-LoongArch-Remove-unreachable-codes.patch
Added
@@ -0,0 +1,249 @@ +From a75609d90f506709e4af26701aa4fb6adce00700 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Thu, 4 Jul 2024 15:00:40 +0800 +Subject: PATCH 175/188 LoongArch: Remove unreachable codes. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_split_move): Delete. + (loongarch_hard_regno_mode_ok_uncached): Likewise. + * config/loongarch/loongarch.md + (move_doubleword_fpr<mode>): Likewise. + (load_low<mode>): Likewise. + (load_high<mode>): Likewise. + (store_word<mode>): Likewise. + (movgr2frh<mode>): Likewise. + (movfrh2gr<mode>): Likewise. +--- + gcc/config/loongarch/loongarch.cc | 47 +++---------- + gcc/config/loongarch/loongarch.md | 109 ------------------------------ + 2 files changed, 8 insertions(+), 148 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b78512e0e..260dd7b5f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4459,42 +4459,13 @@ loongarch_split_move_p (rtx dest, rtx src) + void + loongarch_split_move (rtx dest, rtx src) + { +- rtx low_dest; +- + gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_256bit_move (dest, src); +- else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) +- { +- if (!TARGET_64BIT && GET_MODE (dest) == DImode) +- emit_insn (gen_move_doubleword_fprdi (dest, src)); +- else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) +- emit_insn (gen_move_doubleword_fprdf (dest, src)); +- else if (TARGET_64BIT && GET_MODE (dest) == TFmode) +- emit_insn (gen_move_doubleword_fprtf (dest, src)); +- else +- gcc_unreachable (); +- } + else +- { +- /* The operation can be split into two normal moves. Decide in +- which order to do them. */ +- low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) +- { +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- } +- else +- { +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- } +- } ++ gcc_unreachable (); + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -6743,20 +6714,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); + +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) ++ if (GP_REG_P (regno) ++ && !LSX_SUPPORTED_MODE_P (mode) + && !LASX_SUPPORTED_MODE_P (mode)) + return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- + if (FP_REG_P (regno)) + { ++ /* Allow 128-bit or 256-bit vector modes in all FPR. 
*/ ++ if (LSX_SUPPORTED_MODE_P (mode) ++ || LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 55a759850..16f9f37c8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -400,9 +400,6 @@ + ;; 64-bit modes for which we provide move patterns. + (define_mode_iterator MOVE64 DI DF) + +-;; 128-bit modes for which we provide move patterns on 64-bit targets. +-(define_mode_iterator MOVE128 TI TF) +- + ;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT QI HI) + +@@ -421,12 +418,6 @@ + (define_mode_iterator ANYFI (SI "TARGET_HARD_FLOAT") + (DI "TARGET_DOUBLE_FLOAT")) + +-;; A mode for which moves involving FPRs may need to be split. +-(define_mode_iterator SPLITF +- (DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")) +- + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. + (define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) +@@ -2421,41 +2412,6 @@ + (set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")) + +-;; Emit a doubleword move in which exactly one of the operands is +-;; a floating-point register. We can't just emit two normal moves +-;; because of the constraints imposed by the FPU register model; +-;; see loongarch_can_change_mode_class for details. Instead, we keep +-;; the FPR whole and use special patterns to refer to each word of +-;; the other operand. +- +-(define_expand "move_doubleword_fpr<mode>" +- (set (match_operand:SPLITF 0) +- (match_operand:SPLITF 1)) +- "" +-{ +- if (FP_REG_RTX_P (operands0)) +- { +- rtx low = loongarch_subword (operands1, 0); +- rtx high = loongarch_subword (operands1, 1); +- emit_insn (gen_load_low<mode> (operands0, low)); +- if (!TARGET_64BIT) +- emit_insn (gen_movgr2frh<mode> (operands0, high, operands0)); +- else +- emit_insn (gen_load_high<mode> (operands0, high, operands0)); +- } +- else +- { +- rtx low = loongarch_subword (operands0, 0); +- rtx high = loongarch_subword (operands0, 1); +- emit_insn (gen_store_word<mode> (low, operands1, const0_rtx)); +- if (!TARGET_64BIT) +- emit_insn (gen_movfrh2gr<mode> (high, operands1)); +- else +- emit_insn (gen_store_word<mode> (high, operands1, const1_rtx)); +- } +- DONE; +-}) +- + ;; Clear one FCC register + + (define_expand "movfcc" +@@ -2742,49 +2698,6 @@ + (set_attr "type" "fcvt") + (set_attr "mode" "<ANYF:MODE>")) + +-;; Load the low word of operand 0 with operand 1. +-(define_insn "load_low<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "general_operand" "rJ,m") +- UNSPEC_LOAD_LOW)) +- "TARGET_HARD_FLOAT" +-{ +- operands0 = loongarch_subword (operands0, 0); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "<HALFMODE>")) +- +-;; Load the high word of operand 0 from operand 1, preserving the value +-;; in the low word. 
+-(define_insn "load_high<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "general_operand" "rJ,m") +- (match_operand:SPLITF 2 "register_operand" "0,0") +- UNSPEC_LOAD_HIGH)) +- "TARGET_HARD_FLOAT" +-{ +- operands0 = loongarch_subword (operands0, 1); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "<HALFMODE>")) +- +-;; Store one word of operand 1 in operand 0. Operand 2 is 1 to store the +-;; high word and 0 to store the low word. +-(define_insn "store_word<mode>" +- (set (match_operand:<HALFMODE> 0 "nonimmediate_operand" "=r,m") +- (unspec:<HALFMODE> (match_operand:SPLITF 1 "register_operand" "f,f") +- (match_operand 2 "const_int_operand") +- UNSPEC_STORE_WORD)) +- "TARGET_HARD_FLOAT" +-{ +- operands1 = loongarch_subword (operands1, INTVAL (operands2)); +- return loongarch_output_move (operands0, operands1); +-} +- (set_attr "move_type" "mftg,fpstore") +- (set_attr "mode" "<HALFMODE>")) +- + ;; Thread-Local Storage + + (define_insn "@got_load_tls_desc<mode>" +@@ -2876,28 +2789,6 @@ + (const_int 4) + (const_int 2)))) + +-;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the +-;; value in the low word. +-(define_insn "movgr2frh<mode>" +- (set (match_operand:SPLITF 0 "register_operand" "=f") +- (unspec:SPLITF (match_operand:<HALFMODE> 1 "reg_or_0_operand" "rJ") +- (match_operand:SPLITF 2 "register_operand" "0") +- UNSPEC_MOVGR2FRH)) +- "TARGET_DOUBLE_FLOAT" +- "movgr2frh.w\t%z1,%0" +- (set_attr "move_type" "mgtf") +- (set_attr "mode" "<HALFMODE>")) +- +-;; Move high word of operand 1 to operand 0 using movfrh2gr.s. +-(define_insn "movfrh2gr<mode>" +- (set (match_operand:<HALFMODE> 0 "register_operand" "=r") +- (unspec:<HALFMODE> (match_operand:SPLITF 1 "register_operand" "f") +- UNSPEC_MOVFRH2GR)) +- "TARGET_DOUBLE_FLOAT" +- "movfrh2gr.s\t%0,%1" +- (set_attr "move_type" "mftg") +- (set_attr "mode" "<HALFMODE>")) +- +  + ;; Expand in-line code to clear the instruction cache between operand0 and + ;; operand1. +-- +2.43.0 +
View file
_service:tar_scm:0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
Added
@@ -0,0 +1,168 @@ +From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001 +From: Kyrylo Tkachov <kyrylo.tkachov@arm.com> +Date: Thu, 1 Jun 2023 09:37:06 +0100 +Subject: PATCH 077/157 BackportSME aarch64: Add =r,m and =m,r + alternatives to 64-bit vector move patterns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b + +We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives +to the mov patterns. This straightforward patch does that and for the pair variants too. +For the testcase in the code we now generate the optimal assembly without any superfluous +GP<->SIMD moves. + +Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. + +gcc/ChangeLog: + + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Add =r,m and =r,m alternatives. + (load_pair<DREG:mode><DREG2:mode>): Likewise. + (vec_store_pair<DREG:mode><DREG2:mode>): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/xreg-vec-modes_1.c: New test. +--- + gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++-------- + .../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++ + 2 files changed, 65 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 2d688edf5..b5c52ba16 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -116,26 +116,28 @@ + + (define_insn "*aarch64_simd_mov<VDMOV:mode>" + (set (match_operand:VDMOV 0 "nonimmediate_operand" +- "=w, m, m, w, ?r, ?w, ?r, w, w") ++ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") + (match_operand:VDMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn, Dz")) ++ "m, m, Dz, w, r, w, w, r, r, Dn, Dz")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" + "@ + ldr\t%d0, %1 ++ ldr\t%x0, %1 + str\txzr, %0 + str\t%d1, %0 ++ str\t%x1, %0 + * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; + * return TARGET_SIMD ? 
\"umov\t%0, %1.d0\" : \"fmov\t%x0, %d1\"; + fmov\t%d0, %1 + mov\t%0, %1 + * return aarch64_output_simd_mov_immediate (operands1, 64); + fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ +- neon_logic<q>, neon_to_gp<q>, f_mcr,\ ++ (set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\ ++ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\ + mov_reg, neon_move<q>, f_mcr") +- (set_attr "arch" "*,*,*,*,*,*,*,simd,*") ++ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*") + ) + + (define_insn "*aarch64_simd_mov<VQMOV:mode>" +@@ -177,31 +179,35 @@ + ) + + (define_insn "load_pair<DREG:mode><DREG2:mode>" +- (set (match_operand:DREG 0 "register_operand" "=w") +- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) +- (set (match_operand:DREG2 2 "register_operand" "=w") +- (match_operand:DREG2 3 "memory_operand" "m")) ++ (set (match_operand:DREG 0 "register_operand" "=w,r") ++ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump")) ++ (set (match_operand:DREG2 2 "register_operand" "=w,r") ++ (match_operand:DREG2 3 "memory_operand" "m,m")) + "TARGET_FLOAT + && rtx_equal_p (XEXP (operands3, 0), + plus_constant (Pmode, + XEXP (operands1, 0), + GET_MODE_SIZE (<DREG:MODE>mode)))" +- "ldp\\t%d0, %d2, %z1" +- (set_attr "type" "neon_ldp") ++ "@ ++ ldp\t%d0, %d2, %z1 ++ ldp\t%x0, %x2, %z1" ++ (set_attr "type" "neon_ldp,load_16") + ) + + (define_insn "vec_store_pair<DREG:mode><DREG2:mode>" +- (set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") +- (match_operand:DREG 1 "register_operand" "w")) +- (set (match_operand:DREG2 2 "memory_operand" "=m") +- (match_operand:DREG2 3 "register_operand" "w")) ++ (set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump") ++ (match_operand:DREG 1 "register_operand" "w,r")) ++ (set (match_operand:DREG2 2 "memory_operand" "=m,m") ++ (match_operand:DREG2 3 "register_operand" "w,r")) + "TARGET_FLOAT + && rtx_equal_p (XEXP (operands2, 0), + plus_constant (Pmode, + XEXP (operands0, 0), + GET_MODE_SIZE (<DREG:MODE>mode)))" +- "stp\\t%d1, %d3, %z0" +- (set_attr "type" "neon_stp") ++ "@ ++ stp\t%d1, %d3, %z0 ++ stp\t%x1, %x3, %z0" ++ (set_attr "type" "neon_stp,store_16") + ) + + (define_insn "load_pair<VQ:mode><VQ2:mode>" +diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c +new file mode 100644 +index 000000000..fc4dcb1ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++typedef unsigned int v2si __attribute__((vector_size (8))); ++ ++#define force_gp(V1) asm volatile ("" \ ++ : "=r"(V1) \ ++ : "r"(V1) \ ++ : /* No clobbers */); ++ ++/* ++** foo: ++** ldr (x0-9+), \x1\ ++** str \1, \x0\ ++** ret ++*/ ++ ++void ++foo (v2si *a, v2si *b) ++{ ++ v2si tmp = *b; ++ force_gp (tmp); ++ *a = tmp; ++} ++ ++/* ++** foo2: ++** ldp (x0-9+), (x0-9+), \x0\ ++** stp \1, \2, \x1\ ++** ret ++*/ ++void ++foo2 (v2si *a, v2si *b) ++{ ++ v2si t1 = *a; ++ v2si t2 = a1; ++ force_gp (t1); ++ force_gp (t2); ++ *b = t1; ++ b1 = t2; ++} +-- +2.33.0 +
View file
_service:tar_scm:0176-LoongArch-Organize-the-code-related-to-split-move-an.patch
Added
@@ -0,0 +1,413 @@ +From 95089699271d235efc29ae48b78f8c7f1b6386c4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng <chenglulu@loongson.cn> +Date: Fri, 12 Jul 2024 09:57:40 +0800 +Subject: PATCH 176/188 LoongArch: Organize the code related to split move + and merge the same functions. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_split_128bit_move): Delete. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_vector_move): Add a function declaration. + * config/loongarch/loongarch.cc + (loongarch_vector_costs::finish_cost): Adjust the code + formatting. + (loongarch_split_vector_move_p): Merge + loongarch_split_128bit_move_p and loongarch_split_256bit_move_p. + (loongarch_split_move_p): Merge code. + (loongarch_split_move): Likewise. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_128bit_move): Delete. + (loongarch_split_vector_move): Merge loongarch_split_128bit_move + and loongarch_split_256bit_move. + (loongarch_split_256bit_move): Delete. + (loongarch_global_init): Remove the extra semicolon at the + end of the function. + * config/loongarch/loongarch.md (*movdf_softfloat): Added a new + condition TARGET_64BIT. +--- + gcc/config/loongarch/loongarch-protos.h | 5 +- + gcc/config/loongarch/loongarch.cc | 221 ++++++------------------ + gcc/config/loongarch/loongarch.md | 1 + + 3 files changed, 58 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 0c31a74b7..abf1a0893 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx); + extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern void loongarch_split_128bit_move (rtx, rtx); +-extern bool loongarch_split_128bit_move_p (rtx, rtx); +-extern void loongarch_split_256bit_move (rtx, rtx); +-extern bool loongarch_split_256bit_move_p (rtx, rtx); ++extern void loongarch_split_vector_move (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 260dd7b5f..53bd8d7ec 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4351,10 +4351,10 @@ void + loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo); ++ + if (loop_vinfo) +- { +- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo); +- } ++ m_suggested_unroll_factor ++ = determine_suggested_unroll_factor (loop_vinfo); + + vector_costs::finish_cost (scalar_costs); + } +@@ -4420,6 +4420,7 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + ++static bool loongarch_split_vector_move_p (rtx dest, rtx src); + /* Return true if a move from SRC to DEST should be split into two. + SPLIT_TYPE describes the split condition. */ + +@@ -4441,13 +4442,11 @@ loongarch_split_move_p (rtx dest, rtx src) + return false; + } + +- /* Check if LSX moves need splitting. 
*/ +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_128bit_move_p (dest, src); + +- /* Check if LASX moves need splitting. */ +- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_256bit_move_p (dest, src); ++ /* Check if vector moves need splitting. */ ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ return loongarch_split_vector_move_p (dest, src); + + /* Otherwise split all multiword moves. */ + return size > UNITS_PER_WORD; +@@ -4460,10 +4459,9 @@ void + loongarch_split_move (rtx dest, rtx src) + { + gcc_checking_assert (loongarch_split_move_p (dest, src)); +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_128bit_move (dest, src); +- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ loongarch_split_vector_move (dest, src); + else + gcc_unreachable (); + } +@@ -4585,224 +4583,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) + + return insnldrindex-2; + } +-/* Return true if a 128-bit move from SRC to DEST should be split. */ +- +-bool +-loongarch_split_128bit_move_p (rtx dest, rtx src) +-{ +- /* LSX-to-LSX moves can be done in a single instruction. */ +- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) +- return false; +- +- /* Check for LSX loads and stores. */ +- if (FP_REG_RTX_P (dest) && MEM_P (src)) +- return false; +- if (FP_REG_RTX_P (src) && MEM_P (dest)) +- return false; +- +- /* Check for LSX set to an immediate const vector with valid replicated +- element. */ +- if (FP_REG_RTX_P (dest) +- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) +- return false; +- +- /* Check for LSX load zero immediate. */ +- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) +- return false; +- +- return true; +-} +- +-/* Return true if a 256-bit move from SRC to DEST should be split. */ ++/* Return true if a vector move from SRC to DEST should be split. */ + +-bool +-loongarch_split_256bit_move_p (rtx dest, rtx src) ++static bool ++loongarch_split_vector_move_p (rtx dest, rtx src) + { +- /* LSX-to-LSX moves can be done in a single instruction. */ ++ /* Vector moves can be done in a single instruction. */ + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + +- /* Check for LSX loads and stores. */ ++ /* Check for vector loads and stores. */ + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + +- /* Check for LSX set to an immediate const vector with valid replicated ++ /* Check for vector set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) + && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + return false; + +- /* Check for LSX load zero immediate. */ ++ /* Check for vector load zero immediate. */ + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) + return false; + + return true; + } + +-/* Split a 128-bit move from SRC to DEST. */ ++/* Split a vector move from SRC to DEST. 
*/ + + void +-loongarch_split_128bit_move (rtx dest, rtx src) ++loongarch_split_vector_move (rtx dest, rtx src) + { + int byte, index; +- rtx low_dest, low_src, d, s; ++ rtx s, d; ++ machine_mode mode = GET_MODE (dest); ++ bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + + if (FP_REG_RTX_P (dest)) + { + gcc_assert (!MEM_P (src)); + +- rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V4SImode) +- new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- } +- else +- { +- if (GET_MODE (dest) != V2DImode) +- new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0); +- } +- +- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD, index++) +- { +- s = loongarch_subword_at_byte (src, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- else +- emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- } +- } +- else if (FP_REG_RTX_P (src)) +- { +- gcc_assert (!MEM_P (dest)); +- +- rtx new_src = src; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (src) != V4SImode) +- new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- } +- else +- { +- if (GET_MODE (src) != V2DImode) +- new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0); +- } ++ rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx); + +- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD, index++) +- { +- d = loongarch_subword_at_byte (dest, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index))); +- else +- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index))); +- } +- } +- else +- { +- low_dest = loongarch_subword_at_byte (dest, 0); +- low_src = loongarch_subword_at_byte (src, 0); +- gcc_assert (REG_P (low_dest) && REG_P (low_src)); +- /* Make sure the source register is not written before reading. */ +- if (REGNO (low_dest) <= REGNO (low_src)) ++ if (lsx_p) + { +- for (byte = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD) +- { +- d = loongarch_subword_at_byte (dest, byte); +- s = loongarch_subword_at_byte (src, byte); +- loongarch_emit_move (d, s); +- } ++ mode = V2DImode; ++ gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d; + } + else + { +- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; +- byte -= UNITS_PER_WORD) +- { +- d = loongarch_subword_at_byte (dest, byte); +- s = loongarch_subword_at_byte (src, byte); +- loongarch_emit_move (d, s); +- } ++ mode = V4DImode; ++ gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d; + } +- } +-} +- +-/* Split a 256-bit move from SRC to DEST. 
*/ +- +-void +-loongarch_split_256bit_move (rtx dest, rtx src) +-{ +- int byte, index; +- rtx low_dest, low_src, d, s; +- +- if (FP_REG_RTX_P (dest)) +- { +- gcc_assert (!MEM_P (src)); + + rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V8SImode) +- new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0); +- } +- else +- { +- if (GET_MODE (dest) != V4DImode) +- new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0); +- } ++ ++ if (GET_MODE (dest) != mode) ++ new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0); + + for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest)); + byte += UNITS_PER_WORD, index++) + { + s = loongarch_subword_at_byte (src, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- else +- emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest, ++ GEN_INT (1 << index))); + } + } + else if (FP_REG_RTX_P (src)) + { + gcc_assert (!MEM_P (dest)); + +- rtx new_src = src; +- if (!TARGET_64BIT) ++ rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx); ++ ++ if (lsx_p) + { +- if (GET_MODE (src) != V8SImode) +- new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0); ++ mode = V2DImode; ++ gen_vpickve2gr_d = gen_lsx_vpickve2gr_d; + } + else + { +- if (GET_MODE (src) != V4DImode) +- new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0); ++ mode = V4DImode; ++ gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d; + } + ++ rtx new_src = src; ++ if (GET_MODE (src) != mode) ++ new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0); ++ + for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src)); + byte += UNITS_PER_WORD, index++) + { + d = loongarch_subword_at_byte (dest, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index))); +- else +- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index))); ++ emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index))); + } + } + else + { ++ /* This part of the code is designed to handle the following situations: ++ (set (reg:V2DI 4 $r4) ++ (reg:V2DI 6 $r6)) ++ The trigger test case is lsx-mov-1.c. */ ++ rtx low_dest, low_src; ++ + low_dest = loongarch_subword_at_byte (dest, 0); + low_src = loongarch_subword_at_byte (src, 0); + gcc_assert (REG_P (low_dest) && REG_P (low_src)); + /* Make sure the source register is not written before reading. */ + if (REGNO (low_dest) <= REGNO (low_src)) + { +- for (byte = 0; byte < GET_MODE_SIZE (TImode); ++ for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest)); + byte += UNITS_PER_WORD) + { + d = loongarch_subword_at_byte (dest, byte); +@@ -4812,8 +4703,8 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + else + { +- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; +- byte -= UNITS_PER_WORD) ++ for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD; ++ byte >= 0; byte -= UNITS_PER_WORD) + { + d = loongarch_subword_at_byte (dest, byte); + s = loongarch_subword_at_byte (src, byte); +@@ -7603,7 +7494,7 @@ loongarch_global_init (void) + + /* Function to allocate machine-dependent function status. 
*/ + init_machine_status = &loongarch_init_machine_status; +-}; ++} + + static void + loongarch_reg_init (void) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 16f9f37c8..8bcb43042 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2406,6 +2406,7 @@ + (set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:DF 1 "move_operand" "rG,m,rG")) + "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT) ++ && TARGET_64BIT + && (register_operand (operands0, DFmode) + || reg_or_0_operand (operands1, DFmode))" + { return loongarch_output_move (operands0, operands1); } +-- +2.43.0 +
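The ordering rule preserved in loongarch_split_vector_move ("make sure the source register is not written before reading") is the classic overlapping-copy problem. A generic C sketch of the same idea, outside GCC and purely illustrative:

static void
split_move_sketch (unsigned long *dst, const unsigned long *src, int nwords)
{
  if (dst <= src)                         /* like REGNO (low_dest) <= REGNO (low_src) */
    for (int i = 0; i < nwords; i++)      /* copy the low word first */
      dst[i] = src[i];
  else
    for (int i = nwords - 1; i >= 0; i--) /* copy the high word first */
      dst[i] = src[i];
}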
View file
_service:tar_scm:0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
Added
@@ -0,0 +1,167 @@ +From 7d40978965ff893871a79f5f624f54ae02a34a8b Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Wed, 18 Oct 2023 09:34:01 +0100 +Subject: PATCH 078/157 BackportSME AArch64: Rewrite simd move immediate + patterns to new syntax + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=04227acbe9e6c60d1e314a6b4f2d949c07f30baa + +This rewrites the simd MOV patterns to use the new compact syntax. +No change in semantics is expected. This will be needed in follow on patches. + +This also merges the splits into the define_insn which will also be needed soon. + +gcc/ChangeLog: + + PR tree-optimization/109154 + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Rewrite to new syntax. + (*aarch64_simd_mov<VQMOV:mode): Rewrite to new syntax and merge in + splits. +--- + gcc/config/aarch64/aarch64-simd.md | 116 ++++++++++++----------------- + 1 file changed, 47 insertions(+), 69 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index b5c52ba16..1f4b30642 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -115,54 +115,59 @@ + ) + + (define_insn "*aarch64_simd_mov<VDMOV:mode>" +- (set (match_operand:VDMOV 0 "nonimmediate_operand" +- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") +- (match_operand:VDMOV 1 "general_operand" +- "m, m, Dz, w, r, w, w, r, r, Dn, Dz")) ++ (set (match_operand:VDMOV 0 "nonimmediate_operand") ++ (match_operand:VDMOV 1 "general_operand")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +- "@ +- ldr\t%d0, %1 +- ldr\t%x0, %1 +- str\txzr, %0 +- str\t%d1, %0 +- str\t%x1, %0 +- * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\"; +- * return TARGET_SIMD ? 
\"umov\t%0, %1.d0\" : \"fmov\t%x0, %d1\"; +- fmov\t%d0, %1 +- mov\t%0, %1 +- * return aarch64_output_simd_mov_immediate (operands1, 64); +- fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\ +- store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\ +- mov_reg, neon_move<q>, f_mcr") +- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*") +-) +- +-(define_insn "*aarch64_simd_mov<VQMOV:mode>" +- (set (match_operand:VQMOV 0 "nonimmediate_operand" +- "=w, Umn, m, w, ?r, ?w, ?r, w, w") +- (match_operand:VQMOV 1 "general_operand" +- "m, Dz, w, w, w, r, r, Dn, Dz")) ++ {@ cons: =0, 1; attrs: type, arch ++ w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 ++ r , m ; load_8 , * ldr\t%x0, %1 ++ m , Dz; store_8 , * str\txzr, %0 ++ m , w ; neon_store1_1reg<q>, * str\t%d1, %0 ++ m , r ; store_8 , * str\t%x1, %0 ++ w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; neon_logic<q> , * fmov\t%d0, %d1 ++ ?r, w ; neon_to_gp<q> , simd umov\t%0, %1.d0 ++ ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 ++ ?w, r ; f_mcr , * fmov\t%d0, %1 ++ ?r, r ; mov_reg , * mov\t%0, %1 ++ w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); ++ w , Dz; f_mcr , * fmov\t%d0, xzr ++ } ++) ++ ++(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" ++ (set (match_operand:VQMOV 0 "nonimmediate_operand") ++ (match_operand:VQMOV 1 "general_operand")) + "TARGET_FLOAT + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" +- "@ +- ldr\t%q0, %1 +- stp\txzr, xzr, %0 +- str\t%q1, %0 +- mov\t%0.<Vbtype>, %1.<Vbtype> +- # +- # +- # +- * return aarch64_output_simd_mov_immediate (operands1, 128); +- fmov\t%d0, xzr" +- (set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ +- neon_logic<q>, multiple, multiple,\ +- multiple, neon_move<q>, fmov") +- (set_attr "length" "4,4,4,4,8,8,8,4,4") +- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*") ++ {@ cons: =0, 1; attrs: type, arch, length ++ w , m ; neon_load1_1reg<q> , * , 4 ldr\t%q0, %1 ++ Umn, Dz; store_16 , * , 4 stp\txzr, xzr, %0 ++ m , w ; neon_store1_1reg<q>, * , 4 str\t%q1, %0 ++ w , w ; neon_logic<q> , simd, 4 mov\t%0.<Vbtype>, %1.<Vbtype> ++ ?r , w ; multiple , * , 8 # ++ ?w , r ; multiple , * , 8 # ++ ?r , r ; multiple , * , 8 # ++ w , Dn; neon_move<q> , simd, 4 << aarch64_output_simd_mov_immediate (operands1, 128); ++ w , Dz; fmov , * , 4 fmov\t%d0, xzr ++ } ++ "&& reload_completed ++ && (REG_P (operands0) ++ && REG_P (operands1) ++ && !(FP_REGNUM_P (REGNO (operands0)) ++ && FP_REGNUM_P (REGNO (operands1))))" ++ (const_int 0) ++ { ++ if (GP_REGNUM_P (REGNO (operands0)) ++ && GP_REGNUM_P (REGNO (operands1))) ++ aarch64_simd_emit_reg_reg_move (operands, DImode, 2); ++ else ++ aarch64_split_simd_move (operands0, operands1); ++ DONE; ++ } + ) + + ;; When storing lane zero we can use the normal STR and its more permissive +@@ -238,33 +243,6 @@ + (set_attr "type" "neon_stp_q") + ) + +- +-(define_split +- (set (match_operand:VQMOV 0 "register_operand" "") +- (match_operand:VQMOV 1 "register_operand" "")) +- "TARGET_FLOAT +- && reload_completed +- && GP_REGNUM_P (REGNO (operands0)) +- && GP_REGNUM_P (REGNO (operands1))" +- (const_int 0) +-{ +- aarch64_simd_emit_reg_reg_move (operands, DImode, 2); +- DONE; +-}) +- +-(define_split +- (set (match_operand:VQMOV 0 "register_operand" "") +- (match_operand:VQMOV 1 "register_operand" "")) +- "TARGET_FLOAT +- && reload_completed +- && ((FP_REGNUM_P (REGNO (operands0)) && GP_REGNUM_P (REGNO (operands1))) +- || (GP_REGNUM_P (REGNO 
(operands0)) && FP_REGNUM_P (REGNO (operands1))))" +- (const_int 0) +-{ +- aarch64_split_simd_move (operands0, operands1); +- DONE; +-}) +- + (define_expand "@aarch64_split_simd_mov<mode>" + (set (match_operand:VQMOV 0) + (match_operand:VQMOV 1)) +-- +2.33.0 +
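For readers unfamiliar with the compact syntax this patch converts to: each row bundles the operand constraints, the per-alternative attributes, and the output template that were previously split across a parallel "@ ..." template string and separate set_attr lists, while "<<" introduces C code that computes the template. A minimal, purely illustrative pattern (not one from the tree, with made-up alternatives) has this shape:

(define_insn "*illustrative_mov"
  [(set (match_operand:SI 0 "register_operand")
        (match_operand:SI 1 "general_operand"))]
  ""
  {@ [cons: =0, 1; attrs: type, arch]
     [r, r ; mov_reg, * ] mov\t%w0, %w1
     [w, r ; f_mcr  , fp] fmov\t%s0, %w1
  }
)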
_service:tar_scm:0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch
Added
@@ -0,0 +1,364 @@ +From 34c8e935780d43a797e403ca6604115ec393f0e6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sat, 20 Jul 2024 20:38:13 +0800 +Subject: PATCH 177/188 LoongArch: Expand some SImode operations through + "si3_extend" instructions if TARGET_64BIT + +We already had "si3_extend" insns and we hoped the fwprop or combine +passes can use them to remove unnecessary sign extensions. But this +does not always work: for cases like x << 1 | y, the compiler +tends to do + + (sign_extend:DI + (ior:SI (ashift:SI (reg:SI $r4) + (const_int 1)) + (reg:SI $r5))) + +instead of + + (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1))) + (sign_extend:DI (reg:SI $r5))) + +So we cannot match the ashlsi3_extend instruction here and we get: + + slli.w $r4,$r4,1 + or $r4,$r5,$r4 + slli.w $r4,$r4,0 # <= redundant + jr $r1 + +To eliminate this redundant extension we need to turn SImode shift etc. +to DImode "si3_extend" operations earlier, when we expand the SImode +operation. We are already doing this for addition, now do it for +shifts, rotates, substract, multiplication, division, and modulo as +well. + +The bytepick.w definition for TARGET_64BIT needs to be adjusted so it +won't be undone by the shift expanding. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (optab): Add (rotatert "rotr"). + (<optab:any_shift><mode>3, <optab:any_div><mode>3, + sub<mode>3, rotr<mode>3, mul<mode>3): Add a "*" to the insn name + so we can redefine the names with define_expand. + (*<optab:any_shift>si3_extend): Remove "*" so we can use them + in expanders. + (*subsi3_extended, *mulsi3_extended): Likewise, also remove the + trailing "ed" for consistency. + (*<optab:any_div>si3_extended): Add mode for sign_extend to + prevent an ICE using it in expanders. + (shift_w, arith_w): New define_code_iterator. + (<optab:any_w><mode>3): New define_expand. Expand with + <optab:any_w>si3_extend for SImode if TARGET_64BIT. + (<optab:arith_w><mode>3): Likewise. + (mul<mode>3): Expand to mulsi3_extended for SImode if + TARGET_64BIT and ISA_HAS_DIV32. + (<optab:any_div><mode>3): Expand to <optab:any_div>si3_extended + for SImode if TARGET_64BIT. + (rotl<mode>3): Expand to rotrsi3_extend for SImode if + TARGET_64BIT. + (bytepick_w_<bytepick_imm>): Add mode for lshiftrt and ashift. + (bitsize, bytepick_imm, bytepick_w_ashift_amount): New + define_mode_attr. + (bytepick_w_<bytepick_imm>_extend): Adjust for the RTL change + caused by 32-bit shift expanding. Now bytepick_imm only covers + 2 and 3, separate one remaining case to ... + (bytepick_w_1_extend): ... here, new define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bitwise_extend.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 131 +++++++++++++++--- + .../gcc.target/loongarch/bitwise_extend.c | 45 ++++++ + 2 files changed, 154 insertions(+), 22 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise_extend.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 8bcb43042..6915dab0e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -546,6 +546,7 @@ + (define_code_attr optab (ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") ++ (rotatert "rotr") + (ior "ior") + (xor "xor") + (and "and") +@@ -624,6 +625,49 @@ + (48 "6") + (56 "7")) + ++;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT ++;; so the redundant sign extension can be removed if the output is used as ++;; an input of a bitwise operation. Note plus, rotl, and div are handled ++;; separately. ++(define_code_iterator shift_w any_shift rotatert) ++(define_code_iterator arith_w minus mult) ++ ++(define_expand "<optab><mode>3" ++ (set (match_operand:GPR 0 "register_operand" "=r") ++ (shift_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:SI 2 "arith_operand" "rI"))) ++ "" ++{ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extend (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ ++(define_expand "<optab><mode>3" ++ (set (match_operand:GPR 0 "register_operand" "=r") ++ (arith_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "register_operand" "r"))) ++ "" ++{ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extend (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++}) ++ + ;; + ;; .................... 
+ ;; +@@ -781,7 +825,7 @@ + (set_attr "type" "fadd") + (set_attr "mode" "<UNITMODE>")) + +-(define_insn "sub<mode>3" ++(define_insn "*sub<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) +@@ -791,7 +835,7 @@ + (set_attr "mode" "<MODE>")) + + +-(define_insn "*subsi3_extended" ++(define_insn "subsi3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") +@@ -818,7 +862,7 @@ + (set_attr "type" "fmul") + (set_attr "mode" "<MODE>")) + +-(define_insn "mul<mode>3" ++(define_insn "*mul<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r"))) +@@ -827,7 +871,7 @@ + (set_attr "type" "imul") + (set_attr "mode" "<MODE>")) + +-(define_insn "*mulsi3_extended" ++(define_insn "mulsi3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") +@@ -1001,8 +1045,19 @@ + (match_operand:GPR 2 "register_operand"))) + "" + { +- if (GET_MODE (operands0) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) ++ if (GET_MODE (operands0) == SImode && TARGET_64BIT) + { ++ if (ISA_HAS_DIV32) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_<optab>si3_extended (t, operands1, operands2)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } ++ + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); + rtx rd = gen_reg_rtx (DImode); +@@ -1038,7 +1093,7 @@ + + (define_insn "<optab>si3_extended" + (set (match_operand:DI 0 "register_operand" "=r,&r,&r") +- (sign_extend ++ (sign_extend:DI + (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") + (match_operand:SI 2 "register_operand" "r,r,r")))) + "TARGET_64BIT && ISA_HAS_DIV32" +@@ -2981,7 +3036,7 @@ + ;; + ;; .................... 
+ +-(define_insn "<optab><mode>3" ++(define_insn "*<optab><mode>3" + (set (match_operand:GPR 0 "register_operand" "=r") + (any_shift:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI"))) +@@ -2996,7 +3051,7 @@ + (set_attr "type" "shift") + (set_attr "mode" "<MODE>")) + +-(define_insn "*<optab>si3_extend" ++(define_insn "<optab>si3_extend" + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (any_shift:SI (match_operand:SI 1 "register_operand" "r") +@@ -3011,7 +3066,7 @@ + (set_attr "type" "shift") + (set_attr "mode" "SI")) + +-(define_insn "rotr<mode>3" ++(define_insn "*rotr<mode>3" + (set (match_operand:GPR 0 "register_operand" "=r,r") + (rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,I"))) +@@ -3040,6 +3095,19 @@ + "" + { + operands3 = gen_reg_rtx (SImode); ++ ++ if (TARGET_64BIT && <MODE>mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ ++ emit_insn (gen_negsi2 (operands3, operands2)); ++ emit_insn (gen_rotrsi3_extend (t, operands1, operands3)); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands0, t); ++ DONE; ++ } + }); + + ;; The following templates were added to generate "bstrpick.d + alsl.d" +@@ -4061,26 +4129,45 @@ + + (define_insn "bytepick_w_<bytepick_imm>" + (set (match_operand:SI 0 "register_operand" "=r") +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int <bytepick_w_lshiftrt_amount>)) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount)))) ++ (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int <bytepick_w_lshiftrt_amount>)) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount)))) + "" + "bytepick.w\t%0,%1,%2,<bytepick_imm>" + (set_attr "mode" "SI")) + ++(define_mode_attr bitsize (QI "8") (HI "16")) ++(define_mode_attr bytepick_imm (QI "3") (HI "2")) ++(define_mode_attr bytepick_w_ashift_amount (QI "24") (HI "16")) ++ + (define_insn "bytepick_w_<bytepick_imm>_extend" + (set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI +- (subreg:SI +- (ior:DI (subreg:DI (lshiftrt +- (match_operand:SI 1 "register_operand" "r") +- (const_int <bytepick_w_lshiftrt_amount>)) 0) +- (subreg:DI (ashift +- (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount)) 0)) 0))) ++ (ior:DI ++ (ashift:DI ++ (sign_extend:DI ++ (subreg:SHORT (match_operand:DI 1 "register_operand" "r") 0)) ++ (const_int <bytepick_w_ashift_amount>)) ++ (zero_extract:DI (match_operand:DI 2 "register_operand" "r") ++ (const_int <bytepick_w_ashift_amount>) ++ (const_int <bitsize>)))) + "TARGET_64BIT" +- "bytepick.w\t%0,%1,%2,<bytepick_imm>" ++ "bytepick.w\t%0,%2,%1,<bytepick_imm>" ++ (set_attr "mode" "SI")) ++ ++(define_insn "bytepick_w_1_extend" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (ashift:DI ++ (sign_extract:DI (match_operand:DI 1 "register_operand" "r") ++ (const_int 24) ++ (const_int 0)) ++ (const_int 8)) ++ (zero_extract:DI (match_operand:DI 2 "register_operand" "r") ++ (const_int 8) ++ (const_int 24)))) ++ "TARGET_64BIT" ++ "bytepick.w\t%0,%2,%1,1" + (set_attr "mode" "SI")) + + (define_insn "bytepick_d_<bytepick_imm>" +diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c b/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c +new file mode 100644 +index 000000000..c2bc489a7 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mdiv32" } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++int ++f1 (int a, int b) ++{ ++ return (a << b) | b; ++} ++ ++int ++f2 (int a, int b) ++{ ++ return (a - b) | b; ++} ++ ++int ++f3 (int a, int b) ++{ ++ return (a * b) | b; ++} ++ ++int ++f4 (int a, int b) ++{ ++ return (unsigned) a >> b | (unsigned) a << (32 - b) | b; ++} ++ ++int ++f5 (int a, int b) ++{ ++ return (unsigned) a << b | (unsigned) a >> (32 - b) | b; ++} ++ ++int ++f6 (int a, int b) ++{ ++ return (a % b) | b; ++} ++ ++int ++f7 (int a, int b) ++{ ++ return (a + b) | b; ++} +-- +2.43.0 +
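The motivating case from the commit message, written out as a standalone test-style unit; the comment restates the before/after sequences quoted above (illustrative, compiled for loongarch64 at -O2):

/* Before this change the 32-bit shift result was sign-extended again
   before the return:

       slli.w  $r4,$r4,1
       or      $r4,$r5,$r4
       slli.w  $r4,$r4,0   # redundant
       jr      $r1

   With the expanders above, the shift is emitted in its si3_extend
   form up front and the trailing slli.w disappears.  */
int
shift_or (int x, int y)
{
  return x << 1 | y;
}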
_service:tar_scm:0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch
Added
@@ -0,0 +1,34 @@ +From 883af5a13e648e74cb8d8722be6d4980e8bc8f48 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Tue, 20 Jun 2023 08:54:42 +0100 +Subject: [PATCH 079/157] [Backport][SME] AArch64: remove test comment from + *mov<mode>_aarch64 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=36de416df8b3f109353e309011061fa66e872e3a + +I accidentally left a test comment in the final version of the patch. +This removes the comment. + +gcc/ChangeLog: + + * config/aarch64/aarch64.md (*mov<mode>_aarch64): Drop test comment. +--- + gcc/config/aarch64/aarch64.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 29a665e45..1ec23fae8 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -1213,7 +1213,7 @@ + [m, r Z ; store_4 , *] str<size>\\t%w1, %0 + [m, w ; store_4 , *] str\t%<size>1, %0 + [r, w ; neon_to_gp<q> , simd] umov\t%w0, %1.<v>[0] +- [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1 /*foo */ ++ [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1 + [w, r Z ; neon_from_gp<q>, simd] dup\t%0.<Vallxd>, %w1 + [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1 + [w, w ; neon_dup , simd] dup\t%<Vetype>0, %1.<v>[0] +-- +2.33.0 +
_service:tar_scm:0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch
Added
@@ -0,0 +1,123 @@ +From 8311e0053c8a9646b8798c53ae4a8f45d12c42c1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Sun, 28 Jul 2024 17:02:49 +0800 +Subject: PATCH 178/188 LoongArch: Relax ins_zero_bitmask_operand and remove + and<mode>3_align + +In r15-1207 I was too stupid to realize we just need to relax +ins_zero_bitmask_operand to allow using bstrins for aligning, instead of +adding a new split. And, "> 12" in ins_zero_bitmask_operand also makes +no sense: it rejects bstrins for things like "x & ~4l" with no good +reason. + +So fix my errors now. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (ins_zero_bitmask_operand): + Cover more cases that bstrins can benefit. + (high_bitmask_operand): Remove. + * config/loongarch/constraints.md (Yy): Remove. + * config/loongarch/loongarch.md (and<mode>3_align): Remove. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-4.c: New test. +--- + gcc/config/loongarch/constraints.md | 4 ---- + gcc/config/loongarch/loongarch.md | 17 ----------------- + gcc/config/loongarch/predicates.md | 9 ++------- + gcc/testsuite/gcc.target/loongarch/bstrins-4.c | 9 +++++++++ + 4 files changed, 11 insertions(+), 28 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-4.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index f3f5776da..d653ea82a 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -292,10 +292,6 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + +-(define_constraint "Yy" +- "@internal" +- (match_operand 0 "high_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6915dab0e..1ebcfa0c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1588,23 +1588,6 @@ + (set_attr "move_type" "pick_ins") + (set_attr "mode" "<MODE>")) + +-(define_insn_and_split "and<mode>3_align" +- (set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "high_bitmask_operand" "Yy"))) +- "" +- "#" +- "" +- (set (match_dup 0) (match_dup 1)) +- (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) +- (const_int 0)) +-{ +- int len; +- +- len = low_bitmask_len (<MODE>mode, ~INTVAL (operands2)); +- operands2 = GEN_INT (len); +-}) +- + (define_insn_and_split "*bstrins_<mode>_for_mask" + (set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 95be8a4fe..2b7f7ed47 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,10 +293,6 @@ + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "high_bitmask_operand" +- (and (match_code "const_int") +- (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) +- + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +@@ -406,11 +402,10 @@ + + (define_predicate "ins_zero_bitmask_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) != -1") +- (match_test "INTVAL (op) & 1") + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ +- > 12"))) ++ > 0") ++ (not (match_operand 0 
"const_uns_arith_operand")))) + + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-4.c b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +new file mode 100644 +index 000000000..0823cfc38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,2,2" } } */ ++ ++long ++x (long a) ++{ ++ return a & ~4; ++} +-- +2.43.0 +
_service:tar_scm:0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
Added
@@ -0,0 +1,1552 @@ +From 4a0e91dc27b30ae673ba132bf2be17a74bc89f31 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:24 +0000 +Subject: PATCH 080/157 BackportSME aarch64: Distinguish + streaming-compatible AdvSIMD insns + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c86ee4f683e05e5809597d96b5eeb261c9c92cac + +The vast majority of Advanced SIMD instructions are not +available in streaming mode, but some of the load/store/move +instructions are. This patch adds a new target feature macro +called TARGET_BASE_SIMD for this streaming-compatible subset. + +The vector-to-vector move instructions are not streaming-compatible, +so we need to use the SVE move instructions where enabled, or fall +back to the nofp16 handling otherwise. + +I haven't found a good way of testing the SVE EXT alternative +in aarch64_simd_mov_from_<mode>high, but I'd rather provide it +than not. + +gcc/ + * config/aarch64/aarch64.h (TARGET_BASE_SIMD): New macro. + (TARGET_SIMD): Require PSTATE.SM to be 0. + (AARCH64_ISA_SM_OFF): New macro. + * config/aarch64/aarch64.cc (aarch64_array_mode_supported_p): + Allow Advanced SIMD structure modes for TARGET_BASE_SIMD. + (aarch64_print_operand): Support '%Z'. + (aarch64_secondary_reload): Expect SVE moves to be used for + Advanced SIMD modes if SVE is enabled and non-streaming + Advanced SIMD isn't. + (aarch64_register_move_cost): Likewise. + (aarch64_simd_container_mode): Extend Advanced SIMD mode + handling to TARGET_BASE_SIMD. + (aarch64_expand_cpymem): Expand commentary. + * config/aarch64/aarch64.md (arches): Add base_simd and nobase_simd. + (arch_enabled): Handle it. + (*mov<mode>_aarch64): Extend UMOV alternative to TARGET_BASE_SIMD. + (*movti_aarch64): Use an SVE move instruction if non-streaming + SIMD isn't available. + (*mov<TFD:mode>_aarch64): Likewise. + (load_pair_dw_tftf): Extend to TARGET_BASE_SIMD. + (store_pair_dw_tftf): Likewise. + (loadwb_pair<TX:mode>_<P:mode>): Likewise. + (storewb_pair<TX:mode>_<P:mode>): Likewise. + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): + Allow UMOV in streaming mode. + (*aarch64_simd_mov<VQMOV:mode>): Use an SVE move instruction + if non-streaming SIMD isn't available. + (aarch64_store_lane0<mode>): Depend on TARGET_FLOAT rather than + TARGET_SIMD. + (aarch64_simd_mov_from_<mode>low): Likewise. Use fmov if + Advanced SIMD is completely disabled. + (aarch64_simd_mov_from_<mode>high): Use SVE EXT instructions if + non-streaming SIMD isn't available. + +gcc/testsuite/ + * gcc.target/aarch64/movdf_2.c: New test. + * gcc.target/aarch64/movdi_3.c: Likewise. + * gcc.target/aarch64/movhf_2.c: Likewise. + * gcc.target/aarch64/movhi_2.c: Likewise. + * gcc.target/aarch64/movqi_2.c: Likewise. + * gcc.target/aarch64/movsf_2.c: Likewise. + * gcc.target/aarch64/movsi_2.c: Likewise. + * gcc.target/aarch64/movtf_3.c: Likewise. + * gcc.target/aarch64/movtf_4.c: Likewise. + * gcc.target/aarch64/movti_3.c: Likewise. + * gcc.target/aarch64/movti_4.c: Likewise. + * gcc.target/aarch64/movv16qi_4.c: Likewise. + * gcc.target/aarch64/movv16qi_5.c: Likewise. + * gcc.target/aarch64/movv8qi_4.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_1.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_2.c: Likewise. + * gcc.target/aarch64/sme/arm_neon_3.c: Likewise. 
+--- + gcc/config/aarch64/aarch64-simd.md | 50 ++++++----- + gcc/config/aarch64/aarch64.cc | 16 ++-- + gcc/config/aarch64/aarch64.h | 12 ++- + gcc/config/aarch64/aarch64.md | 77 +++++++++-------- + gcc/testsuite/gcc.target/aarch64/movdf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movdi_3.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhf_2.c | 53 ++++++++++++ + gcc/testsuite/gcc.target/aarch64/movhi_2.c | 61 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movqi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movsf_2.c | 51 +++++++++++ + gcc/testsuite/gcc.target/aarch64/movsi_2.c | 59 +++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_3.c | 81 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movtf_4.c | 78 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_3.c | 86 +++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movti_4.c | 83 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_4.c | 82 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv16qi_5.c | 79 +++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/movv8qi_4.c | 55 ++++++++++++ + .../gcc.target/aarch64/sme/arm_neon_1.c | 13 +++ + .../gcc.target/aarch64/sme/arm_neon_2.c | 11 +++ + .../gcc.target/aarch64/sme/arm_neon_3.c | 11 +++ + 21 files changed, 1062 insertions(+), 65 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 1f4b30642..62493cdfa 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -121,19 +121,19 @@ + && (register_operand (operands0, <MODE>mode) + || aarch64_simd_reg_or_zero (operands1, <MODE>mode))" + {@ cons: =0, 1; attrs: type, arch +- w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 +- r , m ; load_8 , * ldr\t%x0, %1 +- m , Dz; store_8 , * str\txzr, %0 +- m , w ; neon_store1_1reg<q>, * str\t%d1, %0 +- m , r ; store_8 , * str\t%x1, %0 +- w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> +- w , w ; neon_logic<q> , * fmov\t%d0, %d1 +- ?r, w ; neon_to_gp<q> , simd umov\t%0, %1.d0 +- ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 +- ?w, r ; f_mcr , * fmov\t%d0, %1 +- ?r, r ; mov_reg , * mov\t%0, %1 +- w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); +- w , Dz; f_mcr , * fmov\t%d0, xzr ++ w , m ; neon_load1_1reg<q> , * ldr\t%d0, %1 ++ r , m ; load_8 , * ldr\t%x0, %1 ++ m , Dz; store_8 , * str\txzr, %0 ++ m , w ; 
neon_store1_1reg<q>, * str\t%d1, %0 ++ m , r ; store_8 , * str\t%x1, %0 ++ w , w ; neon_logic<q> , simd mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; neon_logic<q> , * fmov\t%d0, %d1 ++ ?r, w ; neon_to_gp<q> , base_simd umov\t%0, %1.d0 ++ ?r, w ; neon_to_gp<q> , * fmov\t%x0, %d1 ++ ?w, r ; f_mcr , * fmov\t%d0, %1 ++ ?r, r ; mov_reg , * mov\t%0, %1 ++ w , Dn; neon_move<q> , simd << aarch64_output_simd_mov_immediate (operands1, 64); ++ w , Dz; f_mcr , * fmov\t%d0, xzr + } + ) + +@@ -148,6 +148,7 @@ + Umn, Dz; store_16 , * , 4 stp\txzr, xzr, %0 + m , w ; neon_store1_1reg<q>, * , 4 str\t%q1, %0 + w , w ; neon_logic<q> , simd, 4 mov\t%0.<Vbtype>, %1.<Vbtype> ++ w , w ; * , sve , 4 mov\t%Z0.d, %Z1.d + ?r , w ; multiple , * , 8 # + ?w , r ; multiple , * , 8 # + ?r , r ; multiple , * , 8 # +@@ -177,7 +178,7 @@ + (set (match_operand:<VEL> 0 "memory_operand" "=m") + (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") + (parallel (match_operand 2 "const_int_operand" "n")))) +- "TARGET_SIMD ++ "TARGET_FLOAT + && ENDIAN_LANE_N (<nunits>, INTVAL (operands2)) == 0" + "str\\t%<Vetype>1, %0" + (set_attr "type" "neon_store1_1reg<q>") +@@ -312,35 +313,38 @@ + ) + + (define_insn_and_split "aarch64_simd_mov_from_<mode>low" +- (set (match_operand:<VHALF> 0 "register_operand" "=w,?r") ++ (set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r") + (vec_select:<VHALF> +- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") ++ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" ""))) +- "TARGET_SIMD" ++ "TARGET_FLOAT" + "@ + # +- umov\t%0, %1.d0" ++ umov\t%0, %1.d0 ++ fmov\t%0, %d1" + "&& reload_completed && aarch64_simd_register (operands0, <VHALF>mode)" + (set (match_dup 0) (match_dup 1)) + { + operands1 = aarch64_replace_reg_mode (operands1, <VHALF>mode); + } +- (set_attr "type" "mov_reg,neon_to_gp<q>") ++ (set_attr "type" "mov_reg,neon_to_gp<q>,f_mrc") ++ (set_attr "arch" "simd,base_simd,*") + (set_attr "length" "4") + ) + + (define_insn "aarch64_simd_mov_from_<mode>high" +- (set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r") ++ (set (match_operand:<VHALF> 0 "register_operand" "=w,w,?r,?r") + (vec_select:<VHALF> +- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") ++ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w,w") + (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" ""))) + "TARGET_FLOAT" + "@ + dup\t%d0, %1.d1 ++ ext\t%Z0.b, %Z0.b, %Z0.b, #8 + umov\t%0, %1.d1 + fmov\t%0, %1.d1" +- (set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc") +- (set_attr "arch" "simd,simd,*") ++ (set_attr "type" "neon_dup<q>,*,neon_to_gp<q>,f_mrc") ++ (set_attr "arch" "simd,sve,simd,*") + (set_attr "length" "4") + ) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8f8395201..08a98f8ba 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -3999,7 +3999,7 @@ static bool + aarch64_array_mode_supported_p (machine_mode mode, + unsigned HOST_WIDE_INT nelems) + { +- if (TARGET_SIMD ++ if (TARGET_BASE_SIMD + && (AARCH64_VALID_SIMD_QREG_MODE (mode) + || AARCH64_VALID_SIMD_DREG_MODE (mode)) + && (nelems >= 2 && nelems <= 4)) +@@ -12955,8 +12955,8 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + +- /* Without the TARGET_SIMD instructions we cannot move a Q register +- to a Q register directly. We need a scratch. */ ++ /* Without the TARGET_SIMD or TARGET_SVE instructions we cannot move a ++ Q register to a Q register directly. We need a scratch. 
*/ + if (REG_P (x) + && (mode == TFmode + || mode == TImode +@@ -15540,7 +15540,7 @@ aarch64_register_move_cost (machine_mode mode, + secondary reload. A general register is used as a scratch to move + the upper DI value and the lower DI value is moved directly, + hence the cost is the sum of three moves. */ +- if (! TARGET_SIMD) ++ if (!TARGET_SIMD && !TARGET_SVE) + return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; + + return regmove_cost->FP2FP; +@@ -21107,7 +21107,7 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) + return aarch64_full_sve_mode (mode).else_mode (word_mode); + + gcc_assert (known_eq (width, 64) || known_eq (width, 128)); +- if (TARGET_SIMD) ++ if (TARGET_BASE_SIMD) + { + if (known_eq (width, 128)) + return aarch64_vq_mode (mode).else_mode (word_mode); +@@ -25221,7 +25221,11 @@ aarch64_expand_cpymem (rtx *operands) + int copy_bits = 256; + + /* Default to 256-bit LDP/STP on large copies, however small copies, no SIMD +- support or slow 256-bit LDP/STP fall back to 128-bit chunks. */ ++ support or slow 256-bit LDP/STP fall back to 128-bit chunks. ++ ++ ??? Although it would be possible to use LDP/STP Qn in streaming mode ++ (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear ++ whether that would improve performance. */ + if (size <= 24 + || !TARGET_SIMD + || (aarch64_tune_params.extra_tuning_flags +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index dd2de4e88..a3c83a3b1 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -61,8 +61,15 @@ + #define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN) + + /* AdvSIMD is supported in the default configuration, unless disabled by +- -mgeneral-regs-only or by the +nosimd extension. */ +-#define TARGET_SIMD (AARCH64_ISA_SIMD) ++ -mgeneral-regs-only or by the +nosimd extension. The set of available ++ instructions is then subdivided into: ++ ++ - the "base" set, available both in SME streaming mode and in ++ non-streaming mode ++ ++ - the full set, available only in non-streaming mode. */ ++#define TARGET_BASE_SIMD (AARCH64_ISA_SIMD) ++#define TARGET_SIMD (AARCH64_ISA_SIMD && AARCH64_ISA_SM_OFF) + #define TARGET_FLOAT (AARCH64_ISA_FP) + + #define UNITS_PER_WORD 8 +@@ -199,6 +206,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + + /* Macros to test ISA flags. */ + ++#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) + #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) + #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) + #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 1ec23fae8..079c8a3f9 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -378,7 +378,8 @@ + ;; As a convenience, "fp_q" means "fp" + the ability to move between + ;; Q registers and is equivalent to "simd". 
+ +-(define_enum "arches" any rcpc8_4 fp fp_q simd nosimd sve fp16) ++(define_enum "arches" any rcpc8_4 fp fp_q base_simd nobase_simd ++ simd nosimd sve fp16) + + (define_enum_attr "arch" "arches" (const_string "any")) + +@@ -406,6 +407,12 @@ + (and (eq_attr "arch" "fp") + (match_test "TARGET_FLOAT")) + ++ (and (eq_attr "arch" "base_simd") ++ (match_test "TARGET_BASE_SIMD")) ++ ++ (and (eq_attr "arch" "nobase_simd") ++ (match_test "!TARGET_BASE_SIMD")) ++ + (and (eq_attr "arch" "fp_q, simd") + (match_test "TARGET_SIMD")) + +@@ -1202,22 +1209,22 @@ + "(register_operand (operands0, <MODE>mode) + || aarch64_reg_or_zero (operands1, <MODE>mode))" + {@ cons: =0, 1; attrs: type, arch +- r, r ; mov_reg , * mov\t%w0, %w1 +- r, M ; mov_imm , * mov\t%w0, %1 +- w, D<hq>; neon_move , simd << aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); ++ r, r ; mov_reg , * mov\t%w0, %w1 ++ r, M ; mov_imm , * mov\t%w0, %1 ++ w, D<hq>; neon_move , simd << aarch64_output_scalar_simd_mov_immediate (operands1, <MODE>mode); + /* The "mov_imm" type for CNT is just a placeholder. */ +- r, Usv ; mov_imm , sve << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); +- r, Usr ; mov_imm , sve << aarch64_output_sve_rdvl (operands1); +- r, m ; load_4 , * ldr<size>\t%w0, %1 +- w, m ; load_4 , * ldr\t%<size>0, %1 +- m, r Z ; store_4 , * str<size>\\t%w1, %0 +- m, w ; store_4 , * str\t%<size>1, %0 +- r, w ; neon_to_gp<q> , simd umov\t%w0, %1.<v>0 +- r, w ; neon_to_gp<q> , nosimd fmov\t%w0, %s1 +- w, r Z ; neon_from_gp<q>, simd dup\t%0.<Vallxd>, %w1 +- w, r Z ; neon_from_gp<q>, nosimd fmov\t%s0, %w1 +- w, w ; neon_dup , simd dup\t%<Vetype>0, %1.<v>0 +- w, w ; neon_dup , nosimd fmov\t%s0, %s1 ++ r, Usv ; mov_imm , sve << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands1); ++ r, Usr ; mov_imm , sve << aarch64_output_sve_rdvl (operands1); ++ r, m ; load_4 , * ldr<size>\t%w0, %1 ++ w, m ; load_4 , * ldr\t%<size>0, %1 ++ m, r Z ; store_4 , * str<size>\\t%w1, %0 ++ m, w ; store_4 , * str\t%<size>1, %0 ++ r, w ; neon_to_gp<q> , base_simd umov\t%w0, %1.<v>0 ++ r, w ; neon_to_gp<q> , nobase_simd fmov\t%w0, %s1 ++ w, r Z ; neon_from_gp<q>, simd dup\t%0.<Vallxd>, %w1 ++ w, r Z ; neon_from_gp<q>, nosimd fmov\t%s0, %w1 ++ w, w ; neon_dup , simd dup\t%<Vetype>0, %1.<v>0 ++ w, w ; neon_dup , nosimd fmov\t%s0, %s1 + } + ) + +@@ -1372,9 +1379,9 @@ + + (define_insn "*movti_aarch64" + (set (match_operand:TI 0 +- "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m") ++ "nonimmediate_operand" "= r,w,w,w, r,w,w,r,m,m,w,m") + (match_operand:TI 1 +- "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w")) ++ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,w,m,r,Z,m,w")) + "(register_operand (operands0, TImode) + || aarch64_reg_or_zero (operands1, TImode))" + "@ +@@ -1384,16 +1391,17 @@ + # + # + mov\\t%0.16b, %1.16b ++ mov\\t%Z0.d, %Z1.d + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0 + stp\\txzr, xzr, %0 + ldr\\t%q0, %1 + str\\t%q1, %0" +- (set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \ ++ (set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q,*,\ + load_16,store_16,store_16,\ + load_16,store_16") +- (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4") +- (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp") ++ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4,4") ++ (set_attr "arch" "*,simd,*,*,*,simd,sve,*,*,*,fp,fp") + ) + + ;; Split a TImode register-register or register-immediate move into +@@ -1529,13 +1537,14 @@ + + (define_insn "*mov<mode>_aarch64" + (set (match_operand:TFD 0 +- "nonimmediate_operand" "=w,?r ,w 
,?r,w,?w,w,m,?r,m ,m") ++ "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m") + (match_operand:TFD 1 +- "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y")) ++ "general_operand" " w,w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y")) + "TARGET_FLOAT && (register_operand (operands0, <MODE>mode) + || aarch64_reg_or_fp_zero (operands1, <MODE>mode))" + "@ + mov\\t%0.16b, %1.16b ++ mov\\t%Z0.d, %Z1.d + # + # + # +@@ -1546,10 +1555,10 @@ + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0 + stp\\txzr, xzr, %0" +- (set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ ++ (set_attr "type" "logic_reg,*,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ + f_loadd,f_stored,load_16,store_16,store_16") +- (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") +- (set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*") ++ (set_attr "length" "4,4,8,8,8,4,4,4,4,4,4,4") ++ (set_attr "arch" "simd,sve,*,*,*,simd,*,*,*,*,*,*") + ) + + (define_split +@@ -1738,7 +1747,7 @@ + (match_operand:TF 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:TF 2 "register_operand" "=w") + (match_operand:TF 3 "memory_operand" "m")) +- "TARGET_SIMD ++ "TARGET_BASE_SIMD + && rtx_equal_p (XEXP (operands3, 0), + plus_constant (Pmode, + XEXP (operands1, 0), +@@ -1788,11 +1797,11 @@ + (match_operand:TF 1 "register_operand" "w")) + (set (match_operand:TF 2 "memory_operand" "=m") + (match_operand:TF 3 "register_operand" "w")) +- "TARGET_SIMD && +- rtx_equal_p (XEXP (operands2, 0), +- plus_constant (Pmode, +- XEXP (operands0, 0), +- GET_MODE_SIZE (TFmode)))" ++ "TARGET_BASE_SIMD ++ && rtx_equal_p (XEXP (operands2, 0), ++ plus_constant (Pmode, ++ XEXP (operands0, 0), ++ GET_MODE_SIZE (TFmode)))" + "stp\\t%q1, %q3, %z0" + (set_attr "type" "neon_stp_q") + (set_attr "fp" "yes") +@@ -1840,7 +1849,7 @@ + (set (match_operand:TX 3 "register_operand" "=w") + (mem:TX (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))) +- "TARGET_SIMD && INTVAL (operands5) == GET_MODE_SIZE (<TX:MODE>mode)" ++ "TARGET_BASE_SIMD && INTVAL (operands5) == GET_MODE_SIZE (<TX:MODE>mode)" + "ldp\\t%q2, %q3, %1, %4" + (set_attr "type" "neon_ldp_q") + ) +@@ -1890,7 +1899,7 @@ + (set (mem:TX (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:TX 3 "register_operand" "w"))) +- "TARGET_SIMD ++ "TARGET_BASE_SIMD + && INTVAL (operands5) + == INTVAL (operands4) + GET_MODE_SIZE (<TX:MODE>mode)" + "stp\\t%q2, %q3, %0, %4!" 
+diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_2.c b/gcc/testsuite/gcc.target/aarch64/movdf_2.c +new file mode 100644 +index 000000000..0d459d317 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdf_2.c +@@ -0,0 +1,51 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++double ++fpr_to_fpr (double q0, double q1) arm::streaming_compatible ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++double ++gpr_to_fpr () arm::streaming_compatible ++{ ++ register double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++double ++zero_to_fpr () arm::streaming_compatible ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++void ++fpr_to_gpr (double q0) arm::streaming_compatible ++{ ++ register double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_3.c b/gcc/testsuite/gcc.target/aarch64/movdi_3.c +new file mode 100644 +index 000000000..31b2cbbae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movdi_3.c +@@ -0,0 +1,59 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov d0, d1 ++** ret ++*/ ++void ++fpr_to_fpr (void) arm::streaming_compatible ++{ ++ register uint64_t q0 asm ("q0"); ++ register uint64_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov d0, x0 ++** ret ++*/ ++void ++gpr_to_fpr (uint64_t x0) arm::streaming_compatible ++{ ++ register uint64_t q0 asm ("q0"); ++ q0 = x0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov d0, xzr ++** ret ++*/ ++void ++zero_to_fpr () arm::streaming_compatible ++{ ++ register uint64_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov x0, d0 ++** ret ++*/ ++uint64_t ++fpr_to_gpr () arm::streaming_compatible ++{ ++ register uint64_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_2.c b/gcc/testsuite/gcc.target/aarch64/movhf_2.c +new file mode 100644 +index 000000000..3292b0de8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movhf_2.c +@@ -0,0 +1,53 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+simd" ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++_Float16 ++fpr_to_fpr (_Float16 q0, _Float16 q1) arm::streaming_compatible ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++_Float16 ++gpr_to_fpr () arm::streaming_compatible ++{ ++ register _Float16 w0 asm ("w0"); ++ asm volatile ("" : "=r" (w0)); ++ return w0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++_Float16 ++zero_to_fpr () arm::streaming_compatible ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++void ++fpr_to_gpr (_Float16 q0) arm::streaming_compatible ++{ ++ register _Float16 w0 asm ("w0"); ++ w0 = q0; ++ asm volatile ("" :: "r" (w0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_2.c b/gcc/testsuite/gcc.target/aarch64/movhi_2.c +new file mode 100644 +index 000000000..dbbf3486f +--- 
/dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movhi_2.c +@@ -0,0 +1,61 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nothing+simd" ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) arm::streaming_compatible ++{ ++ register uint16_t q0 asm ("q0"); ++ register uint16_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint16_t w0) arm::streaming_compatible ++{ ++ register uint16_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () arm::streaming_compatible ++{ ++ register uint16_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** umov w0, v0.h\0\ ++** ret ++*/ ++uint16_t ++fpr_to_gpr () arm::streaming_compatible ++{ ++ register uint16_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_2.c b/gcc/testsuite/gcc.target/aarch64/movqi_2.c +new file mode 100644 +index 000000000..aec087e4e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movqi_2.c +@@ -0,0 +1,59 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) arm::streaming_compatible ++{ ++ register uint8_t q0 asm ("q0"); ++ register uint8_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint8_t w0) arm::streaming_compatible ++{ ++ register uint8_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () arm::streaming_compatible ++{ ++ register uint8_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** umov w0, v0.b\0\ ++** ret ++*/ ++uint8_t ++fpr_to_gpr () arm::streaming_compatible ++{ ++ register uint8_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_2.c b/gcc/testsuite/gcc.target/aarch64/movsf_2.c +new file mode 100644 +index 000000000..7fed4b22f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movsf_2.c +@@ -0,0 +1,51 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++float ++fpr_to_fpr (float q0, float q1) arm::streaming_compatible ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++float ++gpr_to_fpr () arm::streaming_compatible ++{ ++ register float w0 asm ("w0"); ++ asm volatile ("" : "=r" (w0)); ++ return w0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++float ++zero_to_fpr () arm::streaming_compatible ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++void ++fpr_to_gpr (float q0) arm::streaming_compatible ++{ ++ register float w0 asm ("w0"); ++ w0 = q0; ++ asm volatile ("" :: "r" (w0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_2.c b/gcc/testsuite/gcc.target/aarch64/movsi_2.c +new file mode 
100644 +index 000000000..c14d2468a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movsi_2.c +@@ -0,0 +1,59 @@ ++/* { dg-do assemble } */ ++/* { dg-options "-O --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++/* ++** fpr_to_fpr: ++** fmov s0, s1 ++** ret ++*/ ++void ++fpr_to_fpr (void) arm::streaming_compatible ++{ ++ register uint32_t q0 asm ("q0"); ++ register uint32_t q1 asm ("q1"); ++ asm volatile ("" : "=w" (q1)); ++ q0 = q1; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** gpr_to_fpr: ++** fmov s0, w0 ++** ret ++*/ ++void ++gpr_to_fpr (uint32_t w0) arm::streaming_compatible ++{ ++ register uint32_t q0 asm ("q0"); ++ q0 = w0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++void ++zero_to_fpr () arm::streaming_compatible ++{ ++ register uint32_t q0 asm ("q0"); ++ q0 = 0; ++ asm volatile ("" :: "w" (q0)); ++} ++ ++/* ++** fpr_to_gpr: ++** fmov w0, s0 ++** ret ++*/ ++uint32_t ++fpr_to_gpr () arm::streaming_compatible ++{ ++ register uint32_t q0 asm ("q0"); ++ asm volatile ("" : "=w" (q0)); ++ return q0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_3.c b/gcc/testsuite/gcc.target/aarch64/movtf_3.c +new file mode 100644 +index 000000000..dd164a418 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movtf_3.c +@@ -0,0 +1,81 @@ ++/* { dg-do assemble } */ ++/* { dg-require-effective-target large_long_double } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+nosve" ++ ++/* ++** fpr_to_fpr: ++** sub sp, sp, #16 ++** str q1, \sp\ ++** ldr q0, \sp\ ++** add sp, sp, #?16 ++** ret ++*/ ++long double ++fpr_to_fpr (long double q0, long double q1) arm::streaming_compatible ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov v0.d\1\, x1 ++** ret ++*/ ++/* ++** gpr_to_fpr: { target aarch64_big_endian } ++** fmov d0, x1 ++** fmov v0.d\1\, x0 ++** ret ++*/ ++long double ++gpr_to_fpr () arm::streaming_compatible ++{ ++ register long double x0 asm ("x0"); ++ asm volatile ("" : "=r" (x0)); ++ return x0; ++} ++ ++/* ++** zero_to_fpr: ++** fmov s0, wzr ++** ret ++*/ ++long double ++zero_to_fpr () arm::streaming_compatible ++{ ++ return 0; ++} ++ ++/* ++** fpr_to_gpr: { target aarch64_little_endian } ++** ( ++** fmov x0, d0 ++** fmov x1, v0.d\1\ ++** | ++** fmov x1, v0.d\1\ ++** fmov x0, d0 ++** ) ++** ret ++*/ ++/* ++** fpr_to_gpr: { target aarch64_big_endian } ++** ( ++** fmov x1, d0 ++** fmov x0, v0.d\1\ ++** | ++** fmov x0, v0.d\1\ ++** fmov x1, d0 ++** ) ++** ret ++*/ ++void ++fpr_to_gpr (long double q0) arm::streaming_compatible ++{ ++ register long double x0 asm ("x0"); ++ x0 = q0; ++ asm volatile ("" :: "r" (x0)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_4.c b/gcc/testsuite/gcc.target/aarch64/movtf_4.c +new file mode 100644 +index 000000000..faf9703e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/movtf_4.c +@@ -0,0 +1,78 @@ ++/* { dg-do assemble } */ ++/* { dg-require-effective-target large_long_double } */ ++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#pragma GCC target "+sve" ++ ++/* ++** fpr_to_fpr: ++** mov z0.d, z1.d ++** ret ++*/ ++long double ++fpr_to_fpr (long double q0, long double q1) arm::streaming_compatible ++{ ++ return q1; ++} ++ ++/* ++** gpr_to_fpr: { target aarch64_little_endian } ++** fmov d0, x0 ++** fmov 
v0.d\[1\], x1
++** ret
++*/
++/*
++** gpr_to_fpr: { target aarch64_big_endian }
++** fmov d0, x1
++** fmov v0.d\[1\], x0
++** ret
++*/
++long double
++gpr_to_fpr () [[arm::streaming_compatible]]
++{
++ register long double x0 asm ("x0");
++ asm volatile ("" : "=r" (x0));
++ return x0;
++}
++
++/*
++** zero_to_fpr:
++** fmov s0, wzr
++** ret
++*/
++long double
++zero_to_fpr () [[arm::streaming_compatible]]
++{
++ return 0;
++}
++
++/*
++** fpr_to_gpr: { target aarch64_little_endian }
++** (
++** fmov x0, d0
++** fmov x1, v0.d\[1\]
++** |
++** fmov x1, v0.d\[1\]
++** fmov x0, d0
++** )
++** ret
++*/
++/*
++** fpr_to_gpr: { target aarch64_big_endian }
++** (
++** fmov x1, d0
++** fmov x0, v0.d\[1\]
++** |
++** fmov x0, v0.d\[1\]
++** fmov x1, d0
++** )
++** ret
++*/
++void
++fpr_to_gpr (long double q0) [[arm::streaming_compatible]]
++{
++ register long double x0 asm ("x0");
++ x0 = q0;
++ asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movti_3.c b/gcc/testsuite/gcc.target/aarch64/movti_3.c
+new file mode 100644
+index 000000000..243109181
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movti_3.c
+@@ -0,0 +1,86 @@
++/* { dg-do assemble } */
++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nosve"
++
++/*
++** fpr_to_fpr:
++** sub sp, sp, #16
++** str q1, \[sp\]
++** ldr q0, \[sp\]
++** add sp, sp, #?16
++** ret
++*/
++void
++fpr_to_fpr (void) [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ register __int128_t q1 asm ("q1");
++ asm volatile ("" : "=w" (q1));
++ q0 = q1;
++ asm volatile ("" :: "w" (q0));
++}
++
++/*
++** gpr_to_fpr: { target aarch64_little_endian }
++** fmov d0, x0
++** fmov v0.d\[1\], x1
++** ret
++*/
++/*
++** gpr_to_fpr: { target aarch64_big_endian }
++** fmov d0, x1
++** fmov v0.d\[1\], x0
++** ret
++*/
++void
++gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ q0 = x0;
++ asm volatile ("" :: "w" (q0));
++}
++
++/*
++** zero_to_fpr:
++** fmov d0, xzr
++** ret
++*/
++void
++zero_to_fpr () [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ q0 = 0;
++ asm volatile ("" :: "w" (q0));
++}
++
++/*
++** fpr_to_gpr: { target aarch64_little_endian }
++** (
++** fmov x0, d0
++** fmov x1, v0.d\[1\]
++** |
++** fmov x1, v0.d\[1\]
++** fmov x0, d0
++** )
++** ret
++*/
++/*
++** fpr_to_gpr: { target aarch64_big_endian }
++** (
++** fmov x1, d0
++** fmov x0, v0.d\[1\]
++** |
++** fmov x0, v0.d\[1\]
++** fmov x1, d0
++** )
++** ret
++*/
++__int128_t
++fpr_to_gpr () [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ asm volatile ("" : "=w" (q0));
++ return q0;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movti_4.c b/gcc/testsuite/gcc.target/aarch64/movti_4.c
+new file mode 100644
+index 000000000..a70feccb0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movti_4.c
+@@ -0,0 +1,83 @@
++/* { dg-do assemble } */
++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+sve"
++
++/*
++** fpr_to_fpr:
++** mov z0\.d, z1\.d
++** ret
++*/
++void
++fpr_to_fpr (void) [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ register __int128_t q1 asm ("q1");
++ asm volatile ("" : "=w" (q1));
++ q0 = q1;
++ asm volatile ("" :: "w" (q0));
++}
++
++/*
++** gpr_to_fpr: { target aarch64_little_endian }
++** fmov d0, x0
++** fmov v0.d\[1\], x1
++** ret
++*/
++/*
++** gpr_to_fpr: { target aarch64_big_endian }
++** fmov d0, x1
++** fmov v0.d\[1\], x0
++** ret
++*/
++void
++gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ q0 = x0;
++ asm volatile ("" :: "w" (q0));
++}
++
++/*
++** zero_to_fpr:
++** fmov d0, xzr
++** ret
++*/
++void
++zero_to_fpr () [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ q0 = 0;
++ asm volatile ("" :: "w" (q0));
++}
++
++/*
++** fpr_to_gpr: { target aarch64_little_endian }
++** (
++** fmov x0, d0
++** fmov x1, v0.d\[1\]
++** |
++** fmov x1, v0.d\[1\]
++** fmov x0, d0
++** )
++** ret
++*/
++/*
++** fpr_to_gpr: { target aarch64_big_endian }
++** (
++** fmov x1, d0
++** fmov x0, v0.d\[1\]
++** |
++** fmov x0, v0.d\[1\]
++** fmov x1, d0
++** )
++** ret
++*/
++__int128_t
++fpr_to_gpr () [[arm::streaming_compatible]]
++{
++ register __int128_t q0 asm ("q0");
++ asm volatile ("" : "=w" (q0));
++ return q0;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c
+new file mode 100644
+index 000000000..7bec888b7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c
+@@ -0,0 +1,82 @@
++/* { dg-do assemble } */
++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nosve"
++
++typedef unsigned char v16qi __attribute__((vector_size(16)));
++
++/*
++** fpr_to_fpr:
++** sub sp, sp, #16
++** str q1, \[sp\]
++** ldr q0, \[sp\]
++** add sp, sp, #?16
++** ret
++*/
++v16qi
++fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]]
++{
++ return q1;
++}
++
++/*
++** gpr_to_fpr: { target aarch64_little_endian }
++** fmov d0, x0
++** fmov v0.d\[1\], x1
++** ret
++*/
++/*
++** gpr_to_fpr: { target aarch64_big_endian }
++** fmov d0, x1
++** fmov v0.d\[1\], x0
++** ret
++*/
++v16qi
++gpr_to_fpr () [[arm::streaming_compatible]]
++{
++ register v16qi x0 asm ("x0");
++ asm volatile ("" : "=r" (x0));
++ return x0;
++}
++
++/*
++** zero_to_fpr:
++** fmov d0, xzr
++** ret
++*/
++v16qi
++zero_to_fpr () [[arm::streaming_compatible]]
++{
++ return (v16qi) {};
++}
++
++/*
++** fpr_to_gpr: { target aarch64_little_endian }
++** (
++** umov x0, v0.d\[0\]
++** fmov x1, v0.d\[1\]
++** |
++** fmov x1, v0.d\[1\]
++** umov x0, v0.d\[0\]
++** )
++** ret
++*/
++/*
++** fpr_to_gpr: { target aarch64_big_endian }
++** (
++** umov x1, v0.d\[0\]
++** fmov x0, v0.d\[1\]
++** |
++** fmov x0, v0.d\[1\]
++** umov x1, v0.d\[0\]
++** )
++** ret
++*/
++void
++fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]]
++{
++ register v16qi x0 asm ("x0");
++ x0 = q0;
++ asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c
+new file mode 100644
+index 000000000..2d36342b3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c
+@@ -0,0 +1,79 @@
++/* { dg-do assemble } */
++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+sve"
++
++typedef unsigned char v16qi __attribute__((vector_size(16)));
++
++/*
++** fpr_to_fpr:
++** mov z0.d, z1.d
++** ret
++*/
++v16qi
++fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]]
++{
++ return q1;
++}
++
++/*
++** gpr_to_fpr: { target aarch64_little_endian }
++** fmov d0, x0
++** fmov v0.d\[1\], x1
++** ret
++*/
++/*
++** gpr_to_fpr: { target aarch64_big_endian }
++** fmov d0, x1
++** fmov v0.d\[1\], x0
++** ret
++*/
++v16qi
++gpr_to_fpr () [[arm::streaming_compatible]]
++{
++ register v16qi x0 asm ("x0");
++ asm volatile ("" : "=r" (x0));
++ return x0;
++}
++
++/*
++** zero_to_fpr:
++** fmov d0, xzr
++** ret
++*/
++v16qi
++zero_to_fpr () [[arm::streaming_compatible]]
++{
++ return (v16qi) {};
++}
++
++/*
++** fpr_to_gpr: { target aarch64_little_endian }
++** (
++** umov x0, v0.d\[0\]
++** fmov x1, v0.d\[1\]
++** |
++** fmov x1, v0.d\[1\]
++** umov x0, v0.d\[0\]
++** )
++** ret
++*/
++/*
++** fpr_to_gpr: { target aarch64_big_endian }
++** (
++** umov x1, v0.d\[0\]
++** fmov x0, v0.d\[1\]
++** |
++** fmov x0, v0.d\[1\]
++** umov x1, v0.d\[0\]
++** )
++** ret
++*/
++void
++fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]]
++{
++ register v16qi x0 asm ("x0");
++ x0 = q0;
++ asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c
+new file mode 100644
+index 000000000..12ae25a3a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c
+@@ -0,0 +1,55 @@
++/* { dg-do assemble } */
++/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++#pragma GCC target "+nosve"
++
++typedef unsigned char v8qi __attribute__((vector_size(8)));
++
++/*
++** fpr_to_fpr:
++** fmov d0, d1
++** ret
++*/
++v8qi
++fpr_to_fpr (v8qi q0, v8qi q1) [[arm::streaming_compatible]]
++{
++ return q1;
++}
++
++/*
++** gpr_to_fpr:
++** fmov d0, x0
++** ret
++*/
++v8qi
++gpr_to_fpr () [[arm::streaming_compatible]]
++{
++ register v8qi x0 asm ("x0");
++ asm volatile ("" : "=r" (x0));
++ return x0;
++}
++
++/*
++** zero_to_fpr:
++** fmov d0, xzr
++** ret
++*/
++v8qi
++zero_to_fpr () [[arm::streaming_compatible]]
++{
++ return (v8qi) {};
++}
++
++/*
++** fpr_to_gpr:
++** umov x0, v0\.d\[0\]
++** ret
++*/
++void
++fpr_to_gpr (v8qi q0) [[arm::streaming_compatible]]
++{
++ register v8qi x0 asm ("x0");
++ x0 = q0;
++ asm volatile ("" :: "r" (x0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c
+new file mode 100644
+index 000000000..5b5346cf4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c
+@@ -0,0 +1,13 @@
++// { dg-options "" }
++
++#include <arm_neon.h>
++
++#pragma GCC target "+nosme"
++
++// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 }
++
++int32x4_t
++foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]]
++{
++ return vhaddq_s32 (x, y);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c
+new file mode 100644
+index 000000000..2092c4471
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c
+@@ -0,0 +1,11 @@
++// { dg-options "" }
++
++#include <arm_neon.h>
++
++// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 }
++
++int32x4_t
++foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]]
++{
++ return vhaddq_s32 (x, y);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c
+new file mode 100644
+index 000000000..36794e5b0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c
+@@ -0,0 +1,11 @@
++// { dg-options "" }
++
++#include <arm_neon.h>
++
++// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 }
++
++int32x4_t
++foo (int32x4_t x, int32x4_t y) [[arm::streaming]]
++{
++ return vhaddq_s32 (x, y);
++}
+--
+2.33.0
+
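A streaming-compatible function can be entered with PSTATE.SM either on or off, and Advanced SIMD instructions are undefined in streaming mode, so the tests above check that 128-bit moves avoid Advanced SIMD entirely: under +nosve the value is bounced through the stack, and under +sve it is copied with the SVE "mov z0.d, z1.d". A minimal sketch of the attribute in use (illustrative only, not part of the patch; the function name is invented and an SME-capable GCC is assumed):

typedef unsigned char v16qi __attribute__((vector_size(16)));

/* Moving b (in q1) into the return register (q0) must not use the
   Advanced SIMD "mov v0.16b, v1.16b" here, because the function may
   run in streaming mode.  */
v16qi
pick_second (v16qi a, v16qi b) [[arm::streaming_compatible]]
{
  return b;
}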
View file
_service:tar_scm:0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch
Added
@@ -0,0 +1,224 @@
+From 54bf8fc616af5cdb9e4c787a2dfb2c516c8e425a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 28 Jul 2024 19:57:02 +0800
+Subject: [PATCH 179/188] LoongArch: Rework bswap{hi,si,di}2 definition
+
+Per a gcc-help thread we are generating sub-optimal code for
+__builtin_bswap{32,64}. To fix it:
+
+- Use a single revb.d instruction for bswapdi2.
+- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT,
+ revb.2h + rotri.w for !TARGET_64BIT.
+- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single
+ revb.2w instruction for bswapdi2 (x) r>> 32.
+
+Unfortunately I cannot figure out a way to make the compiler generate
+revb.4h or revh.{2w,d} instructions.
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.md (UNSPEC_REVB_2H, UNSPEC_REVB_4H,
+ UNSPEC_REVH_D): Remove UNSPECs.
+ (revb_4h, revh_d): Remove define_insn.
+ (revb_2h): Define as (rotatert:SI (bswap:SI x) 16) instead of
+ an UNSPEC.
+ (revb_2h_extend, revb_2w, *bswapsi2, bswapdi2): New define_insn.
+ (bswapsi2): Change to define_expand. Only expand to revb.2h +
+ rotri.w if !TARGET_64BIT.
+ (bswapdi2): Change to define_insn of which the output is just a
+ revb.d instruction.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/loongarch/revb.c: New test.
+---
+ gcc/config/loongarch/loongarch.md | 79 ++++++++++++-----------
+ gcc/testsuite/gcc.target/loongarch/revb.c | 61 +++++++++++++++++
+ 2 files changed, 104 insertions(+), 36 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/revb.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 1ebcfa0c7..b1c828dba 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -20,11 +20,6 @@
+ ;; <http://www.gnu.org/licenses/>.
+
+ (define_c_enum "unspec" [
+- ;; Integer operations that are too cumbersome to describe directly.
+- UNSPEC_REVB_2H
+- UNSPEC_REVB_4H
+- UNSPEC_REVH_D
+-
+ ;; Floating-point moves.
+ UNSPEC_LOAD_LOW
+ UNSPEC_LOAD_HIGH
+@@ -3151,55 +3146,67 @@
+
+ ;; Reverse the order of bytes of operand 1 and store the result in operand 0.
+
+-(define_insn "bswaphi2"
+- [(set (match_operand:HI 0 "register_operand" "=r")
+- (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
++(define_insn "revb_2h"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "r"))
++ (const_int 16)))]
+ ""
+ "revb.2h\t%0,%1"
+ [(set_attr "type" "shift")])
+
+-(define_insn_and_split "bswapsi2"
+- [(set (match_operand:SI 0 "register_operand" "=r")
+- (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+- ""
+- "#"
+- ""
+- [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_REVB_2H))
+- (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))]
+- ""
+- [(set_attr "insn_count" "2")])
+-
+-(define_insn_and_split "bswapdi2"
++(define_insn "revb_2h_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+- (bswap:DI (match_operand:DI 1 "register_operand" "r")))]
++ (sign_extend:DI
++ (rotatert:SI
++ (bswap:SI (match_operand:SI 1 "register_operand" "r"))
++ (const_int 16))))]
+ "TARGET_64BIT"
+- "#"
+- ""
+- [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_REVB_4H))
+- (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_REVH_D))]
+- ""
+- [(set_attr "insn_count" "2")])
++ "revb.2h\t%0,%1"
++ [(set_attr "type" "shift")])
+
+-(define_insn "revb_2h"
+- [(set (match_operand:SI 0 "register_operand" "=r")
+- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_REVB_2H))]
++(define_insn "bswaphi2"
++ [(set (match_operand:HI 0 "register_operand" "=r")
++ (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "revb.2h\t%0,%1"
+ [(set_attr "type" "shift")])
+
+-(define_insn "revb_4h"
++(define_insn "revb_2w"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVB_4H))]
++ (rotatert:DI (bswap:DI (match_operand:DI 1 "register_operand" "r"))
++ (const_int 32)))]
+ "TARGET_64BIT"
+- "revb.4h\t%0,%1"
++ "revb.2w\t%0,%1"
+ [(set_attr "type" "shift")])
+
+-(define_insn "revh_d"
++(define_insn "*bswapsi2"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
++ "TARGET_64BIT"
++ "revb.2w\t%0,%1"
++ [(set_attr "type" "shift")])
++
++(define_expand "bswapsi2"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
++ ""
++{
++ if (!TARGET_64BIT)
++ {
++ rtx t = gen_reg_rtx (SImode);
++ emit_insn (gen_revb_2h (t, operands[1]));
++ emit_insn (gen_rotrsi3 (operands[0], t, GEN_INT (16)));
++ DONE;
++ }
++})
++
++(define_insn "bswapdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVH_D))]
++ (bswap:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+- "revh.d\t%0,%1"
++ "revb.d\t%0,%1"
+ [(set_attr "type" "shift")])
++
+
+ ;;
+ ;; ....................
+diff --git a/gcc/testsuite/gcc.target/loongarch/revb.c b/gcc/testsuite/gcc.target/loongarch/revb.c +new file mode 100644 +index 000000000..27a5d0fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/revb.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**t1: ++** revb.2w \$r4,\$r4 ++** slli.w \$r4,\$r4,0 ++** jr \$r1 ++*/ ++unsigned int ++t1 (unsigned int x) ++{ ++ return __builtin_bswap32 (x); ++} ++ ++/* ++**t2: ++** revb.d \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t2 (unsigned long x) ++{ ++ return __builtin_bswap64 (x); ++} ++ ++/* ++**t3: ++** revb.2h \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned int ++t3 (unsigned int x) ++{ ++ return (x >> 8) & 0xff00ff | (x << 8) & 0xff00ff00; ++} ++ ++/* ++**t4: ++** revb.2w \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t4 (unsigned long x) ++{ ++ x = __builtin_bswap64 (x); ++ return x << 32 | x >> 32; ++} ++ ++/* ++**t5: ++** revb.2h \$r4,\$r4 ++** bstrpick.w \$r4,\$r4,15,0 ++** jr \$r1 ++*/ ++unsigned short ++t5 (unsigned short x) ++{ ++ return __builtin_bswap16 (x); ++} +-- +2.43.0 +
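The bswapsi2 expander above relies on a simple identity: a full 32-bit byte swap equals a byte swap within each 16-bit half (revb.2h) followed by a rotate by 16 (rotri.w). A small self-checking C sketch of that identity (illustrative only, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* What revb.2h computes: swap the two bytes inside each 16-bit half.  */
static uint32_t revb_2h (uint32_t x)
{
  return ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
}

int main (void)
{
  uint32_t x = 0x11223344u;
  /* Rotating the revb.2h result right by 16 yields the full byte swap,
     which is exactly the !TARGET_64BIT expansion of bswapsi2.  */
  assert (((revb_2h (x) >> 16) | (revb_2h (x) << 16)) == __builtin_bswap32 (x));
  return 0;
}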
View file
_service:tar_scm:0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
Added
@@ -0,0 +1,4506 @@
+From 0404dfa43633a35460aba1b96d04f62cc7d6103b Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:24 +0000
+Subject: [PATCH 081/157] [Backport][SME] aarch64: Mark relevant SVE
+ instructions as non-streaming
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=983b4365028e9a059b5fb1eef85a297bea19fc8e
+
+Following on from the previous Advanced SIMD patch, this one
+divides SVE instructions into non-streaming and streaming-
+compatible groups.
+
+gcc/
+ * config/aarch64/aarch64.h (TARGET_NON_STREAMING): New macro.
+ (TARGET_SVE2_AES, TARGET_SVE2_BITPERM): Use it.
+ (TARGET_SVE2_SHA3, TARGET_SVE2_SM4): Likewise.
+ * config/aarch64/aarch64-sve-builtins-base.def: Separate out
+ the functions that require PSTATE.SM to be 0 and guard them
+ with AARCH64_FL_SM_OFF.
+ * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise.
+ * config/aarch64/aarch64-sve-builtins.cc (check_required_extensions):
+ Enforce AARCH64_FL_SM_OFF requirements.
+ * config/aarch64/aarch64-sve.md (aarch64_wrffr): Require
+ TARGET_NON_STREAMING
+ (aarch64_rdffr, aarch64_rdffr_z, *aarch64_rdffr_z_ptest): Likewise.
+ (*aarch64_rdffr_ptest, *aarch64_rdffr_z_cc, *aarch64_rdffr_cc)
+ (@aarch64_ld<fn>f1<mode>): Likewise.
+ (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>)
+ (gather_load<mode><v_int_container>): Likewise
+ (mask_gather_load<mode><v_int_container>): Likewise.
+ (mask_gather_load<mode><v_int_container>): Likewise.
+ (*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked): Likewise.
+ (*mask_gather_load<mode><v_int_container>_sxtw): Likewise.
+ (*mask_gather_load<mode><v_int_container>_uxtw): Likewise.
+ (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>)
+ (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode>
+ <SVE_2BHSI:mode>): Likewise.
+ (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode>
+ <SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked)
+ (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode>
+ <SVE_2BHSI:mode>_sxtw): Likewise.
+ (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode>
+ <SVE_2BHSI:mode>_uxtw): Likewise.
+ (@aarch64_ldff1_gather<mode>, @aarch64_ldff1_gather<mode>): Likewise.
+ (*aarch64_ldff1_gather<mode>_sxtw): Likewise.
+ (*aarch64_ldff1_gather<mode>_uxtw): Likewise.
+ (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode>
+ <VNx4_NARROW:mode>): Likewise.
+ (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode>
+ <VNx2_NARROW:mode>): Likewise.
+ (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode>
+ <VNx2_NARROW:mode>_sxtw): Likewise.
+ (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode>
+ <VNx2_NARROW:mode>_uxtw): Likewise.
+ (@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>)
+ (@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>)
+ (*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw)
+ (*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw)
+ (scatter_store<mode><v_int_container>): Likewise.
+ (mask_scatter_store<mode><v_int_container>): Likewise.
+ (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked)
+ (*mask_scatter_store<mode><v_int_container>_sxtw): Likewise.
+ (*mask_scatter_store<mode><v_int_container>_uxtw): Likewise.
+ (@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>) + (@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>) + (*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw) + (*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw) + (@aarch64_sve_ld1ro<mode>, @aarch64_adr<mode>): Likewise. + (*aarch64_adr_sxtw, *aarch64_adr_uxtw_unspec): Likewise. + (*aarch64_adr_uxtw_and, @aarch64_adr<mode>_shift): Likewise. + (*aarch64_adr<mode>_shift, *aarch64_adr_shift_sxtw): Likewise. + (*aarch64_adr_shift_uxtw, @aarch64_sve_add_<optab><vsi2qi>): Likewise. + (@aarch64_sve_<sve_fp_op><mode>, fold_left_plus_<mode>): Likewise. + (mask_fold_left_plus_<mode>, @aarch64_sve_compact<mode>): Likewise. + * config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt<mode>) + (@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode> + <SVE_PARTIAL_I:mode>): Likewise. + (@aarch64_sve2_histcnt<mode>, @aarch64_sve2_histseg<mode>): Likewise. + (@aarch64_pred_<SVE2_MATCH:sve_int_op><mode>): Likewise. + (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_cc): Likewise. + (*aarch64_pred_<SVE2_MATCH:sve_int_op><mode>_ptest): Likewise. + * config/aarch64/iterators.md (SVE_FP_UNARY_INT): Make FEXPA + depend on TARGET_NON_STREAMING. + (SVE_BFLOAT_TERNARY_LONG): Likewise BFMMLA. + +gcc/testsuite/ + * g++.target/aarch64/sve/aarch64-ssve.exp: New harness. + * g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Add + -DSTREAMING_COMPATIBLE to the list of options. + * g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. + Fix pasto in variable name. + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Mark functions + as streaming-compatible if STREAMING_COMPATIBLE is defined. + * gcc.target/aarch64/sve/acle/asm/adda_f16.c: Disable for + streaming-compatible code. + * gcc.target/aarch64/sve/acle/asm/adda_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adda_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrb.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrd.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrh.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/adrw.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/compact_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/expa_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c: Likewise. 
+ * gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/mmla_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/mmla_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/mmla_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/mmla_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfb_gather.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfd_gather.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfh_gather.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/prfw_gather.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/rdffr_1.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tmad_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tmad_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tmad_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tsmul_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tsmul_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tsmul_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tssel_f16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tssel_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/tssel_f64.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/usmmla_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/aesd_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/aese_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bdep_u16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bdep_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bdep_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bdep_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bext_u16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bext_u32.c: Likewise. 
+ * gcc.target/aarch64/sve2/acle/asm/bext_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bext_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/histseg_s8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/histseg_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/match_s16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/match_s8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/match_u16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/match_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/rax1_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/rax1_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c: Likewise. 
+ * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c: Likewise. + * gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c: Likewise. +--- + .../aarch64/aarch64-sve-builtins-base.def | 158 +++++---- + .../aarch64/aarch64-sve-builtins-sve2.def | 63 ++-- + gcc/config/aarch64/aarch64-sve-builtins.cc | 7 + + gcc/config/aarch64/aarch64-sve.md | 124 +++---- + gcc/config/aarch64/aarch64-sve2.md | 14 +- + gcc/config/aarch64/aarch64.h | 11 +- + gcc/config/aarch64/iterators.md | 4 +- + .../g++.target/aarch64/sve/aarch64-ssve.exp | 308 ++++++++++++++++++ + .../aarch64/sve/acle/aarch64-sve-acle-asm.exp | 1 + + .../sve2/acle/aarch64-sve2-acle-asm.exp | 1 + + .../aarch64/sve/acle/aarch64-sve-acle-asm.exp | 1 + + .../aarch64/sve/acle/asm/adda_f16.c | 1 + + .../aarch64/sve/acle/asm/adda_f32.c | 1 + + .../aarch64/sve/acle/asm/adda_f64.c | 1 + + .../gcc.target/aarch64/sve/acle/asm/adrb.c | 1 + + .../gcc.target/aarch64/sve/acle/asm/adrd.c | 1 + + .../gcc.target/aarch64/sve/acle/asm/adrh.c | 1 + + .../gcc.target/aarch64/sve/acle/asm/adrw.c | 1 + + .../aarch64/sve/acle/asm/bfmmla_f32.c | 1 + + .../aarch64/sve/acle/asm/compact_f32.c | 1 + + .../aarch64/sve/acle/asm/compact_f64.c | 1 + + .../aarch64/sve/acle/asm/compact_s32.c | 1 + + .../aarch64/sve/acle/asm/compact_s64.c | 1 + + .../aarch64/sve/acle/asm/compact_u32.c | 1 + + .../aarch64/sve/acle/asm/compact_u64.c | 1 + + .../aarch64/sve/acle/asm/expa_f16.c | 1 + + .../aarch64/sve/acle/asm/expa_f32.c | 1 + + .../aarch64/sve/acle/asm/expa_f64.c | 1 + + .../aarch64/sve/acle/asm/ld1_gather_f32.c | 1 + + .../aarch64/sve/acle/asm/ld1_gather_f64.c | 1 + + .../aarch64/sve/acle/asm/ld1_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ld1_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ld1_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_bf16.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_f16.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_f32.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_f64.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_s16.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_s32.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_s8.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_u16.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_u32.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1ro_u8.c | 1 + + .../aarch64/sve/acle/asm/ld1sb_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ld1sb_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1sb_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ld1sb_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1sh_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ld1sh_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1sh_gather_u32.c | 1 + + 
.../aarch64/sve/acle/asm/ld1sh_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1sw_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1sw_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1ub_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ld1ub_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1ub_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ld1ub_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1uh_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ld1uh_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1uh_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ld1uh_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ld1uw_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ld1uw_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_bf16.c | 1 + + .../aarch64/sve/acle/asm/ldff1_f16.c | 1 + + .../aarch64/sve/acle/asm/ldff1_f32.c | 1 + + .../aarch64/sve/acle/asm/ldff1_f64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_gather_f32.c | 1 + + .../aarch64/sve/acle/asm/ldff1_gather_f64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_s16.c | 1 + + .../aarch64/sve/acle/asm/ldff1_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_s8.c | 1 + + .../aarch64/sve/acle/asm/ldff1_u16.c | 1 + + .../aarch64/sve/acle/asm/ldff1_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1_u8.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_s16.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_u16.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sb_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1sh_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sw_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sw_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sw_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1sw_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_s16.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_u16.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1ub_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_gather_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_gather_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_s32.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_u32.c | 1 + + .../aarch64/sve/acle/asm/ldff1uh_u64.c | 1 + + 
.../aarch64/sve/acle/asm/ldff1uw_gather_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uw_gather_u64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uw_s64.c | 1 + + .../aarch64/sve/acle/asm/ldff1uw_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_bf16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_f16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_f32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_f64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_s16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_s32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_s8.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_u16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_u32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1_u8.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sb_s16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sb_s32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sb_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sb_u16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sb_u32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sb_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sh_s32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sh_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sh_u32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sh_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sw_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1sw_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1ub_s16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1ub_s32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1ub_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1ub_u16.c | 1 + + .../aarch64/sve/acle/asm/ldnf1ub_u32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1ub_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1uh_s32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1uh_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1uh_u32.c | 1 + + .../aarch64/sve/acle/asm/ldnf1uh_u64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1uw_s64.c | 1 + + .../aarch64/sve/acle/asm/ldnf1uw_u64.c | 1 + + .../aarch64/sve/acle/asm/mmla_f32.c | 1 + + .../aarch64/sve/acle/asm/mmla_f64.c | 1 + + .../aarch64/sve/acle/asm/mmla_s32.c | 1 + + .../aarch64/sve/acle/asm/mmla_u32.c | 1 + + .../aarch64/sve/acle/asm/prfb_gather.c | 1 + + .../aarch64/sve/acle/asm/prfd_gather.c | 1 + + .../aarch64/sve/acle/asm/prfh_gather.c | 1 + + .../aarch64/sve/acle/asm/prfw_gather.c | 1 + + .../gcc.target/aarch64/sve/acle/asm/rdffr_1.c | 1 + + .../aarch64/sve/acle/asm/st1_scatter_f32.c | 1 + + .../aarch64/sve/acle/asm/st1_scatter_f64.c | 1 + + .../aarch64/sve/acle/asm/st1_scatter_s32.c | 1 + + .../aarch64/sve/acle/asm/st1_scatter_s64.c | 1 + + .../aarch64/sve/acle/asm/st1_scatter_u32.c | 1 + + .../aarch64/sve/acle/asm/st1_scatter_u64.c | 1 + + .../aarch64/sve/acle/asm/st1b_scatter_s32.c | 1 + + .../aarch64/sve/acle/asm/st1b_scatter_s64.c | 1 + + .../aarch64/sve/acle/asm/st1b_scatter_u32.c | 1 + + .../aarch64/sve/acle/asm/st1b_scatter_u64.c | 1 + + .../aarch64/sve/acle/asm/st1h_scatter_s32.c | 1 + + .../aarch64/sve/acle/asm/st1h_scatter_s64.c | 1 + + .../aarch64/sve/acle/asm/st1h_scatter_u32.c | 1 + + .../aarch64/sve/acle/asm/st1h_scatter_u64.c | 1 + + .../aarch64/sve/acle/asm/st1w_scatter_s64.c | 1 + + .../aarch64/sve/acle/asm/st1w_scatter_u64.c | 1 + + .../aarch64/sve/acle/asm/test_sve_acle.h | 11 +- + .../aarch64/sve/acle/asm/tmad_f16.c | 1 + + .../aarch64/sve/acle/asm/tmad_f32.c | 1 + + .../aarch64/sve/acle/asm/tmad_f64.c | 1 + + .../aarch64/sve/acle/asm/tsmul_f16.c | 1 + + .../aarch64/sve/acle/asm/tsmul_f32.c | 1 + + .../aarch64/sve/acle/asm/tsmul_f64.c | 1 + + .../aarch64/sve/acle/asm/tssel_f16.c | 1 + + .../aarch64/sve/acle/asm/tssel_f32.c | 1 + + 
.../aarch64/sve/acle/asm/tssel_f64.c | 1 + + .../aarch64/sve/acle/asm/usmmla_s32.c | 1 + + .../sve2/acle/aarch64-sve2-acle-asm.exp | 1 + + .../aarch64/sve2/acle/asm/aesd_u8.c | 1 + + .../aarch64/sve2/acle/asm/aese_u8.c | 1 + + .../aarch64/sve2/acle/asm/aesimc_u8.c | 1 + + .../aarch64/sve2/acle/asm/aesmc_u8.c | 1 + + .../aarch64/sve2/acle/asm/bdep_u16.c | 1 + + .../aarch64/sve2/acle/asm/bdep_u32.c | 1 + + .../aarch64/sve2/acle/asm/bdep_u64.c | 1 + + .../aarch64/sve2/acle/asm/bdep_u8.c | 1 + + .../aarch64/sve2/acle/asm/bext_u16.c | 1 + + .../aarch64/sve2/acle/asm/bext_u32.c | 1 + + .../aarch64/sve2/acle/asm/bext_u64.c | 1 + + .../aarch64/sve2/acle/asm/bext_u8.c | 1 + + .../aarch64/sve2/acle/asm/bgrp_u16.c | 1 + + .../aarch64/sve2/acle/asm/bgrp_u32.c | 1 + + .../aarch64/sve2/acle/asm/bgrp_u64.c | 1 + + .../aarch64/sve2/acle/asm/bgrp_u8.c | 1 + + .../aarch64/sve2/acle/asm/histcnt_s32.c | 1 + + .../aarch64/sve2/acle/asm/histcnt_s64.c | 1 + + .../aarch64/sve2/acle/asm/histcnt_u32.c | 1 + + .../aarch64/sve2/acle/asm/histcnt_u64.c | 1 + + .../aarch64/sve2/acle/asm/histseg_s8.c | 1 + + .../aarch64/sve2/acle/asm/histseg_u8.c | 1 + + .../aarch64/sve2/acle/asm/ldnt1_gather_f32.c | 1 + + .../aarch64/sve2/acle/asm/ldnt1_gather_f64.c | 1 + + .../aarch64/sve2/acle/asm/ldnt1_gather_s32.c | 1 + + .../aarch64/sve2/acle/asm/ldnt1_gather_s64.c | 1 + + .../aarch64/sve2/acle/asm/ldnt1_gather_u32.c | 1 + + .../aarch64/sve2/acle/asm/ldnt1_gather_u64.c | 1 + + .../sve2/acle/asm/ldnt1sb_gather_s32.c | 1 + + .../sve2/acle/asm/ldnt1sb_gather_s64.c | 1 + + .../sve2/acle/asm/ldnt1sb_gather_u32.c | 1 + + .../sve2/acle/asm/ldnt1sb_gather_u64.c | 1 + + .../sve2/acle/asm/ldnt1sh_gather_s32.c | 1 + + .../sve2/acle/asm/ldnt1sh_gather_s64.c | 1 + + .../sve2/acle/asm/ldnt1sh_gather_u32.c | 1 + + .../sve2/acle/asm/ldnt1sh_gather_u64.c | 1 + + .../sve2/acle/asm/ldnt1sw_gather_s64.c | 1 + + .../sve2/acle/asm/ldnt1sw_gather_u64.c | 1 + + .../sve2/acle/asm/ldnt1ub_gather_s32.c | 1 + + .../sve2/acle/asm/ldnt1ub_gather_s64.c | 1 + + .../sve2/acle/asm/ldnt1ub_gather_u32.c | 1 + + .../sve2/acle/asm/ldnt1ub_gather_u64.c | 1 + + .../sve2/acle/asm/ldnt1uh_gather_s32.c | 1 + + .../sve2/acle/asm/ldnt1uh_gather_s64.c | 1 + + .../sve2/acle/asm/ldnt1uh_gather_u32.c | 1 + + .../sve2/acle/asm/ldnt1uh_gather_u64.c | 1 + + .../sve2/acle/asm/ldnt1uw_gather_s64.c | 1 + + .../sve2/acle/asm/ldnt1uw_gather_u64.c | 1 + + .../aarch64/sve2/acle/asm/match_s16.c | 1 + + .../aarch64/sve2/acle/asm/match_s8.c | 1 + + .../aarch64/sve2/acle/asm/match_u16.c | 1 + + .../aarch64/sve2/acle/asm/match_u8.c | 1 + + .../aarch64/sve2/acle/asm/nmatch_s16.c | 1 + + .../aarch64/sve2/acle/asm/nmatch_s8.c | 1 + + .../aarch64/sve2/acle/asm/nmatch_u16.c | 1 + + .../aarch64/sve2/acle/asm/nmatch_u8.c | 1 + + .../aarch64/sve2/acle/asm/pmullb_pair_u64.c | 1 + + .../aarch64/sve2/acle/asm/pmullt_pair_u64.c | 1 + + .../aarch64/sve2/acle/asm/rax1_s64.c | 1 + + .../aarch64/sve2/acle/asm/rax1_u64.c | 1 + + .../aarch64/sve2/acle/asm/sm4e_u32.c | 1 + + .../aarch64/sve2/acle/asm/sm4ekey_u32.c | 1 + + .../aarch64/sve2/acle/asm/stnt1_scatter_f32.c | 1 + + .../aarch64/sve2/acle/asm/stnt1_scatter_f64.c | 1 + + .../aarch64/sve2/acle/asm/stnt1_scatter_s32.c | 1 + + .../aarch64/sve2/acle/asm/stnt1_scatter_s64.c | 1 + + .../aarch64/sve2/acle/asm/stnt1_scatter_u32.c | 1 + + .../aarch64/sve2/acle/asm/stnt1_scatter_u64.c | 1 + + .../sve2/acle/asm/stnt1b_scatter_s32.c | 1 + + .../sve2/acle/asm/stnt1b_scatter_s64.c | 1 + + .../sve2/acle/asm/stnt1b_scatter_u32.c | 1 + + 
.../sve2/acle/asm/stnt1b_scatter_u64.c | 1 + + .../sve2/acle/asm/stnt1h_scatter_s32.c | 1 + + .../sve2/acle/asm/stnt1h_scatter_s64.c | 1 + + .../sve2/acle/asm/stnt1h_scatter_u32.c | 1 + + .../sve2/acle/asm/stnt1h_scatter_u64.c | 1 + + .../sve2/acle/asm/stnt1w_scatter_s64.c | 1 + + .../sve2/acle/asm/stnt1w_scatter_u64.c | 1 + + 279 files changed, 805 insertions(+), 165 deletions(-) + create mode 100644 gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +index 756469959..e732b4792 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -25,12 +25,7 @@ DEF_SVE_FUNCTION (svacgt, compare_opt_n, all_float, implicit) + DEF_SVE_FUNCTION (svacle, compare_opt_n, all_float, implicit) + DEF_SVE_FUNCTION (svaclt, compare_opt_n, all_float, implicit) + DEF_SVE_FUNCTION (svadd, binary_opt_n, all_arith, mxz) +-DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) + DEF_SVE_FUNCTION (svaddv, reduction_wide, all_arith, implicit) +-DEF_SVE_FUNCTION (svadrb, adr_offset, none, none) +-DEF_SVE_FUNCTION (svadrd, adr_index, none, none) +-DEF_SVE_FUNCTION (svadrh, adr_index, none, none) +-DEF_SVE_FUNCTION (svadrw, adr_index, none, none) + DEF_SVE_FUNCTION (svand, binary_opt_n, all_integer, mxz) + DEF_SVE_FUNCTION (svand, binary_opt_n, b, z) + DEF_SVE_FUNCTION (svandv, reduction, all_integer, implicit) +@@ -75,7 +70,6 @@ DEF_SVE_FUNCTION (svcnth_pat, count_pat, none, none) + DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit) + DEF_SVE_FUNCTION (svcntw, count_inherent, none, none) + DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none) +-DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) + DEF_SVE_FUNCTION (svcreate2, create, all_data, none) + DEF_SVE_FUNCTION (svcreate3, create, all_data, none) + DEF_SVE_FUNCTION (svcreate4, create, all_data, none) +@@ -93,7 +87,6 @@ DEF_SVE_FUNCTION (svdupq_lane, binary_uint64_n, all_data, none) + DEF_SVE_FUNCTION (sveor, binary_opt_n, all_integer, mxz) + DEF_SVE_FUNCTION (sveor, binary_opt_n, b, z) + DEF_SVE_FUNCTION (sveorv, reduction, all_integer, implicit) +-DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) + DEF_SVE_FUNCTION (svext, ext, all_data, none) + DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz) + DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz) +@@ -106,51 +99,13 @@ DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none) + DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit) + DEF_SVE_FUNCTION (svlastb, reduction, all_data, implicit) + DEF_SVE_FUNCTION (svld1, load, all_data, implicit) +-DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) +-DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) + DEF_SVE_FUNCTION (svld1rq, load_replicate, all_data, implicit) + DEF_SVE_FUNCTION (svld1sb, load_ext, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) + DEF_SVE_FUNCTION (svld1sh, load_ext, sd_integer, implicit) +-DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) +-DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) + DEF_SVE_FUNCTION (svld1sw, load_ext, d_integer, implicit) +-DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) +-DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) + DEF_SVE_FUNCTION (svld1ub, load_ext, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svld1ub_gather, 
load_ext_gather_offset, sd_integer, implicit) + DEF_SVE_FUNCTION (svld1uh, load_ext, sd_integer, implicit) +-DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) +-DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) + DEF_SVE_FUNCTION (svld1uw, load_ext, d_integer, implicit) +-DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) +-DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) +-DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) +-DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) +-DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) +-DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_index, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_offset, d_integer, implicit) +-DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, d_integer, implicit) +-DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) +-DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) +-DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) +-DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) +-DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) +-DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) + DEF_SVE_FUNCTION (svldnt1, load, all_data, implicit) + DEF_SVE_FUNCTION (svld2, load, all_data, implicit) + DEF_SVE_FUNCTION (svld3, load, all_data, implicit) +@@ -173,7 +128,6 @@ DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svmla_lane, ternary_lane, all_float, none) + DEF_SVE_FUNCTION (svmls, ternary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svmls_lane, ternary_lane, all_float, none) +-DEF_SVE_FUNCTION (svmmla, mmla, none, none) + DEF_SVE_FUNCTION (svmov, unary, b, z) + DEF_SVE_FUNCTION (svmsb, ternary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svmul, binary_opt_n, all_arith, mxz) +@@ -197,13 +151,9 @@ DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none) + DEF_SVE_FUNCTION (svpfirst, unary, b, implicit) + DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit) + DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit) +-DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) + DEF_SVE_FUNCTION (svprfd, prefetch, none, implicit) +-DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) + DEF_SVE_FUNCTION (svprfh, prefetch, none, implicit) +-DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) + DEF_SVE_FUNCTION (svprfw, 
prefetch, none, implicit) +-DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) + DEF_SVE_FUNCTION (svptest_any, ptest, none, implicit) + DEF_SVE_FUNCTION (svptest_first, ptest, none, implicit) + DEF_SVE_FUNCTION (svptest_last, ptest, none, implicit) +@@ -244,7 +194,6 @@ DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, s_integer, none) + DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, sd_integer, none) + DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, none) + DEF_SVE_FUNCTION (svrbit, unary, all_integer, mxz) +-DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) + DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) + DEF_SVE_FUNCTION (svrecps, binary, all_float, none) + DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) +@@ -269,20 +218,12 @@ DEF_SVE_FUNCTION (svsel, binary, b, implicit) + DEF_SVE_FUNCTION (svset2, set, all_data, none) + DEF_SVE_FUNCTION (svset3, set, all_data, none) + DEF_SVE_FUNCTION (svset4, set, all_data, none) +-DEF_SVE_FUNCTION (svsetffr, setffr, none, none) + DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit) + DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz) + DEF_SVE_FUNCTION (svst1, store, all_data, implicit) +-DEF_SVE_FUNCTION (svst1_scatter, store_scatter_index, sd_data, implicit) +-DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) + DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit) +-DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) + DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit) +-DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) +-DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) + DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit) +-DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) +-DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_offset, d_integer, implicit) + DEF_SVE_FUNCTION (svst2, store, all_data, implicit) + DEF_SVE_FUNCTION (svst3, store, all_data, implicit) + DEF_SVE_FUNCTION (svst4, store, all_data, implicit) +@@ -290,13 +231,10 @@ DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit) + DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz) + DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none) +-DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) + DEF_SVE_FUNCTION (svtrn1, binary, all_data, none) + DEF_SVE_FUNCTION (svtrn1, binary_pred, all_pred, none) + DEF_SVE_FUNCTION (svtrn2, binary, all_data, none) + DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none) +-DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) +-DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) + DEF_SVE_FUNCTION (svundef, inherent, all_data, none) + DEF_SVE_FUNCTION (svundef2, inherent, all_data, none) + DEF_SVE_FUNCTION (svundef3, inherent, all_data, none) +@@ -311,13 +249,78 @@ DEF_SVE_FUNCTION (svuzp2, binary, all_data, none) + DEF_SVE_FUNCTION (svuzp2, binary_pred, all_pred, none) + DEF_SVE_FUNCTION (svwhilele, compare_scalar, while, none) + DEF_SVE_FUNCTION (svwhilelt, compare_scalar, while, none) +-DEF_SVE_FUNCTION (svwrffr, setffr, none, implicit) + DEF_SVE_FUNCTION (svzip1, binary, all_data, none) + DEF_SVE_FUNCTION (svzip1, binary_pred, all_pred, none) + DEF_SVE_FUNCTION (svzip2, binary, all_data, none) + DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) + #undef REQUIRED_EXTENSIONS + ++#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SM_OFF ++DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) ++DEF_SVE_FUNCTION 
(svadrb, adr_offset, none, none) ++DEF_SVE_FUNCTION (svadrd, adr_index, none, none) ++DEF_SVE_FUNCTION (svadrh, adr_index, none, none) ++DEF_SVE_FUNCTION (svadrw, adr_index, none, none) ++DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) ++DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) ++DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) ++DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) ++DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1ub_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) ++DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) ++DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) ++DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) ++DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svmmla, mmla, none, none) ++DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) ++DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) ++DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) ++DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) ++DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) ++DEF_SVE_FUNCTION (svsetffr, setffr, none, none) ++DEF_SVE_FUNCTION 
(svst1_scatter, store_scatter_index, sd_data, implicit) ++DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) ++DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) ++DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) ++DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) ++DEF_SVE_FUNCTION (svwrffr, setffr, none, implicit) ++#undef REQUIRED_EXTENSIONS ++ + #define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16 + DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) + DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) +@@ -325,27 +328,37 @@ DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) + DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) + DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) + DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) +-DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) + DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) + DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) + #undef REQUIRED_EXTENSIONS + ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_BF16 \ ++ | AARCH64_FL_SM_OFF) ++DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) ++#undef REQUIRED_EXTENSIONS ++ + #define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM +-DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) +-DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) + DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) + DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none) + DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) + DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) + #undef REQUIRED_EXTENSIONS + +-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_I8MM \ ++ | AARCH64_FL_SM_OFF) ++DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) ++DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_F32MM \ ++ | AARCH64_FL_SM_OFF) + DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) + #undef REQUIRED_EXTENSIONS + + #define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM +-DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) +-DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) + DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) + DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none) + DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none) +@@ -353,3 +366,10 @@ DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none) + DEF_SVE_FUNCTION (svzip1q, binary, all_data, none) + DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) + #undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_F64MM \ ++ | AARCH64_FL_SM_OFF) ++DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) ++DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) ++#undef REQUIRED_EXTENSIONS +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +index d5f23a887..3c0a0e072 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +@@ -51,24 +51,9 @@ DEF_SVE_FUNCTION (sveor3, ternary_opt_n, all_integer, none) + DEF_SVE_FUNCTION (sveorbt, ternary_opt_n, all_integer, none) + DEF_SVE_FUNCTION (sveortb, ternary_opt_n, all_integer, none) + DEF_SVE_FUNCTION (svhadd, binary_opt_n, all_integer, mxz) +-DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z) +-DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none) + DEF_SVE_FUNCTION (svhsub, binary_opt_n, all_integer, mxz) + DEF_SVE_FUNCTION (svhsubr, binary_opt_n, all_integer, mxz) +-DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit) +-DEF_SVE_FUNCTION (svldnt1_gather, load_gather_vs, sd_data, implicit) +-DEF_SVE_FUNCTION (svldnt1sb_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_index_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_offset_restricted, d_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_index_restricted, d_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1ub_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_index_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_offset_restricted, d_integer, implicit) +-DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_index_restricted, d_integer, implicit) + DEF_SVE_FUNCTION (svlogb, unary_to_int, all_float, mxz) +-DEF_SVE_FUNCTION (svmatch, compare, bh_integer, implicit) + DEF_SVE_FUNCTION (svmaxp, binary, all_arith, mx) + DEF_SVE_FUNCTION (svmaxnmp, binary, all_float, mx) + DEF_SVE_FUNCTION (svmla_lane, ternary_lane, hsd_integer, none) +@@ -91,7 +76,6 @@ DEF_SVE_FUNCTION (svmullb_lane, binary_long_lane, sd_integer, none) + DEF_SVE_FUNCTION (svmullt, binary_long_opt_n, hsd_integer, none) + DEF_SVE_FUNCTION (svmullt_lane, binary_long_lane, sd_integer, none) + DEF_SVE_FUNCTION (svnbsl, ternary_opt_n, all_integer, none) +-DEF_SVE_FUNCTION (svnmatch, compare, bh_integer, implicit) + DEF_SVE_FUNCTION (svpmul, binary_opt_n, b_unsigned, none) + DEF_SVE_FUNCTION (svpmullb, binary_long_opt_n, hd_unsigned, none) + DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, bs_unsigned, none) +@@ -164,13 +148,6 @@ DEF_SVE_FUNCTION (svsli, ternary_shift_left_imm, all_integer, none) + DEF_SVE_FUNCTION (svsqadd, binary_int_opt_n, all_unsigned, mxz) + DEF_SVE_FUNCTION (svsra, ternary_shift_right_imm, all_integer, none) + DEF_SVE_FUNCTION (svsri, ternary_shift_right_imm, all_integer, none) +-DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_index_restricted, sd_data, implicit) +-DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_offset_restricted, sd_data, implicit) +-DEF_SVE_FUNCTION (svstnt1b_scatter, store_scatter_offset_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_index_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_offset_restricted, sd_integer, implicit) +-DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, implicit) +-DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_offset_restricted, 
d_integer, implicit) + DEF_SVE_FUNCTION (svsubhnb, binary_narrowb_opt_n, hsd_integer, none) + DEF_SVE_FUNCTION (svsubhnt, binary_narrowt_opt_n, hsd_integer, none) + DEF_SVE_FUNCTION (svsublb, binary_long_opt_n, hsd_integer, none) +@@ -191,7 +168,36 @@ DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) + + #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ + | AARCH64_FL_SVE2 \ +- | AARCH64_FL_SVE2_AES) ++ | AARCH64_FL_SM_OFF) ++DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z) ++DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none) ++DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit) ++DEF_SVE_FUNCTION (svldnt1_gather, load_gather_vs, sd_data, implicit) ++DEF_SVE_FUNCTION (svldnt1sb_gather, load_ext_gather_offset_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_index_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_offset_restricted, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_index_restricted, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1ub_gather, load_ext_gather_offset_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_index_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_offset_restricted, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_index_restricted, d_integer, implicit) ++DEF_SVE_FUNCTION (svmatch, compare, bh_integer, implicit) ++DEF_SVE_FUNCTION (svnmatch, compare, bh_integer, implicit) ++DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_index_restricted, sd_data, implicit) ++DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_offset_restricted, sd_data, implicit) ++DEF_SVE_FUNCTION (svstnt1b_scatter, store_scatter_offset_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_index_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_offset_restricted, sd_integer, implicit) ++DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, implicit) ++DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_offset_restricted, d_integer, implicit) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ ++ | AARCH64_FL_SVE2 \ ++ | AARCH64_FL_SVE2_AES \ ++ | AARCH64_FL_SM_OFF) + DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none) + DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none) + DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none) +@@ -202,7 +208,8 @@ DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none) + + #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ + | AARCH64_FL_SVE2 \ +- | AARCH64_FL_SVE2_BITPERM) ++ | AARCH64_FL_SVE2_BITPERM \ ++ | AARCH64_FL_SM_OFF) + DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none) + DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none) + DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) +@@ -210,13 +217,15 @@ DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) + + #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ + | AARCH64_FL_SVE2 \ +- | AARCH64_FL_SVE2_SHA3) ++ | AARCH64_FL_SVE2_SHA3 \ ++ | AARCH64_FL_SM_OFF) + DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) + #undef REQUIRED_EXTENSIONS + + #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ + | AARCH64_FL_SVE2 
\
+-			     | AARCH64_FL_SVE2_SM4)
++			     | AARCH64_FL_SVE2_SM4 \
++			     | AARCH64_FL_SM_OFF)
+ DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
+ DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
+ #undef REQUIRED_EXTENSIONS
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index c439f2e8a..5f3a2baea 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -738,6 +738,13 @@ check_required_extensions (location_t location, tree fndecl,
+   if (missing_extensions == 0)
+     return check_required_registers (location, fndecl);
+
++  if (missing_extensions & AARCH64_FL_SM_OFF)
++    {
++      error_at (location, "ACLE function %qD cannot be called when"
++		" SME streaming mode is enabled", fndecl);
++      return false;
++    }
++
+   static const struct {
+     aarch64_feature_flags flag;
+     const char *name;
+diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
+index 28b73d807..a8a5dc3a2 100644
+--- a/gcc/config/aarch64/aarch64-sve.md
++++ b/gcc/config/aarch64/aarch64-sve.md
+@@ -1082,7 +1082,7 @@
+ 	(match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
+    (set (reg:VNx16BI FFRT_REGNUM)
+ 	(unspec:VNx16BI (match_dup 0) UNSPEC_WRFFR))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    setffr
+    wrffr\t%0.b"
+
+@@ -1123,7 +1123,7 @@
+ (define_insn "aarch64_rdffr"
+   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ 	(reg:VNx16BI FFRT_REGNUM))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "rdffr\t%0.b"
+ )
+
+@@ -1133,7 +1133,7 @@
+ 	(and:VNx16BI
+ 	  (reg:VNx16BI FFRT_REGNUM)
+ 	  (match_operand:VNx16BI 1 "register_operand" "Upa")))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "rdffr\t%0.b, %1/z"
+ )
+
+@@ -1149,7 +1149,7 @@
+ 	     (match_dup 1))
+ 	   UNSPEC_PTEST))
+    (clobber (match_scratch:VNx16BI 0 "=Upa"))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "rdffrs\t%0.b, %1/z"
+ )
+
+@@ -1163,7 +1163,7 @@
+ 	   (reg:VNx16BI FFRT_REGNUM)
+ 	   UNSPEC_PTEST))
+    (clobber (match_scratch:VNx16BI 0 "=Upa"))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "rdffrs\t%0.b, %1/z"
+ )
+
+@@ -1182,7 +1182,7 @@
+ 	(and:VNx16BI
+ 	  (reg:VNx16BI FFRT_REGNUM)
+ 	  (match_dup 1)))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "rdffrs\t%0.b, %1/z"
+ )
+
+@@ -1197,7 +1197,7 @@
+ 	   UNSPEC_PTEST))
+    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ 	(reg:VNx16BI FFRT_REGNUM))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "rdffrs\t%0.b, %1/z"
+ )
+
+@@ -1327,7 +1327,7 @@
+ 	   (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
+ 	   (reg:VNx16BI FFRT_REGNUM)
+ 	  SVE_LDFF1_LDNF1))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
+ )
+
+@@ -1361,7 +1361,9 @@
+ 	     (reg:VNx16BI FFRT_REGNUM)
+ 	    SVE_LDFF1_LDNF1))
+ 	  UNSPEC_PRED_X))
+-  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
++  "TARGET_SVE
++   && TARGET_NON_STREAMING
++   && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+   "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
+   "&& !CONSTANT_P (operands[3])"
+   {
+@@ -1409,7 +1411,7 @@
+ 	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ 	   (mem:BLK (scratch))
+ 	  UNSPEC_LD1_GATHER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
+   }
+@@ -1427,7 +1429,7 @@
+ 	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+ 	   (mem:BLK 
(scratch)) + UNSPEC_LD1_GATHER)) +- "TARGET_SVE" ++ "TARGET_SVE && TARGET_NON_STREAMING" + "@ + ld1<Vesize>\t%0.s, %5/z, %2.s + ld1<Vesize>\t%0.s, %5/z, %2.s, #%1 +@@ -1449,7 +1451,7 @@ + (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i") + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) +- "TARGET_SVE" ++ "TARGET_SVE && TARGET_NON_STREAMING" + "@ + ld1<Vesize>\t%0.d, %5/z, %2.d + ld1<Vesize>\t%0.d, %5/z, %2.d, #%1 +@@ -1472,7 +1474,7 @@ + (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) +- "TARGET_SVE" ++ "TARGET_SVE && TARGET_NON_STREAMING" + "@ + ld1<Vesize>\t%0.d, %5/z, %1, %2.d, <su>xtw + ld1<Vesize>\t%0.d, %5/z, %1, %2.d, <su>xtw %p4" +@@ -1499,7 +1501,7 @@ + (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) +- "TARGET_SVE" ++ "TARGET_SVE && TARGET_NON_STREAMING" + "@ + ld1<Vesize>\t%0.d, %5/z, %1, %2.d, sxtw + ld1<Vesize>\t%0.d, %5/z, %1, %2.d, sxtw %p4" +@@ -1523,7 +1525,7 @@ + (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) +- "TARGET_SVE" ++ "TARGET_SVE && TARGET_NON_STREAMING" + "@ + ld1<Vesize>\t%0.d, %5/z, %1, %2.d, uxtw + ld1<Vesize>\t%0.d, %5/z, %1, %2.d, uxtw %p4" +@@ -1557,7 +1559,9 @@ + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) + UNSPEC_PRED_X)) +- "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0" ++ "TARGET_SVE ++ && TARGET_NON_STREAMING ++ && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0" + "@ + ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, %2.s + ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, %2.s, #%1 +@@ -1587,7 +1591,9 @@ + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) + UNSPEC_PRED_X)) +- "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" ++ "TARGET_SVE ++ && TARGET_NON_STREAMING ++ && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" + "@ + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %2.d + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %2.d, #%1 +@@ -1618,7 +1624,9 @@ + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) + UNSPEC_PRED_X)) +- "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" ++ "TARGET_SVE ++ && TARGET_NON_STREAMING ++ && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" + "@ + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %1, %2.d, <ANY_EXTEND2:su>xtw + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %1, %2.d, <ANY_EXTEND2:su>xtw %p4" +@@ -1650,7 +1658,9 @@ + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) + UNSPEC_PRED_X)) +- "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" ++ "TARGET_SVE ++ && TARGET_NON_STREAMING ++ && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" + "@ + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %1, %2.d, sxtw + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %1, %2.d, sxtw %p4" +@@ -1679,7 +1689,9 @@ + (mem:BLK (scratch)) + UNSPEC_LD1_GATHER)) + UNSPEC_PRED_X)) +- "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" ++ "TARGET_SVE ++ && TARGET_NON_STREAMING ++ && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" + "@ + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %1, %2.d, uxtw + ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, %1, %2.d, uxtw %p4" +@@ -1710,7 +1722,7 @@ + (mem:BLK (scratch)) + (reg:VNx16BI FFRT_REGNUM) + UNSPEC_LDFF1_GATHER)) +- "TARGET_SVE" ++ "TARGET_SVE && TARGET_NON_STREAMING" + "@ + ldff1w\t%0.s, %5/z, %2.s + ldff1w\t%0.s, 
%5/z, %2.s, #%1
+@@ -1733,7 +1745,7 @@
+ 	   (mem:BLK (scratch))
+ 	   (reg:VNx16BI FFRT_REGNUM)
+ 	  UNSPEC_LDFF1_GATHER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1d\t%0.d, %5/z, %2.d
+    ldff1d\t%0.d, %5/z, %2.d, #%1
+@@ -1758,7 +1770,7 @@
+ 	   (mem:BLK (scratch))
+ 	   (reg:VNx16BI FFRT_REGNUM)
+ 	  UNSPEC_LDFF1_GATHER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1d\t%0.d, %5/z, %1, %2.d, sxtw
+    ldff1d\t%0.d, %5/z, %1, %2.d, sxtw %p4"
+@@ -1782,7 +1794,7 @@
+ 	   (mem:BLK (scratch))
+ 	   (reg:VNx16BI FFRT_REGNUM)
+ 	  UNSPEC_LDFF1_GATHER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1d\t%0.d, %5/z, %1, %2.d, uxtw
+    ldff1d\t%0.d, %5/z, %1, %2.d, uxtw %p4"
+@@ -1817,7 +1829,7 @@
+ 	     (reg:VNx16BI FFRT_REGNUM)
+ 	    UNSPEC_LDFF1_GATHER))
+ 	  UNSPEC_PRED_X))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, %2.s
+    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, %2.s, #%1
+@@ -1848,7 +1860,7 @@
+ 	     (reg:VNx16BI FFRT_REGNUM)
+ 	    UNSPEC_LDFF1_GATHER))
+ 	  UNSPEC_PRED_X))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, %2.d
+    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, %2.d, #%1
+@@ -1881,7 +1893,7 @@
+ 	     (reg:VNx16BI FFRT_REGNUM)
+ 	    UNSPEC_LDFF1_GATHER))
+ 	  UNSPEC_PRED_X))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, %1, %2.d, sxtw
+    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, %1, %2.d, sxtw %p4"
+@@ -1910,7 +1922,7 @@
+ 	     (reg:VNx16BI FFRT_REGNUM)
+ 	    UNSPEC_LDFF1_GATHER))
+ 	  UNSPEC_PRED_X))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, %1, %2.d, uxtw
+    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, %1, %2.d, uxtw %p4"
+@@ -1985,7 +1997,7 @@
+ 	   UNSPEC_SVE_PREFETCH_GATHER)
+ 	 (match_operand:DI 7 "const_int_operand")
+ 	 (match_operand:DI 8 "const_int_operand"))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     static const char *const insns[][2] = {
+       "prf<SVE_FULL_I:Vesize>", "%0, %2.s",
+@@ -2014,7 +2026,7 @@
+ 	   UNSPEC_SVE_PREFETCH_GATHER)
+ 	 (match_operand:DI 7 "const_int_operand")
+ 	 (match_operand:DI 8 "const_int_operand"))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     static const char *const insns[][2] = {
+       "prf<SVE_FULL_I:Vesize>", "%0, %2.d",
+@@ -2045,7 +2057,7 @@
+ 	   UNSPEC_SVE_PREFETCH_GATHER)
+ 	 (match_operand:DI 7 "const_int_operand")
+ 	 (match_operand:DI 8 "const_int_operand"))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     static const char *const insns[][2] = {
+       "prfb", "%0, %1, %2.d, sxtw",
+@@ -2075,7 +2087,7 @@
+ 	   UNSPEC_SVE_PREFETCH_GATHER)
+ 	 (match_operand:DI 7 "const_int_operand")
+ 	 (match_operand:DI 8 "const_int_operand"))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     static const char *const insns[][2] = {
+       "prfb", "%0, %1, %2.d, uxtw",
+@@ -2242,7 +2254,7 @@
+ 	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
+ 	   (match_operand:SVE_24 4 "register_operand")
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
+   }
+@@ -2260,7 +2272,7 @@
+ 	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+ 	   (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<Vesize>\t%4.s, %5, %1.s
+    st1<Vesize>\t%4.s, %5, %1.s, #%0
+@@ -2282,7 +2294,7 @@
+ 	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
+ 	   (match_operand:SVE_2 4 "register_operand" "w, w, w, w")
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<Vesize>\t%4.d, %5, %1.d
+    st1<Vesize>\t%4.d, %5, %1.d, #%0
+@@ -2305,7 +2317,7 @@
+ 	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ 	   (match_operand:SVE_2 4 "register_operand" "w, w")
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<Vesize>\t%4.d, %5, %0, %1.d, <su>xtw
+    st1<Vesize>\t%4.d, %5, %0, %1.d, <su>xtw %p3"
+@@ -2332,7 +2344,7 @@
+ 	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ 	   (match_operand:SVE_2 4 "register_operand" "w, w")
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<Vesize>\t%4.d, %5, %0, %1.d, sxtw
+    st1<Vesize>\t%4.d, %5, %0, %1.d, sxtw %p3"
+@@ -2356,7 +2368,7 @@
+ 	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ 	   (match_operand:SVE_2 4 "register_operand" "w, w")
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<Vesize>\t%4.d, %5, %0, %1.d, uxtw
+    st1<Vesize>\t%4.d, %5, %0, %1.d, uxtw %p3"
+@@ -2384,7 +2396,7 @@
+ 	   (truncate:VNx4_NARROW
+ 	     (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<VNx4_NARROW:Vesize>\t%4.s, %5, %1.s
+    st1<VNx4_NARROW:Vesize>\t%4.s, %5, %1.s, #%0
+@@ -2407,7 +2419,7 @@
+ 	   (truncate:VNx2_NARROW
+ 	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<VNx2_NARROW:Vesize>\t%4.d, %5, %1.d
+    st1<VNx2_NARROW:Vesize>\t%4.d, %5, %1.d, #%0
+@@ -2432,7 +2444,7 @@
+ 	   (truncate:VNx2_NARROW
+ 	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<VNx2_NARROW:Vesize>\t%4.d, %5, %0, %1.d, sxtw
+    st1<VNx2_NARROW:Vesize>\t%4.d, %5, %0, %1.d, sxtw %p3"
+@@ -2456,7 +2468,7 @@
+ 	   (truncate:VNx2_NARROW
+ 	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))
+ 	  UNSPEC_ST1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    st1<VNx2_NARROW:Vesize>\t%4.d, %5, %0, %1.d, uxtw
+    st1<VNx2_NARROW:Vesize>\t%4.d, %5, %0, %1.d, uxtw %p3"
+@@ -2602,7 +2614,7 @@
+ 	 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
+ 			   "UO<Vesize>")
+ 	  UNSPEC_LD1RO))
+-  "TARGET_SVE_F64MM"
++  "TARGET_SVE_F64MM && TARGET_NON_STREAMING"
+   {
+     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
+     return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
+@@ -3834,7 +3846,7 @@
+ 	   (match_operand:SVE_FULL_SDI 1 "register_operand" "w")
+ 	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
+ 	  UNSPEC_ADR))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ )
+
+@@ -3850,7 +3862,7 @@
+ 	       (match_operand:VNx2DI 2 "register_operand" "w")))
+ 	    UNSPEC_PRED_X)
+ 	  UNSPEC_ADR))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.d, %1.d, %2.d, sxtw"
+   "&& !CONSTANT_P (operands[3])"
+   {
+@@ -3867,7 +3879,7 @@
+ 	     (match_operand:VNx2DI 2 "register_operand" "w")
+ 	     (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
+ 	  UNSPEC_ADR))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.d, %1.d, %2.d, uxtw"
+ )
+
+@@ -3879,7 +3891,7 @@
+ 	     (match_operand:VNx2DI 2 "register_operand" "w")
+ 	     (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
+ 	  (match_operand:VNx2DI 1 "register_operand" "w")))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.d, %1.d, %2.d, uxtw"
+ )
+
+@@ -3894,7 +3906,7 @@
+ 	       (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))
+ 	    UNSPEC_PRED_X)
+ 	  (match_operand:SVE_FULL_SDI 1 "register_operand")))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     operands[4] = CONSTM1_RTX (<VPRED>mode);
+   }
+@@ -3910,7 +3922,7 @@
+ 	       (match_operand:SVE_24I 3 "const_1_to_3_operand"))
+ 	    UNSPEC_PRED_X)
+ 	  (match_operand:SVE_24I 1 "register_operand" "w")))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>, lsl %3"
+   "&& !CONSTANT_P (operands[4])"
+   {
+@@ -3934,7 +3946,7 @@
+ 	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))
+ 	    UNSPEC_PRED_X)
+ 	  (match_operand:VNx2DI 1 "register_operand" "w")))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.d, %1.d, %2.d, sxtw %3"
+   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
+   {
+@@ -3955,7 +3967,7 @@
+ 	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))
+ 	    UNSPEC_PRED_X)
+ 	  (match_operand:VNx2DI 1 "register_operand" "w")))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "adr\t%0.d, %1.d, %2.d, uxtw %3"
+   "&& !CONSTANT_P (operands[5])"
+   {
+@@ -6967,7 +6979,7 @@
+ 	     (match_operand:<VSI2QI> 3 "register_operand" "w, w")
+ 	    MATMUL)
+ 	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))
+-  "TARGET_SVE_I8MM"
++  "TARGET_SVE_I8MM && TARGET_NON_STREAMING"
+   "@
+    <sur>mmla\\t%0.s, %2.b, %3.b
+    movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
+@@ -7538,7 +7550,7 @@
+ 	   (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
+ 	   (match_operand:SVE_MATMULF 1 "register_operand" "0, w")
+ 	  FMMLA))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+    movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+@@ -8601,7 +8613,7 @@
+ 	   (match_operand:<VEL> 1 "register_operand")
+ 	   (match_operand:SVE_FULL_F 2 "register_operand")
+ 	  UNSPEC_FADDA))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   {
+     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+   }
+@@ -8614,7 +8626,7 @@
+ 	   (match_operand:<VEL> 1 "register_operand" "0")
+ 	   (match_operand:SVE_FULL_F 2 "register_operand" "w")
+ 	  UNSPEC_FADDA))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
+ )
+
+@@ -8668,7 +8680,7 @@
+ 	   (match_operand:<VPRED> 1 "register_operand" "Upl")
+ 	   (match_operand:SVE_FULL_SD 2 "register_operand" "w")
+ 	  UNSPEC_SVE_COMPACT))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "compact\t%0.<Vetype>, %1, %2.<Vetype>"
+ )
+
+diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
+index f138f4be4..36555f65c 100644
+--- a/gcc/config/aarch64/aarch64-sve2.md
++++ b/gcc/config/aarch64/aarch64-sve2.md
+@@ -109,7 +109,7 @@
+ 	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
+ 	   (mem:BLK (scratch))
+ 	  UNSPEC_LDNT1_GATHER))
+-  "TARGET_SVE2"
++  "TARGET_SVE2 && TARGET_NON_STREAMING"
+   "@
+    ldnt1<Vesize>\t%0.<Vetype>, %1/z, %3.<Vetype>
+    ldnt1<Vesize>\t%0.<Vetype>, %1/z, %3.<Vetype>, %2"
+@@ -129,6 +129,7 @@
+ 	    UNSPEC_LDNT1_GATHER))
+ 	  UNSPEC_PRED_X))
+   "TARGET_SVE2
++   && TARGET_NON_STREAMING
+    && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+   "@
+    ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, %3.<SVE_FULL_SDI:Vetype>
+@@ -159,7 +160,7 @@
+ 	   (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")
+
+ 	  UNSPEC_STNT1_SCATTER))
+-  "TARGET_SVE"
++  "TARGET_SVE && TARGET_NON_STREAMING"
+   "@
+    stnt1<Vesize>\t%3.<Vetype>, %0, %2.<Vetype>
+    stnt1<Vesize>\t%3.<Vetype>, %0, %2.<Vetype>, %1"
+@@ -176,6 +177,7 @@
+ 	   (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
+ 	  UNSPEC_STNT1_SCATTER))
+   "TARGET_SVE2
++   && TARGET_NON_STREAMING
+    && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+   "@
+    stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, %2.<SVE_FULL_SDI:Vetype>
+@@ -2426,7 +2428,7 @@
+ 	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
+ 	   (match_operand:SVE_FULL_SDI 3 "register_operand" "w")
+ 	  UNSPEC_HISTCNT))
+-  "TARGET_SVE2"
++  "TARGET_SVE2 && TARGET_NON_STREAMING"
+   "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ )
+
+@@ -2436,7 +2438,7 @@
+ 	   (match_operand:VNx16QI_ONLY 1 "register_operand" "w")
+ 	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")
+ 	  UNSPEC_HISTSEG))
+-  "TARGET_SVE2"
++  "TARGET_SVE2 && TARGET_NON_STREAMING"
+   "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ )
+
+@@ -2460,7 +2462,7 @@
+ 	     SVE2_MATCH)
+ 	  UNSPEC_PRED_Z))
+    (clobber (reg:CC_NZC CC_REGNUM))
+-  "TARGET_SVE2"
++  "TARGET_SVE2 && TARGET_NON_STREAMING"
+   "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
+ )
+
+@@ -2491,6 +2493,7 @@
+ 	     SVE2_MATCH)
+ 	  UNSPEC_PRED_Z))
+   "TARGET_SVE2
++   && TARGET_NON_STREAMING
+    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+   "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+   "&& !rtx_equal_p (operands[4], operands[6])"
+@@ -2518,6 +2521,7 @@
+ 	   UNSPEC_PTEST))
+    (clobber (match_scratch:<VPRED> 0 "=Upa"))
+   "TARGET_SVE2
++   && TARGET_NON_STREAMING
+    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+   "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+   "&& !rtx_equal_p (operands[4], operands[6])"
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index a3c83a3b1..8f0ac2cde 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -252,6 +252,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ #define AARCH64_ISA_MOPS	   (aarch64_isa_flags & AARCH64_FL_MOPS)
+ #define AARCH64_ISA_LS64	   (aarch64_isa_flags & AARCH64_FL_LS64)
+
++/* The current function is a normal non-streaming function.  */
++#define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF)
++
+ /* Crypto is an optional extension to AdvSIMD.  */
+ #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
+
+@@ -290,16 +293,16 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ #define TARGET_SVE2 (AARCH64_ISA_SVE2)
+
+ /* SVE2 AES instructions, enabled through +sve2-aes.  */
+-#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES)
++#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES && TARGET_NON_STREAMING)
+
+ /* SVE2 BITPERM instructions, enabled through +sve2-bitperm.  */
+-#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM)
++#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM && TARGET_NON_STREAMING)
+
+ /* SVE2 SHA3 instructions, enabled through +sve2-sha3.  */
+-#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3)
++#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3 && TARGET_NON_STREAMING)
+
+ /* SVE2 SM4 instructions, enabled through +sve2-sm4.  */
+-#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4)
++#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4 && TARGET_NON_STREAMING)
+
+ /* SME instructions, enabled through +sme.  Note that this does not
+    imply anything about the state of PSTATE.SM.  */
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index 8dd2035bc..226dea48a 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -2706,7 +2706,7 @@
+
+ (define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE])
+
+-(define_int_iterator SVE_FP_UNARY_INT [UNSPEC_FEXPA])
++(define_int_iterator SVE_FP_UNARY_INT [(UNSPEC_FEXPA "TARGET_NON_STREAMING")])
+
+ (define_int_iterator SVE_INT_SHIFT_IMM [UNSPEC_ASRD
+ 					(UNSPEC_SQSHLU "TARGET_SVE2")
+@@ -2720,7 +2720,7 @@
+ (define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT
+ 					      UNSPEC_BFMLALB
+ 					      UNSPEC_BFMLALT
+-					      UNSPEC_BFMMLA])
++					      (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
+
+ (define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT
+ 						   UNSPEC_BFMLALB
+diff --git a/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp b/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp
+new file mode 100644
+index 000000000..d6a5a561a
+--- /dev/null
++++ b/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp
+@@ -0,0 +1,308 @@
++# Specific regression driver for AArch64 SME.
++# Copyright (C) 2009-2023 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++# General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.  */
++
++# Test whether certain SVE instructions are accepted or rejected in
++# SME streaming mode.
++
++# Exit immediately if this isn't an AArch64 target.
++if {![istarget aarch64*-*-*] } {
++    return
++}
++
++load_lib gcc-defs.exp
++
++gcc_parallel_test_enable 0
++
++# Code shared by all tests.
++set preamble {
++#include <arm_sve.h>
++
++#pragma GCC target "+i8mm+f32mm+f64mm+sve2+sve2-bitperm+sve2-sm4+sve2-aes+sve2-sha3+sme"
++
++extern svbool_t &pred;
++
++extern svint8_t &s8;
++extern svint32_t &s32;
++
++extern svuint8_t &u8;
++extern svuint16_t &u16;
++extern svuint32_t &u32;
++extern svuint64_t &u64;
++
++extern svbfloat16_t &bf16;
++extern svfloat32_t &f32;
++
++extern void *void_ptr;
++
++extern int8_t *s8_ptr;
++extern int16_t *s16_ptr;
++extern int32_t *s32_ptr;
++
++extern uint8_t *u8_ptr;
++extern uint16_t *u16_ptr;
++extern uint32_t *u32_ptr;
++extern uint64_t *u64_ptr;
++
++extern uint64_t indx;
++}
++
++# Wrap a standalone call in a streaming-compatible function.
++set sc_harness {
++void
++foo () [[arm::streaming_compatible]]
++{
++  $CALL;
++}
++}
++
++# HARNESS is some source code that should be appended to the preamble
++# variable defined above.  It includes the string "$CALL", which should be
++# replaced by the function call in CALL.  The result after both steps is
++# a complete C++ translation unit.
++#
++# Try compiling the C++ code and see what output GCC produces.
++# The expected output is either:
++#
++# - empty, if SHOULD_PASS is true
++# - a message rejecting CALL in streaming mode, if SHOULD_PASS is false
++#
++# CALL is simple enough that it can be used in test names.
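To make the harness above concrete: for a nonstreaming-only entry such as u32 = svcompact (pred, u32), the driver effectively compiles a translation unit like the sketch below (assembled here from the preamble and sc_harness strings for illustration; it is not a separate file in the patch, and the declarations are trimmed to what this one call needs). Because svcompact now carries AARCH64_FL_SM_OFF in its required extensions, the compiler is expected to emit the diagnostic added in aarch64-sve-builtins.cc.

#include <arm_sve.h>

#pragma GCC target "+i8mm+f32mm+f64mm+sve2+sve2-bitperm+sve2-sm4+sve2-aes+sve2-sha3+sme"

extern svbool_t &pred;
extern svuint32_t &u32;

void
foo () [[arm::streaming_compatible]]
{
  /* Rejected at compile time: svcompact "cannot be called when SME
     streaming mode is enabled".  */
  u32 = svcompact (pred, u32);
}

An entry from streaming_ok, such as s8 = svadd_x (pred, s8, s8), compiles cleanly under the same wrapper, which is what the SHOULD_PASS=1 runs check.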
++proc check_ssve_call { harness name call should_pass } {
++    global preamble
++
++    set filename test-[pid]
++    set fd [open $filename.cc w]
++    puts $fd $preamble
++    puts -nonewline $fd [string map [list {$CALL} $call] $harness]
++    close $fd
++    remote_download host $filename.cc
++
++    set test "streaming SVE call $name"
++
++    set gcc_output [g++_target_compile $filename.cc $filename.s assembly ""]
++    remote_file build delete $filename.cc $filename.s
++
++    if { [string equal $gcc_output ""] } {
++	if { $should_pass } {
++	    pass $test
++	} else {
++	    fail $test
++	}
++	return
++    }
++
++    set lines [split $gcc_output "\n"]
++    set error_text "cannot be called when SME streaming mode is enabled"
++    if { [llength $lines] == 3
++	 && [string first "In function" [lindex $lines 0]] >= 0
++	 && [string first $error_text [lindex $lines 1]] >= 0
++	 && [string equal [lindex $lines 2] ""] } {
++	if { $should_pass } {
++	    fail $test
++	} else {
++	    pass $test
++	}
++	return
++    }
++
++    verbose -log "$test: unexpected output"
++    fail $test
++}
++
++# Apply check_ssve_call to each line in CALLS.  The other arguments are
++# as for check_ssve_call.
++proc check_ssve_calls { harness calls should_pass } {
++    foreach line [split $calls "\n"] {
++	set call [string trim $line]
++	if { [string equal $call ""] } {
++	    continue
++	}
++	check_ssve_call $harness "$call" $call $should_pass
++    }
++}
++
++# A small selection of things that are valid in streaming mode.
++set streaming_ok {
++    s8 = svadd_x (pred, s8, s8)
++    s8 = svld1 (pred, s8_ptr)
++}
++
++# This order follows the list in the SME manual.
++set nonstreaming_only {
++    u32 = svadrb_offset (u32, u32)
++    u64 = svadrb_offset (u64, u64)
++    u32 = svadrh_index (u32, u32)
++    u64 = svadrh_index (u64, u64)
++    u32 = svadrw_index (u32, u32)
++    u64 = svadrw_index (u64, u64)
++    u32 = svadrd_index (u32, u32)
++    u64 = svadrd_index (u64, u64)
++    u8 = svaesd (u8, u8)
++    u8 = svaese (u8, u8)
++    u8 = svaesimc (u8)
++    u8 = svaesmc (u8)
++    u8 = svbdep (u8, u8)
++    u8 = svbext (u8, u8)
++    f32 = svbfmmla (f32, bf16, bf16)
++    u8 = svbgrp (u8, u8)
++    u32 = svcompact (pred, u32)
++    f32 = svadda (pred, 1.0f, f32)
++    f32 = svexpa (u32)
++    f32 = svmmla (f32, f32, f32)
++    f32 = svtmad (f32, f32, 0)
++    f32 = svtsmul (f32, u32)
++    f32 = svtssel (f32, u32)
++    u32 = svhistcnt_z (pred, u32, u32)
++    u8 = svhistseg (u8, u8)
++    u32 = svld1ub_gather_offset_u32 (pred, u8_ptr, u32)
++    u32 = svld1ub_gather_offset_u32 (pred, u32, 1)
++    u64 = svld1_gather_index (pred, u64_ptr, u64)
++    u64 = svld1_gather_index_u64 (pred, u64, 1)
++    u32 = svld1uh_gather_index_u32 (pred, u16_ptr, u32)
++    u32 = svld1uh_gather_index_u32 (pred, u32, 1)
++    u8 = svld1ro (pred, u8_ptr + indx)
++    u8 = svld1ro (pred, u8_ptr + 1)
++    u16 = svld1ro (pred, u16_ptr + indx)
++    u16 = svld1ro (pred, u16_ptr + 1)
++    u32 = svld1ro (pred, u32_ptr + indx)
++    u32 = svld1ro (pred, u32_ptr + 1)
++    u64 = svld1ro (pred, u64_ptr + indx)
++    u64 = svld1ro (pred, u64_ptr + 1)
++    u32 = svld1sb_gather_offset_u32 (pred, s8_ptr, u32)
++    u32 = svld1sb_gather_offset_u32 (pred, u32, 1)
++    u32 = svld1sh_gather_index_u32 (pred, s16_ptr, u32)
++    u32 = svld1sh_gather_index_u32 (pred, u32, 1)
++    u64 = svld1sw_gather_index_u64 (pred, s32_ptr, u64)
++    u64 = svld1sw_gather_index_u64 (pred, u64, 1)
++    u64 = svld1uw_gather_index_u64 (pred, u32_ptr, u64)
++    u64 = svld1uw_gather_index_u64 (pred, u64, 1)
++    u32 = svld1_gather_index (pred, u32_ptr, u32)
++    u32 = svld1_gather_index_u32 (pred, u32, 1)
++    u8 = svldff1(pred, u8_ptr)
++    u16 = svldff1ub_u16(pred, u8_ptr)
++    u32 = svldff1ub_u32(pred, u8_ptr)
++    u64 = 
svldff1ub_u64(pred, u8_ptr) ++ u32 = svldff1ub_gather_offset_u32 (pred, u8_ptr, u32) ++ u32 = svldff1ub_gather_offset_u32 (pred, u32, 1) ++ u64 = svldff1(pred, u64_ptr) ++ u64 = svldff1_gather_index (pred, u64_ptr, u64) ++ u64 = svldff1_gather_index_u64 (pred, u64, 1) ++ u16 = svldff1(pred, u16_ptr) ++ u32 = svldff1uh_u32(pred, u16_ptr) ++ u64 = svldff1uh_u64(pred, u16_ptr) ++ u32 = svldff1uh_gather_offset_u32 (pred, u16_ptr, u32) ++ u32 = svldff1uh_gather_offset_u32 (pred, u32, 1) ++ u16 = svldff1sb_u16(pred, s8_ptr) ++ u32 = svldff1sb_u32(pred, s8_ptr) ++ u64 = svldff1sb_u64(pred, s8_ptr) ++ u32 = svldff1sb_gather_offset_u32 (pred, s8_ptr, u32) ++ u32 = svldff1sb_gather_offset_u32 (pred, u32, 1) ++ u32 = svldff1sh_u32(pred, s16_ptr) ++ u64 = svldff1sh_u64(pred, s16_ptr) ++ u32 = svldff1sh_gather_offset_u32 (pred, s16_ptr, u32) ++ u32 = svldff1sh_gather_offset_u32 (pred, u32, 1) ++ u64 = svldff1sw_u64(pred, s32_ptr) ++ u64 = svldff1sw_gather_offset_u64 (pred, s32_ptr, u64) ++ u64 = svldff1sw_gather_offset_u64 (pred, u64, 1) ++ u32 = svldff1(pred, u32_ptr) ++ u32 = svldff1_gather_index (pred, u32_ptr, u32) ++ u32 = svldff1_gather_index_u32 (pred, u32, 1) ++ u64 = svldff1uw_u64(pred, u32_ptr) ++ u64 = svldff1uw_gather_offset_u64 (pred, u32_ptr, u64) ++ u64 = svldff1uw_gather_offset_u64 (pred, u64, 1) ++ u8 = svldnf1(pred, u8_ptr) ++ u16 = svldnf1ub_u16(pred, u8_ptr) ++ u32 = svldnf1ub_u32(pred, u8_ptr) ++ u64 = svldnf1ub_u64(pred, u8_ptr) ++ u64 = svldnf1(pred, u64_ptr) ++ u16 = svldnf1(pred, u16_ptr) ++ u32 = svldnf1uh_u32(pred, u16_ptr) ++ u64 = svldnf1uh_u64(pred, u16_ptr) ++ u16 = svldnf1sb_u16(pred, s8_ptr) ++ u32 = svldnf1sb_u32(pred, s8_ptr) ++ u64 = svldnf1sb_u64(pred, s8_ptr) ++ u32 = svldnf1sh_u32(pred, s16_ptr) ++ u64 = svldnf1sh_u64(pred, s16_ptr) ++ u64 = svldnf1sw_u64(pred, s32_ptr) ++ u32 = svldnf1(pred, u32_ptr) ++ u64 = svldnf1uw_u64(pred, u32_ptr) ++ u32 = svldnt1ub_gather_offset_u32 (pred, u8_ptr, u32) ++ u32 = svldnt1ub_gather_offset_u32 (pred, u32, 1) ++ u64 = svldnt1_gather_index (pred, u64_ptr, u64) ++ u64 = svldnt1_gather_index_u64 (pred, u64, 1) ++ u32 = svldnt1uh_gather_offset_u32 (pred, u16_ptr, u32) ++ u32 = svldnt1uh_gather_offset_u32 (pred, u32, 1) ++ u32 = svldnt1sb_gather_offset_u32 (pred, s8_ptr, u32) ++ u32 = svldnt1sb_gather_offset_u32 (pred, u32, 1) ++ u32 = svldnt1sh_gather_offset_u32 (pred, s16_ptr, u32) ++ u32 = svldnt1sh_gather_offset_u32 (pred, u32, 1) ++ u64 = svldnt1sw_gather_offset_u64 (pred, s32_ptr, u64) ++ u64 = svldnt1sw_gather_offset_u64 (pred, u64, 1) ++ u64 = svldnt1uw_gather_offset_u64 (pred, u32_ptr, u64) ++ u64 = svldnt1uw_gather_offset_u64 (pred, u64, 1) ++ u32 = svldnt1_gather_offset (pred, u32_ptr, u32) ++ u32 = svldnt1_gather_offset_u32 (pred, u32, 1) ++ pred = svmatch (pred, u8, u8) ++ pred = svnmatch (pred, u8, u8) ++ u64 = svpmullb_pair (u64, u64) ++ u64 = svpmullt_pair (u64, u64) ++ svprfb_gather_offset (pred, void_ptr, u64, SV_PLDL1KEEP) ++ svprfb_gather_offset (pred, u64, 1, SV_PLDL1KEEP) ++ svprfd_gather_index (pred, void_ptr, u64, SV_PLDL1KEEP) ++ svprfd_gather_index (pred, u64, 1, SV_PLDL1KEEP) ++ svprfh_gather_index (pred, void_ptr, u64, SV_PLDL1KEEP) ++ svprfh_gather_index (pred, u64, 1, SV_PLDL1KEEP) ++ svprfw_gather_index (pred, void_ptr, u64, SV_PLDL1KEEP) ++ svprfw_gather_index (pred, u64, 1, SV_PLDL1KEEP) ++ u64 = svrax1 (u64, u64) ++ pred = svrdffr () ++ pred = svrdffr_z (pred) ++ svsetffr () ++ u32 = svsm4e (u32, u32) ++ u32 = svsm4ekey (u32, u32) ++ s32 = svmmla (s32, s8, s8) ++ svst1b_scatter_offset (pred, u8_ptr, 
u32, u32)
++    svst1b_scatter_offset (pred, u32, 1, u32)
++    svst1_scatter_index (pred, u64_ptr, u64, u64)
++    svst1_scatter_index (pred, u64, 1, u64)
++    svst1h_scatter_index (pred, u16_ptr, u32, u32)
++    svst1h_scatter_index (pred, u32, 1, u32)
++    svst1w_scatter_index (pred, u32_ptr, u64, u64)
++    svst1w_scatter_index (pred, u64, 1, u64)
++    svst1_scatter_index (pred, u32_ptr, u32, u32)
++    svst1_scatter_index (pred, u32, 1, u32)
++    svstnt1b_scatter_offset (pred, u8_ptr, u32, u32)
++    svstnt1b_scatter_offset (pred, u32, 1, u32)
++    svstnt1_scatter_offset (pred, u64_ptr, u64, u64)
++    svstnt1_scatter_offset (pred, u64, 1, u64)
++    svstnt1h_scatter_offset (pred, u16_ptr, u32, u32)
++    svstnt1h_scatter_offset (pred, u32, 1, u32)
++    svstnt1w_scatter_offset (pred, u32_ptr, u64, u64)
++    svstnt1w_scatter_offset (pred, u64, 1, u64)
++    svstnt1_scatter_offset (pred, u32_ptr, u32, u32)
++    svstnt1_scatter_offset (pred, u32, 1, u32)
++    u32 = svmmla (u32, u8, u8)
++    s32 = svusmmla (s32, u8, s8)
++    svwrffr (pred)
++}
++
++check_ssve_calls $sc_harness $streaming_ok 1
++check_ssve_calls $sc_harness $nonstreaming_only 0
++
++gcc_parallel_test_enable 1
+diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp
+index 38140413a..45270be60 100644
+--- a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp
++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp
+@@ -50,6 +50,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ torture-init
+ set-torture-options {
+     "-std=c++98 -O0 -g"
++    "-std=c++11 -O0 -DSTREAMING_COMPATIBLE"
+     "-std=c++98 -O1 -g"
+     "-std=c++11 -O2 -g"
+     "-std=c++14 -O3 -g"
+diff --git a/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp
+index 78e8ecae7..0a7151220 100644
+--- a/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp
++++ b/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp
+@@ -53,6 +53,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ torture-init
+ set-torture-options {
+     "-std=c++98 -O0 -g"
++    "-std=c++11 -O0 -DSTREAMING_COMPATIBLE"
+     "-std=c++98 -O1 -g"
+     "-std=c++11 -O2 -g"
+     "-std=c++14 -O3 -g"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp
+index a271f1793..8cb2b9bb4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp
+@@ -50,6 +50,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ torture-init
+ set-torture-options {
+     "-std=c90 -O0 -g"
++    "-std=c90 -O0 -DSTREAMING_COMPATIBLE"
+     "-std=c90 -O1 -g"
+     "-std=c99 -O2 -g"
+     "-std=c11 -O3 -g"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
+index 6c6bfa1c2..4d6ec2d65 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c
+index 8b2a1dd1c..04afbcee6 100644
+---
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c +index 90a56420a..8b4c7d1ff 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c +index a61eec971..5dcdc54b0 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c +index 970485bd6..d9d16ce3f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c +index d06f51fe3..a358c2403 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c +index b23f25a11..bd1e9af0a 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c +index b1d98fbf5..4bb2912a4 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ + /* { dg-require-effective-target aarch64_asm_bf16_ok } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c +index 2e80d6830..d261ec00b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c +@@ -1,3 
+1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c +index e0bc33efe..024b0510f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c +index e4634982b..0b32dfb60 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c +index 71cb97b8a..38688dbca 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c +index 954329a0b..a3e89cc97 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c +index ec664845f..602ab048c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c +index 5a5411e46..87c26e6ea 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c +index 4ded1c575..5e9839537 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies 
"**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c +index c31f9ccb5..b117df2a4 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c +index 00b68ff29..8b972f61b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c +index 47127960c..413d4d62d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c +index 9b6335547..b3df7d154 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c +index c9cea3ad8..0da1e5296 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c +index 2cccc8d49..a3304c419 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c +index 6ee1d48ab..73ef94805 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c +index cb1801778..fe909b666 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c +index 86081edbd..30ba30639 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c +index c8df00f8a..cf62fada9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c +index 2fb9d5b74..b9fde4dac 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c +index 3cd211b16..35b7dd1d2 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c +index 44b16ed5f..57b6a6567 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c +index 3aa9a15ee..bd7e28478 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c +index 49aff5146..143800003 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c +index 00bf9e129..145b0b7f3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c +index 9e9b3290a..9f150631b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c +index 64ec62871..8dd75d136 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c +index 22701320b..f15454586 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ + /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c +index 16a5316a9..06249ad4c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c +index 3f953247e..8d141e133 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c +index 424de65a6..77836cbf6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c +index aa375bea2..f4b24ab41 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c +index ed07b4dfc..1b9782368 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c +index 20ca42720..2009dec81 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c +index e3a85a23f..0e1d48966 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c +index 3a0094fba..115d7d3a9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c +index 4d076b486..5dc44421c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c +index ffa85eb3e..fac4ec41c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c +index a9c418265..f57df4226 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c +index 99af86ddf..0c069fa4f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c +index 77c7e0a2d..98102e013 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c +index b605f8b67..f86a34d12 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c +index 84fb5c335..139371878 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c +index 447001793..f0338aae6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c +index 09d3cc8c2..5810bc0ac 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c +index f3dcf03cd..52e95abb9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c +index f4e9d5db9..0889eefdd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c +index 854d19233..fb144d756 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c +index 80f646870..1f997480e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c +index 13ce863c9..60405d0a0 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c +index 2fcc63390..225e9969d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c +index cc15b927a..366e36afd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c +index 7e330c042..b84b9bcdd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c +index d0e47f0bf..e779b0712 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c +index 66bf0f746..17e0f9aa2 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c +index faf71bf9d..030f187b1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c +index 41c7dc9cf..fb8653016 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c +index 8b53ce94f..5be30a2d8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c +index 1d5fde0e6..61d242c07 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c +index 97a36e884..afe748ef9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c +index c018a4c1c..bee222855 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c +index cf620d1f4..ccaac2ca4 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c +index 1fa819296..c8416f99d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c +index 5224ec40a..ec26a82ca 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c +index 18e87f2b8..e211f1794 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c +index 83883fca4..24dfe452f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c +index c2a676807..f7e3977bf 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c +index 2f2a04d24..7f2a829a8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c +index e3e83a205..685f62808 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c +index 769f2c266..49a7a8536 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c +index e0a748c6a..1d30c7ba6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c +index 86716da9b..c2b3f42cb 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c +index e7a4aa6e9..585a6241e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c +index 69ba96d52..ebb2f0f66 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c +index e1a1873f0..f4ea96cf9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c +index 0a49cbcc0..e3735239c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c +index b633335dc..67e70361b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c +index 32a4309b6..5755c79bc 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c +index 73a9be892..a58489995 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c +index 94ea73b63..b18751209 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c +index 81b64e836..bffac9365 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c +index 453b3ff24..a4acb1e5e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c +index bbbed79dc..828288cd8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c +index 5430e256b..e3432c46c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c +index e5da8a83d..78aa34ec0 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c +index 411428756..9dad1212c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c +index d795ace63..33b6c10dd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c +index 6caf2f504..e8c9c845f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c +index af0be08d2..b1c9c8135 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c +index 43124dd89..9ab776a21 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c +index 90c4e58a2..745740dfa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c +index 302623a40..3a7bd6a43 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c +index 88ad2d1dc..ade0704f7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c +index e8e06411f..5d3e0ce95 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c +index 21d02ddb7..08ae802ee 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c +index 904cb027e..d8dc5e157 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c +index a40012318..042ae5a9f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c +index a9a98a683..d0844fa51 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c +index d02e44342..12460105d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c +index 663a73d27..536331371 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c +index 5e0ef067f..602e6a686 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c +index 1cfae1b95..4b307b341 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c +index abb3d769a..db205b1ef 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c +index 6e330e8e8..0eac877eb 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c +index 4eb5323e9..266ecf167 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c +index ebac26e7d..bdd725e4a 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c +index 6c0daea52..ab2c79da7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c +index 0e400c679..361d7de05 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c +index ac9779899..8adcec3d5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c +index c7ab06171..781fc1a9c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c +index 947a896e7..93b4425ec 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c +index cf0178688..d47d748c7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c +index 83b73ec8e..e390d6857 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c +index 778096e82..97a0e39e7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c +index 592c8237d..21008d7f9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c +index 634092af8..8a3d795b3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c +index 4a03f6676..c0b57a2f3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c +index 162ee176a..6714152d9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c +index e920ac43b..3df404d77 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c +index 65e28c5c2..e899a4a6f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c +index 70d3f27d8..ab69656cf 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c +index 5c29f1d19..5d7b07497 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c +index e04b9a788..5b53c885d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c +index 0553fc98d..992eba7cc 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c +index 61a474fdf..99e0f8bd0 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c +index be63d8bf9..fe23913f2 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c +index 4f52490b4..6deb39770 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c +index 73f50d182..e76457da6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c +index 08c7dc6dd..e49a7f8ed 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c +index 6a41bc26b..00b40281c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c +index 2f7718730..41560af33 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c +index d7f1a68a4..0acf4b349 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c +index 5b483e4aa..578212898 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c +index 62121ce0a..8249c4c3f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c
+index 8fe13411f..e59c451f7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c
+index 50122e3b7..d788576e2 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c
+index d7cce11b6..b21fdb964 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c
+index 7bf82c3b6..1ae41b002 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c
+index e2fef064b..e3d8fb3b5 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c
+index 57c61e122..df9a0c07f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c
+index ed9686c4e..c3467d846 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c
+index a3107f562..bf3355e99 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c
+index 93d5abaf7..bcc3eb3fd 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c
+index 32d36a84c..4c01c13ac 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c
+index 373922791..3c6556591 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c
+index b3c3be1d0..b222a0dc6 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c
+index f66dbf397..e1c7f47dc 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-require-effective-target aarch64_asm_f32mm_ok } */
+ /* { dg-additional-options "-march=armv8.2-a+f32mm" } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c
+index 49dc0607c..c45caa700 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+ /* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c
+index e7ce009ac..dc155461c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+ /* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c
+index 81f5166fb..43d601a47 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+ /* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c
+index c4bfbbbf7..f32cfbfcb 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c
+index a84acb1a1..8a4293b62 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c
+index 04b7a1575..6beca4b8e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c
+index 2bbae1b9e..6af44ac82 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c
+index 5564e967f..7e28ef641 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c
+index cb6774ad0..1efd43445 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c
+index fe978bbe5..f50c43e83 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c
+index d244e701a..bb6fb10b8 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c
+index 5c4ebf440..19ec78e9e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c
+index fe3f7259f..57fbb91b0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c
+index 232123566..60018be5b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c
+index d59033356..fb1bb29db 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c
+index c7a35f1b4..65ee9a071 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c
+index e098cb9b7..ceec61939 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c
+index 058d1313f..aeedbc6d7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c
+index 2a23d41f3..2d69d085b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c
+index 6a1adb056..3e5733ef9 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c
+index 12197315d..5cd330a3d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c
+index 7021ea68f..0ee9948cb 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c
+index 2363f592b..f18bedce1 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c
+index 767c009b4..6850865ec 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+index 2da61ff5c..d8916809b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+@@ -11,10 +11,17 @@
+ #error "Please define -DTEST_OVERLOADS or -DTEST_FULL"
+ #endif
+
++#ifdef STREAMING_COMPATIBLE
++#define ATTR __arm_streaming_compatible
++#else
++#define ATTR
++#endif
++
+ #ifdef __cplusplus
+-#define PROTO(NAME, RET, ARGS) extern "C" RET NAME ARGS; RET NAME ARGS
++#define PROTO(NAME, RET, ARGS) \
++ extern "C" RET NAME ARGS ATTR; RET NAME ARGS ATTR
+ #else
+-#define PROTO(NAME, RET, ARGS) RET NAME ARGS
++#define PROTO(NAME, RET, ARGS) RET NAME ARGS ATTR
+ #endif
+
+ #define TEST_UNIFORM_Z(NAME, TYPE, CODE1, CODE2) \
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c
+index 3a00716e3..c0b03a0d3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c
+index b73d420fb..8eef8a12c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c
+index fc31928a6..5c96c5579 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c
+index 94bc696eb..9deed667f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c
+index d0ec91882..749ea8664 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c
+index 23e0da3f7..053abcb26 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c
+index e7c3ea03b..3ab251fe0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c
+index 022573a19..6c6471c5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c
+index ffcdf4224..9559e0f35 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c
+index 9440f3fd9..a0dd7e334 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+ /* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp
+index e08cd6121..2fb27fb5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp
+@@ -52,6 +52,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ torture-init
+ set-torture-options {
+ "-std=c90 -O0 -g"
++ "-std=c90 -O0 -DSTREAMING_COMPATIBLE"
+ "-std=c90 -O1 -g"
+ "-std=c99 -O2 -g"
+ "-std=c11 -O3 -g"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c
+index 622f5cf46..484f7251f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c
+index 6555bbb1d..6869bbd05 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c
+index 4630595ff..534ffe06f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c
+index 6e8acf48f..1660a8eaf 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c
+index 14230850f..c1a4e1061 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c
+index 7f08df4ba..4f14cc4c4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c
+index 7f7cbbeeb..091253ec6 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c
+index b420323b9..deb1ad27d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c
+index 50a647918..9efa501ef 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c
+index 9f98b843c..18963da5b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c
+index 9dbaec1b7..91591f93b 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c
+index 81ed5a463..1211587ef 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c
+index 70aeae3f3..72868bea7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c
+index 6e19e38d8..c8923816f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c
+index 27fa40f47..86989529f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c
+index b667e03e3..5cd941a7a 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c
+index 7bf783a7c..53d6c5c56 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c
+index 001f5f0f1..c6d9862e3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c
+index d93091adc..cb11a0026 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c
+index 3b8898023..0bb06cdb4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c
+index 380ccdf85..ce3458e5e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c
+index f43292f0c..7b1eff811 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c
+index 102810e25..17e3673a4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c
+index a0ed71227..8ce32e9f9 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c
+index 94c64971c..b7e1d7a99 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c
+index a0aa6703f..b0789ad21 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c
+index e1479684e..df09eaa76 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c
+index 77cdcfeba..5f185ea82 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c
+index bb729483f..71fece575 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c
+index de5b69314..1183e72f0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c
+index d01ec18e4..4d5e6e771 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c
+index b96e94353..ed329a23f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c
+index 1dcfbc0fb..6dbd6cea0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c
+index 4166ed0a6..4ea3335a2 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c
+index 7680344da..d55451519 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c
+index 2427c83ab..18c8ca44e 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c
+index 2f538e847..41bff31d0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c
+index ace1c2f2f..30b8f6948 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c
+index d3b29eb19..8750d11af 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c
+index 3bc406620..f7981991a 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c
+index 0af4b40b8..4d5ee4ef4 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c
+index fe28d78ed..005c29c06 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c
+index 985432615..92613b166 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c
+index 3c5baeee6..be2e6d126 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c
+index 4d945e9f9..4d122059f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c
+index 680238ac4..e3bc1044c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c
+index 787ae9def..9efa4b2cb 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c
+index 4810bc3c4..4ded4454d 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c
+index baebc7693..d0ce81294 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c
+index f35a75379..03473906a 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c
+index 0bdf4462f..2a8b4d250 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c
+index 6d78692bd..8409276d9 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c
+index 935b19a10..044ba1de3 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c
+index 8a00b30f3..6c2d890fa 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c
+index 868c20a11..863e31054 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c
+index af6b58165..a62783db7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c
+index 944609214..1fd85e0ce 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c
+index 90e2e991f..300d885ab 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c
+index ea80d40db..9dbc71839 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c
+index b237c7edd..5caa2a544 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c
+@@ -1,3 +1,4 @@
++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+ #include "test_sve_acle.h"
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c +index 0ff5746d8..14194eef6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c +index 58ad33c5d..e72384108 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c +index 3f928e20e..75539f692 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c +index 8a35c76b9..c0d47d0c1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c +index bd6002682..80fb3e869 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c +index 0bfa2616e..edd2bc418 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c +index fbfa008c1..a6e5059de 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c +index c283135c4..067e5b109 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c +index bf6ba5973..498fe82e5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c +index a24d0c89c..614f5fb1a 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c +index 2b05a7720..ce2c482af 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c +index a13c5f5bb..593dc1939 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c +index 4e012f61f..b9d06c1c5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c +index e934a708d..006e0e24d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c +index db21821eb..8cd7cb86a 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c +index 53f930da1..972ee3689 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c +index ec6c837d9..368a17c47 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c +index 3c5d96de4..57d60a350 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c +@@ -1,3 +1,4 @@ ++/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ + /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ + + #include "test_sve_acle.h" +-- +2.33.0 +
View file
_service:tar_scm:0180-testsuite-fix-dg-do-preprocess-typo.patch
Added
@@ -0,0 +1,26 @@ +From 35d804730dcac1b3e96db4b587de0cd77fec3504 Mon Sep 17 00:00:00 2001
+From: Sam James <sam@gentoo.org>
+Date: Tue, 30 Jul 2024 21:46:29 +0100
+Subject: [PATCH 180/188] testsuite: fix 'dg-do-preprocess' typo
+
+We want 'dg-do preprocess', not 'dg-do-preprocess'. Fix that.
+
+ PR target/106828
+ * g++.target/loongarch/pr106828.C: Fix 'dg-do compile' typo.
+---
+ gcc/testsuite/g++.target/loongarch/pr106828.C | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C
+index 190c1db71..0d13cbbd5 100644
+--- a/gcc/testsuite/g++.target/loongarch/pr106828.C
++++ b/gcc/testsuite/g++.target/loongarch/pr106828.C
+@@ -1,4 +1,4 @@
+-/* { dg-do-preprocess } */
++/* { dg-do preprocess } */
+ /* { dg-options "-mabi=lp64d -fsanitize=address" } */
+
+ /* Tests whether the compiler supports compile option '-fsanitize=address'. */
+--
+2.43.0
+
View file
_service:tar_scm:0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch
Added
@@ -0,0 +1,250 @@ +From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001
+From: Tamar Christina <tamar.christina@arm.com>
+Date: Mon, 12 Dec 2022 15:18:56 +0000
+Subject: [PATCH 082/157] [Backport][SME] AArch64: Support new tbranch optab.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11
+
+This implements the new tbranch optab for AArch64.
+
+we cannot emit one big RTL for the final instruction immediately.
+The reason that all comparisons in the AArch64 backend expand to separate CC
+compares, and separate testing of the operands is for ifcvt.
+
+The separate CC compare is needed so ifcvt can produce csel, cset etc from the
+compares. Unlike say combine, ifcvt can not do recog on a parallel with a
+clobber. Should we emit the instruction directly then ifcvt will not be able
+to say, make a csel, because we have no patterns which handle zero_extract and
+compare. (unlike combine ifcvt cannot transform the extract into an AND).
+
+While you could provide various patterns for this (and I did try) you end up
+with broken patterns because you can't add the clobber to the CC register. If
+you do, ifcvt recog fails.
+
+i.e.
+
+int
+f1 (int x)
+{
+ if (x & 1)
+ return 1;
+ return x;
+}
+
+We lose csel here.
+
+Secondly the reason the compare with an explicit CC mode is needed is so that
+ifcvt can transform the operation into a version that doesn't require the flags
+to be set. But it only does so if it know the explicit usage of the CC reg.
+
+For instance
+
+int
+foo (int a, int b)
+{
+ return ((a & (1 << 25)) ? 5 : 4);
+}
+
+Doesn't require a comparison, the optimal form is:
+
+foo(int, int):
+ ubfx x0, x0, 25, 1
+ add w0, w0, 4
+ ret
+
+and no compare is actually needed. If you represent the instruction using an
+ANDS instead of a zero_extract then you get close, but you end up with an ands
+followed by an add, which is a slower operation.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
+ (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
+ (tbranch_<code><mode>3): New.
+ * config/aarch64/iterators.md(ZEROM, zerom): New.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/tbz_1.c: New test.
+---
+ gcc/config/aarch64/aarch64.md | 33 ++++++--
+ gcc/config/aarch64/iterators.md | 2 +
+ gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++
+ 3 files changed, 122 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 079c8a3f9..2becc888e 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -953,12 +953,29 @@
+ (const_int 1)))
+ )
+
+-(define_insn "*tb<optab><mode>1"
++(define_expand "tbranch_<code><mode>3"
+ [(set (pc) (if_then_else
+- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
+- (const_int 1)
+- (match_operand 1
+- "aarch64_simd_shift_imm_<mode>" "n"))
++ (EQL (match_operand:ALLI 0 "register_operand")
++ (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
++ (label_ref (match_operand 2 ""))
++ (pc)))]
++ ""
++{
++ rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
++ rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
++ rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
++ emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
++ operands[1] = const0_rtx;
++ operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
++ operands[1]);
++})
++
++(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
++ [(set (pc) (if_then_else
++ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
++ (const_int 1)
++ (match_operand 1
++ "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+@@ -969,15 +986,15 @@
+ {
+ if (get_attr_far_branch (insn) == 1)
+ return aarch64_gen_far_branch (operands, 2, "Ltb",
+- "<inv_tb>\\t%<w>0, %1, ");
++ "<inv_tb>\\t%<ALLI:w>0, %1, ");
+ else
+ {
+ operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+- return "tst\t%<w>0, %1\;<bcond>\t%l2";
++ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
+ }
+ }
+ else
+- return "<tbz>\t%<w>0, %1, %l2";
++ return "<tbz>\t%<ALLI:w>0, %1, %l2";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index 226dea48a..b616f5c9a 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -1104,6 +1104,8 @@
+
+ ;; Give the number of bits in the mode
+ (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
++(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
++(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
+
+ ;; Give the ordinal of the MSB in the mode
+ (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
+diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
+new file mode 100644
+index 000000000..39deb58e2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
+@@ -0,0 +1,95 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
++/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
++
++#include <stdbool.h>
++
++void h(void);
++
++/*
++** g1:
++** tbnz w[0-9]+, #?0, .L([0-9]+)
++** ret
++** ...
++*/
++void g1(bool x)
++{
++ if (__builtin_expect (x, 0))
++ h ();
++}
++
++/*
++** g2:
++** tbz w[0-9]+, #?0, .L([0-9]+)
++** b h
++** ...
++*/
++void g2(bool x)
++{
++ if (__builtin_expect (x, 1))
++ h ();
++}
++
++/*
++** g3_ge:
++** tbnz w[0-9]+, #?31, .L[0-9]+
++** b h
++** ...
++*/
++void g3_ge(int x)
++{
++ if (__builtin_expect (x >= 0, 1))
++ h ();
++}
++
++/*
++** g3_gt:
++** cmp w[0-9]+, 0
++** ble .L[0-9]+
++** b h
++** ...
++*/
++void g3_gt(int x)
++{
++ if (__builtin_expect (x > 0, 1))
++ h ();
++}
++
++/*
++** g3_lt:
++** tbz w[0-9]+, #?31, .L[0-9]+
++** b h
++** ...
++*/
++void g3_lt(int x)
++{
++ if (__builtin_expect (x < 0, 1))
++ h ();
++}
++
++/*
++** g3_le:
++** cmp w[0-9]+, 0
++** bgt .L[0-9]+
++** b h
++** ...
++*/
++void g3_le(int x)
++{
++ if (__builtin_expect (x <= 0, 1))
++ h ();
++}
++
++/*
++** g5:
++** mov w[0-9]+, 65279
++** tst w[0-9]+, w[0-9]+
++** beq .L[0-9]+
++** b h
++** ...
++*/
++void g5(int x)
++{
++ if (__builtin_expect (x & 0xfeff, 1))
++ h ();
++}
+--
+2.33.0
+
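As a companion to the patch above, the two C fragments quoted in its commit message can be compiled as one standalone unit. This is an illustrative sketch only: the expected aarch64 sequences in the comments are taken from the commit text and assume -O2; actual codegen varies by compiler version and flags.

int
f1 (int x)
{
  /* The tbranch expander's separate CC compare keeps this if-convertible,
     so ifcvt can still produce csel/cset instead of a branch.  */
  if (x & 1)
    return 1;
  return x;
}

int
foo (int a, int b)
{
  /* Per the commit text, the optimal form needs no compare:
       ubfx  x0, x0, 25, 1
       add   w0, w0, 4
       ret  */
  return ((a & (1 << 25)) ? 5 : 4);
}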
View file
_service:tar_scm:0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch
Added
@@ -0,0 +1,47 @@ +From 643248a4c60c016af44bc740b35c7ac174849029 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Tue, 23 Jul 2024 10:04:26 +0800
+Subject: [PATCH 181/188] LoongArch: Remove gawk extension from a generator
+ script.
+
+gcc/ChangeLog:
+
+	* config/loongarch/genopts/gen-evolution.awk: Do not use
+	"length()" to compute the size of an array.
+---
+ gcc/config/loongarch/genopts/gen-evolution.awk | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk
+index 4d105afa9..1c8004e41 100644
+--- a/gcc/config/loongarch/genopts/gen-evolution.awk
++++ b/gcc/config/loongarch/genopts/gen-evolution.awk
+@@ -1,4 +1,4 @@
+-#!/usr/bin/gawk
++#!/usr/bin/awk -f
+ #
+ # A simple script that generates loongarch-evolution.h
+ # from genopts/isa-evolution.in
+@@ -94,8 +94,9 @@ function gen_cpucfg_useful_idx()
+     idx_bucket[cpucfg_word[i]] = 1
+
+     delete idx_list
++    j = 1
+     for (i in idx_bucket)
+-        idx_list[length(idx_list)-1] = i+0
++        idx_list[j++] = i+0
+     delete idx_bucket
+
+     asort (idx_list)
+@@ -108,7 +109,7 @@
+     print ""
+
+     printf ("static constexpr int N_CPUCFG_WORDS = %d;\n",
+-	    idx_list[length(idx_list)] + 1)
++	    idx_list[j - 1] + 1)
+
+     delete idx_list
+ }
+--
+2.43.0
+
View file
_service:tar_scm:0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
Added
@@ -0,0 +1,381 @@ +From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:04 +0100
+Subject: [PATCH 083/157] [Backport][SME] aarch64: Use local frame vars in
+ shrink-wrapping code
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd
+
+aarch64_layout_frame uses a shorthand for referring to
+cfun->machine->frame:
+
+  aarch64_frame &frame = cfun->machine->frame;
+
+This patch does the same for some other heavy users of the structure.
+No functional change intended.
+
+gcc/
+ * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
+ a local shorthand for cfun->machine->frame.
+ (aarch64_restore_callee_saves, aarch64_get_separate_components):
+ (aarch64_process_components): Likewise.
+ (aarch64_allocate_and_probe_stack_space): Likewise.
+ (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
+ (aarch64_layout_frame): Use existing shorthand for one more case.
+---
+ gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++----------------
+ 1 file changed, 64 insertions(+), 59 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 08a98f8ba..b7da1d0be 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void)
+ frame.is_scs_enabled
+ = (!crtl->calls_eh_return
+ && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
+- && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0));
++ && known_ge (frame.reg_offset[LR_REGNUM], 0));
+
+ /* When shadow call stack is enabled, the scs_pop in the epilogue will
+ restore x30, and we don't need to pop x30 again in the traditional
+@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
+ unsigned start, unsigned limit, bool skip_wb,
+ bool hard_fp_valid_p)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ rtx_insn *insn;
+ unsigned regno;
+ unsigned regno2;
+@@ -9377,8 +9378,8 @@
+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
+
+ if (skip_wb
+- && (regno == cfun->machine->frame.wb_push_candidate1
+- || regno == cfun->machine->frame.wb_push_candidate2))
++ && (regno == frame.wb_push_candidate1
++ || regno == frame.wb_push_candidate2))
+ continue;
+
+ if (cfun->machine->reg_is_wrapped_separately[regno])
+@@ -9386,7 +9387,7 @@
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = start_offset + cfun->machine->frame.reg_offset[regno];
++ offset = start_offset + frame.reg_offset[regno];
+ rtx base_rtx = stack_pointer_rtx;
+ poly_int64 sp_offset = offset;
+
+@@ -9399,7 +9400,7 @@
+ {
+ gcc_assert (known_eq (start_offset, 0));
+ poly_int64 fp_offset
+- = cfun->machine->frame.below_hard_fp_saved_regs_size;
++ = frame.below_hard_fp_saved_regs_size;
+ if (hard_fp_valid_p)
+ base_rtx = hard_frame_pointer_rtx;
+ else
+@@ -9421,8 +9422,7 @@
+ && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
+ && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && known_eq (GET_MODE_SIZE (mode),
+- cfun->machine->frame.reg_offset[regno2]
+- - cfun->machine->frame.reg_offset[regno]))
++ frame.reg_offset[regno2] - frame.reg_offset[regno]))
+ {
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ rtx mem2;
+@@ -9472,6 +9472,7 @@ static void
+ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+ unsigned limit, bool skip_wb, rtx *cfi_ops)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ unsigned regno;
+ unsigned regno2;
+ poly_int64 offset;
+@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
+ rtx reg, mem;
+
+ if (skip_wb
+- && (regno == cfun->machine->frame.wb_pop_candidate1
+- || regno == cfun->machine->frame.wb_pop_candidate2))
++ && (regno == frame.wb_pop_candidate1
++ || regno == frame.wb_pop_candidate2))
+ continue;
+
+ machine_mode mode = aarch64_reg_save_mode (regno);
+ reg = gen_rtx_REG (mode, regno);
+- offset = start_offset + cfun->machine->frame.reg_offset[regno];
++ offset = start_offset + frame.reg_offset[regno];
+ rtx base_rtx = stack_pointer_rtx;
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
+@@ -9505,8 +9506,7 @@
+ && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
+ && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && known_eq (GET_MODE_SIZE (mode),
+- cfun->machine->frame.reg_offset[regno2]
+- - cfun->machine->frame.reg_offset[regno]))
++ frame.reg_offset[regno2] - frame.reg_offset[regno]))
+ {
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ rtx mem2;
+@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
+ static sbitmap
+ aarch64_get_separate_components (void)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
+ bitmap_clear (components);
+
+@@ -9627,18 +9628,18 @@
+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ continue;
+
+- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
++ poly_int64 offset = frame.reg_offset[regno];
+
+ /* If the register is saved in the first SVE save slot, we use
+ it as a stack probe for -fstack-clash-protection. */
+ if (flag_stack_clash_protection
+- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
++ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
+ && known_eq (offset, 0))
+ continue;
+
+ /* Get the offset relative to the register we'll use. */
+ if (frame_pointer_needed)
+- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
++ offset -= frame.below_hard_fp_saved_regs_size;
+ else
+ offset += crtl->outgoing_args_size;
+
+@@ -9657,11 +9658,11 @@
+ /* If the spare predicate register used by big-endian SVE code
+ is call-preserved, it must be saved in the main prologue
+ before any saves that use it. */
+- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
+- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
++ if (frame.spare_pred_reg != INVALID_REGNUM)
++ bitmap_clear_bit (components, frame.spare_pred_reg);
+
+- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
+- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
++ unsigned reg1 = frame.wb_push_candidate1;
++ unsigned reg2 = frame.wb_push_candidate2;
+ /* If registers have been chosen to be stored/restored with
+ writeback don't interfere with them to avoid having to output explicit
+ stack adjustment instructions. */
+@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
+ static void
+ aarch64_process_components (sbitmap components, bool prologue_p)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
+ ? HARD_FRAME_POINTER_REGNUM
+ : STACK_POINTER_REGNUM);
+@@ -9784,9 +9786,9 @@
+ machine_mode mode = aarch64_reg_save_mode (regno);
+
+ rtx reg = gen_rtx_REG (mode, regno);
+- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
++ poly_int64 offset = frame.reg_offset[regno];
+ if (frame_pointer_needed)
+- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
++ offset -= frame.below_hard_fp_saved_regs_size;
+ else
+ offset += crtl->outgoing_args_size;
+
+@@ -9811,14 +9813,14 @@
+ break;
+ }
+
+- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
++ poly_int64 offset2 = frame.reg_offset[regno2];
+ /* The next register is not of the same class or its offset is not
+ mergeable with the current one into a pair. */
+ if (aarch64_sve_mode_p (mode)
+ || !satisfies_constraint_Ump (mem)
+ || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
+ || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
+- || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
++ || maybe_ne ((offset2 - frame.reg_offset[regno]),
+ GET_MODE_SIZE (mode)))
+ {
+ insn = emit_insn (set);
+@@ -9840,7 +9842,7 @@
+ /* REGNO2 can be saved/restored in a pair with REGNO. */
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ if (frame_pointer_needed)
+- offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
++ offset2 -= frame.below_hard_fp_saved_regs_size;
+ else
+ offset2 += crtl->outgoing_args_size;
+ rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+@@ -9935,6 +9937,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ bool frame_related_p,
+ bool final_adjustment_p)
+ {
++ aarch64_frame &frame = cfun->machine->frame;
+ HOST_WIDE_INT guard_size
+ = 1 << param_stack_clash_protection_guard_size;
+ HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
+@@ -9955,25 +9958,25 @@
+ register as a probe. We can't assume that LR was saved at position 0
+ though, so treat any space below it as unprobed. */
+ if (final_adjustment_p
+- && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
++ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
+ {
+- poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
++ poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
+ if (known_ge (lr_offset, 0))
+ min_probe_threshold -= lr_offset.to_constant ();
+ else
+ gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
+ }
+
+- poly_int64 frame_size = cfun->machine->frame.frame_size;
++ poly_int64 frame_size = frame.frame_size;
+
+ /* We should always have a positive probe threshold. 
*/ + gcc_assert (min_probe_threshold > 0); + + if (flag_stack_clash_protection && !final_adjustment_p) + { +- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; +- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; +- poly_int64 final_adjust = cfun->machine->frame.final_adjust; ++ poly_int64 initial_adjust = frame.initial_adjust; ++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust; ++ poly_int64 final_adjust = frame.final_adjust; + + if (known_eq (frame_size, 0)) + { +@@ -10262,17 +10265,18 @@ aarch64_epilogue_uses (int regno) + void + aarch64_expand_prologue (void) + { +- poly_int64 frame_size = cfun->machine->frame.frame_size; +- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; +- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +- poly_int64 final_adjust = cfun->machine->frame.final_adjust; +- poly_int64 callee_offset = cfun->machine->frame.callee_offset; +- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; ++ aarch64_frame &frame = cfun->machine->frame; ++ poly_int64 frame_size = frame.frame_size; ++ poly_int64 initial_adjust = frame.initial_adjust; ++ HOST_WIDE_INT callee_adjust = frame.callee_adjust; ++ poly_int64 final_adjust = frame.final_adjust; ++ poly_int64 callee_offset = frame.callee_offset; ++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size +- = cfun->machine->frame.below_hard_fp_saved_regs_size; +- unsigned reg1 = cfun->machine->frame.wb_push_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_push_candidate2; +- bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; ++ = frame.below_hard_fp_saved_regs_size; ++ unsigned reg1 = frame.wb_push_candidate1; ++ unsigned reg2 = frame.wb_push_candidate2; ++ bool emit_frame_chain = frame.emit_frame_chain; + rtx_insn *insn; + + if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) +@@ -10303,7 +10307,7 @@ aarch64_expand_prologue (void) + } + + /* Push return address to shadow call stack. */ +- if (cfun->machine->frame.is_scs_enabled) ++ if (frame.is_scs_enabled) + emit_insn (gen_scs_push ()); + + if (flag_stack_usage_info) +@@ -10342,7 +10346,7 @@ aarch64_expand_prologue (void) + + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust +- - cfun->machine->frame.hard_fp_offset); ++ - frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + + /* The offset of the bottom of the save area from the current SP. 
*/ +@@ -10445,16 +10449,17 @@ aarch64_use_return_insn_p (void) + void + aarch64_expand_epilogue (rtx_call_insn *sibcall) + { +- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; +- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; +- poly_int64 final_adjust = cfun->machine->frame.final_adjust; +- poly_int64 callee_offset = cfun->machine->frame.callee_offset; +- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; ++ aarch64_frame &frame = cfun->machine->frame; ++ poly_int64 initial_adjust = frame.initial_adjust; ++ HOST_WIDE_INT callee_adjust = frame.callee_adjust; ++ poly_int64 final_adjust = frame.final_adjust; ++ poly_int64 callee_offset = frame.callee_offset; ++ poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 below_hard_fp_saved_regs_size +- = cfun->machine->frame.below_hard_fp_saved_regs_size; +- unsigned reg1 = cfun->machine->frame.wb_pop_candidate1; +- unsigned reg2 = cfun->machine->frame.wb_pop_candidate2; +- unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled ++ = frame.below_hard_fp_saved_regs_size; ++ unsigned reg1 = frame.wb_pop_candidate1; ++ unsigned reg2 = frame.wb_pop_candidate2; ++ unsigned int last_gpr = (frame.is_scs_enabled + ? R29_REGNUM : R30_REGNUM); + rtx cfi_ops = NULL; + rtx_insn *insn; +@@ -10488,7 +10493,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + /* We need to add memory barrier to prevent read from deallocated stack. */ + bool need_barrier_p + = maybe_ne (get_frame_size () +- + cfun->machine->frame.saved_varargs_size, 0); ++ + frame.saved_varargs_size, 0); + + /* Emit a barrier to prevent loads from a deallocated stack. */ + if (maybe_gt (final_adjust, crtl->outgoing_args_size) +@@ -10569,7 +10574,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + } + + /* Pop return address from shadow call stack. */ +- if (cfun->machine->frame.is_scs_enabled) ++ if (frame.is_scs_enabled) + { + machine_mode mode = aarch64_reg_save_mode (R30_REGNUM); + rtx reg = gen_rtx_REG (mode, R30_REGNUM); +@@ -13023,24 +13028,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) + poly_int64 + aarch64_initial_elimination_offset (unsigned from, unsigned to) + { ++ aarch64_frame &frame = cfun->machine->frame; ++ + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) +- return cfun->machine->frame.hard_fp_offset; ++ return frame.hard_fp_offset; + + if (from == FRAME_POINTER_REGNUM) +- return cfun->machine->frame.hard_fp_offset +- - cfun->machine->frame.locals_offset; ++ return frame.hard_fp_offset - frame.locals_offset; + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) +- return cfun->machine->frame.frame_size +- - cfun->machine->frame.locals_offset; ++ return frame.frame_size - frame.locals_offset; + } + +- return cfun->machine->frame.frame_size; ++ return frame.frame_size; + } + + +-- +2.33.0 +
View file
_service:tar_scm:0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch
Added
@@ -0,0 +1,226 @@ +From 64560e75b4d020b6c47e07592595ceed663541af Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 1 Aug 2024 16:07:25 +0800
+Subject: [PATCH 182/188] LoongArch: Use iorn and andn standard pattern names.
+
+r15-1890 introduced new optabs iorc and andc, and its corresponding
+internal functions BIT_{ANDC,IORC}, and if targets defines such optabs
+for vector modes. And in r15-2258 the iorc and andc were renamed to
+iorn and andn.
+So we changed the andn and iorn implementation templates to the standard
+template names.
+
+gcc/ChangeLog:
+
+ * config/loongarch/lasx.md (xvandn<mode>3): Rename to ...
+ (andn<mode>3): This.
+ (xvorn<mode>3): Rename to ...
+ (iorn<mode>3): This.
+ * config/loongarch/loongarch-builtins.cc
+ (CODE_FOR_lsx_vandn_v): Defined as the modified name.
+ (CODE_FOR_lsx_vorn_v): Likewise.
+ (CODE_FOR_lasx_xvandn_v): Likewise.
+ (CODE_FOR_lasx_xvorn_v): Likewise.
+ (loongarch_expand_builtin_insn): When the builtin function to be
+ called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the
+ two operands.
+ * config/loongarch/loongarch.md (<optab>n<mode>): Rename to ...
+ (<optab>n<mode>3): This.
+ * config/loongarch/lsx.md (vandn<mode>3): Rename to ...
+ (andn<mode>3): This.
+ (vorn<mode>3): Rename to ...
+ (iorn<mode>3): This.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/loongarch/lasx-andn-iorn.c: New test.
+ * gcc.target/loongarch/lsx-andn-iorn.c: New test.
+---
+ gcc/config/loongarch/lasx.md | 10 +++----
+ gcc/config/loongarch/loongarch-builtins.cc | 10 ++++---
+ gcc/config/loongarch/loongarch.md | 8 +++---
+ gcc/config/loongarch/lsx.md | 10 +++----
+ .../gcc.target/loongarch/lasx-andn-iorn.c | 11 ++++++++
+ .../gcc.target/loongarch/lsx-andn-iorn.c | 28 +++++++++++++++++++
+ 6 files changed, 59 insertions(+), 18 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 44a7d58ff..3775155ca 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -2716,12 +2716,12 @@
+ (set_attr "mode" "V4DI")])
+
+ ;; Extend loongson-sx to loongson-asx.
+-(define_insn "xvandn<mode>3"
++(define_insn "andn<mode>3"
+ [(set (match_operand:LASX 0 "register_operand" "=f")
+- (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
+- (match_operand:LASX 2 "register_operand" "f")))]
++ (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f"))
++ (match_operand:LASX 1 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+- "xvandn.v\t%u0,%u1,%u2"
++ "xvandn.v\t%u0,%u2,%u1"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+@@ -4637,7 +4637,7 @@
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+-(define_insn "xvorn<mode>3"
++(define_insn "iorn<mode>3"
+ [(set (match_operand:ILASX 0 "register_operand" "=f")
+ (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
+ (match_operand:ILASX 1 "register_operand" "f")))]
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
+index 51abba007..f9ff85d2e 100644
+--- a/gcc/config/loongarch/loongarch-builtins.cc
++++ b/gcc/config/loongarch/loongarch-builtins.cc
+@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
+ #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du
+ #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s
+ #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d
+-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3
+-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3
++#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3
++#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3
+ #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2
+ #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2
+ #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2
+@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
+ #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si
+ #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di
+
+-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3
+-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3
++#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3
++#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3
+ #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2
+ #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2
+ #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2
+@@ -2853,6 +2853,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops,
+ case CODE_FOR_lsx_vpickod_b:
+ case CODE_FOR_lsx_vpickod_h:
+ case CODE_FOR_lsx_vpickod_w:
++ case CODE_FOR_lsx_vandn_v:
+ case CODE_FOR_lasx_xvilvh_b:
+ case CODE_FOR_lasx_xvilvh_h:
+ case CODE_FOR_lasx_xvilvh_w:
+@@ -2873,6 +2874,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops,
+ case CODE_FOR_lasx_xvpickod_b:
+ case CODE_FOR_lasx_xvpickod_h:
+ case CODE_FOR_lasx_xvpickod_w:
++ case CODE_FOR_lasx_xvandn_v:
+ /* Swap the operands 1 and 2 for interleave operations. Built-ins follow
+ convention of ISA, which have op1 as higher component and op2 as lower
+ component. However, the VEC_PERM op in tree and vec_concat in RTL
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index b1c828dba..58c8f28ed 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -1701,13 +1701,13 @@
+ [(set_attr "type" "logical")
+ (set_attr "mode" "SI")])
+
+-(define_insn "<optab>n<mode>"
++(define_insn "<optab>n<mode>3"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (neg_bitwise:X
+- (not:X (match_operand:X 1 "register_operand" "r"))
+- (match_operand:X 2 "register_operand" "r")))]
++ (not:X (match_operand:X 2 "register_operand" "r"))
++ (match_operand:X 1 "register_operand" "r")))]
+ ""
+- "<insn>n\t%0,%2,%1"
++ "<insn>n\t%0,%1,%2"
+ [(set_attr "type" "logical")
+ (set_attr "mode" "<MODE>")])
+
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index 2eac11473..c7480aafd 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -2344,12 +2344,12 @@
+ }
+ [(set_attr "mode" "V4SF")])
+
+-(define_insn "vandn<mode>3"
++(define_insn "andn<mode>3"
+ [(set (match_operand:LSX 0 "register_operand" "=f")
+- (and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f"))
+- (match_operand:LSX 2 "register_operand" "f")))]
++ (and:LSX (not:LSX (match_operand:LSX 2 "register_operand" "f"))
++ (match_operand:LSX 1 "register_operand" "f")))]
+ "ISA_HAS_LSX"
+- "vandn.v\t%w0,%w1,%w2"
++ "vandn.v\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+@@ -3028,7 +3028,7 @@
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+-(define_insn "vorn<mode>3"
++(define_insn "iorn<mode>3"
+ [(set (match_operand:ILSX 0 "register_operand" "=f")
+ (ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f"))
+ (match_operand:ILSX 1 "register_operand" "f")))]
+diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c
+new file mode 100644
+index 000000000..4aa5f19a6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c
+@@ -0,0 +1,11 @@
++#define N 8
++
++#include "./lsx-andn-iorn.c"
++
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlasx -ftree-vectorize" } */
++
++/* We should produce a BIT_ANDC and BIT_IORC here. */
++
++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c
+new file mode 100644
+index 000000000..7bceccd37
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mlsx -ftree-vectorize" } */
++
++#ifndef N
++#define N 4
++#endif
++
++extern float a[N], b[N];
++extern int c[N], d[N];
++
++void
++bar1 (void)
++{
++ for (int i = 0; i < N; i++)
++ d[i] = a[i] > b[i] ? 0 : c[i];
++}
++
++void
++bar2 (void)
++{
++ for (int i = 0; i < N; i++)
++ d[i] = a[i] > b[i] ? c[i]: -1;
++}
++
++/* We should produce a BIT_ANDC and BIT_IORC here. */
++
++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
+--
+2.43.0
+
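For reference, the scalar shapes matched by the renamed <optab>n<mode>3 pattern in the patch above are an AND or IOR with one complemented operand. A minimal illustrative sketch, not part of the patch (instruction selection depends on compiler version and flags):

/* Candidates for single andn/orn instructions on LoongArch, matching
   the RTL shapes (and (not x) y) and (ior (not x) y) from the patch.  */

long
andn_example (long a, long b)
{
  return a & ~b;
}

long
iorn_example (long a, long b)
{
  return a | ~b;
}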
View file
_service:tar_scm:0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
Added
@@ -0,0 +1,103 @@ +From 54a6e52207703a8643fc406175377105f887ebef Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:04 +0100
+Subject: [PATCH] [Backport][SME] aarch64: Avoid a use of callee_offset
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9ab771fa8cd747f34786c6f33deea32c2eb828b
+
+When we emit the frame chain, i.e. when we reach Here in this statement
+of aarch64_expand_prologue:
+
+  if (emit_frame_chain)
+    {
+      // Here
+      ...
+    }
+
+the stack is in one of two states:
+
+- We've allocated up to the frame chain, but no more.
+
+- We've allocated the whole frame, and the frame chain is within easy
+  reach of the new SP.
+
+The offset of the frame chain from the current SP is available
+in aarch64_frame as callee_offset. It is also available as the
+chain_offset local variable, where the latter is calculated from other
+data. (However, chain_offset is not always equal to callee_offset when
+!emit_frame_chain, so chain_offset isn't redundant.)
+
+In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using
+chain_offset for the initialisation of the hard frame pointer:
+
+        aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
+-                          stack_pointer_rtx, callee_offset,
++                          stack_pointer_rtx, chain_offset,
+                           tmp1_rtx, tmp0_rtx, frame_pointer_needed);
+
+But the later REG_CFA_ADJUST_CFA handling still used callee_offset.
+
+I think the difference is harmless, but it's more logical for the
+CFA note to be in sync, and it's more convenient for later patches
+if it uses chain_offset.
+
+gcc/
+ * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use
+ chain_offset rather than callee_offset.
+---
+ gcc/config/aarch64/aarch64.cc | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index b7da1d0be..fbd7a079a 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -10263,21 +10263,20 @@ aarch64_epilogue_uses (int regno)
+ current FP is also set up if it is in use. */
+
+ void
+ aarch64_expand_prologue (void)
+ {
+ aarch64_frame &frame = cfun->machine->frame;
+ poly_int64 frame_size = frame.frame_size;
+ poly_int64 initial_adjust = frame.initial_adjust;
+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
+ poly_int64 final_adjust = frame.final_adjust;
+- poly_int64 callee_offset = frame.callee_offset;
+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
+ poly_int64 below_hard_fp_saved_regs_size
+ = frame.below_hard_fp_saved_regs_size;
+ unsigned reg1 = frame.wb_push_candidate1;
+ unsigned reg2 = frame.wb_push_candidate2;
+ bool emit_frame_chain = frame.emit_frame_chain;
+ rtx_insn *insn;
+
+ if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
+ {
+@@ -10376,22 +10375,21 @@
+ the CFA based on the frame pointer. We therefore need new
+ DW_CFA_expressions to re-express the save slots with addresses
+ based on the frame pointer. */
+ rtx_insn *insn = get_last_insn ();
+ gcc_assert (RTX_FRAME_RELATED_P (insn));
+
+ /* Add an explicit CFA definition if this was previously
+ implicit. */
+ if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
+ {
+- rtx src = plus_constant (Pmode, stack_pointer_rtx,
+- callee_offset);
++ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (hard_frame_pointer_rtx, src));
+ }
+
+ /* Change the save slot expressions for the registers that
+ we've already saved. */
+ aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2],
+ hard_frame_pointer_rtx, UNITS_PER_WORD);
+ aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
+ hard_frame_pointer_rtx, 0);
+--
+2.38.1.windows.1
+
View file
_service:tar_scm:0183-LoongArch-Drop-vcond-u-expanders.patch
Added
@@ -0,0 +1,127 @@ +From 8394519779553a2c59214d76054dd1ba87a380b3 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 8 Aug 2024 10:39:54 +0800
+Subject: [PATCH 183/188] LoongArch: Drop vcond{,u} expanders.
+
+Optabs vcond{,u} will be removed for GCC 15. Since regtest shows no
+fallout, dropping the expanders, now.
+
+gcc/ChangeLog:
+
+ PR target/114189
+ * config/loongarch/lasx.md (vcondu<LASX:mode><ILASX:mode>): Delete.
+ (vcond<LASX:mode><LASX_2:mode>): Likewise.
+ * config/loongarch/lsx.md (vcondu<LSX:mode><ILSX:mode>): Likewise.
+ (vcond<LSX:mode><LSX_2:mode>): Likewise.
+---
+ gcc/config/loongarch/lasx.md | 37 ------------------------------------
+ gcc/config/loongarch/lsx.md | 31 ------------------------------
+ 2 files changed, 68 deletions(-)
+
+diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
+index 3775155ca..be2f6ca8e 100644
+--- a/gcc/config/loongarch/lasx.md
++++ b/gcc/config/loongarch/lasx.md
+@@ -165,9 +165,6 @@
+ ;; All vector modes with 256 bits.
+ (define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI])
+
+-;; Same as LASX. Used by vcond to iterate two modes.
+-(define_mode_iterator LASX_2 [V4DF V8SF V4DI V8SI V16HI V32QI])
+-
+ ;; Only used for splitting insert_d and copy_{u,s}.d.
+ (define_mode_iterator LASX_D [V4DI V4DF])
+
+@@ -762,40 +759,6 @@
+ DONE;
+ })
+
+-;; FIXME: 256??
+-(define_expand "vcondu<LASX:mode><ILASX:mode>"
+- [(match_operand:LASX 0 "register_operand")
+- (match_operand:LASX 1 "reg_or_m1_operand")
+- (match_operand:LASX 2 "reg_or_0_operand")
+- (match_operator 3 ""
+- [(match_operand:ILASX 4 "register_operand")
+- (match_operand:ILASX 5 "register_operand")])]
+- "ISA_HAS_LASX
+- && (GET_MODE_NUNITS (<LASX:MODE>mode)
+- == GET_MODE_NUNITS (<ILASX:MODE>mode))"
+-{
+- loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode,
+- operands);
+- DONE;
+-})
+-
+-;; FIXME: 256??
+-(define_expand "vcond<LASX:mode><LASX_2:mode>"
+- [(match_operand:LASX 0 "register_operand")
+- (match_operand:LASX 1 "reg_or_m1_operand")
+- (match_operand:LASX 2 "reg_or_0_operand")
+- (match_operator 3 ""
+- [(match_operand:LASX_2 4 "register_operand")
+- (match_operand:LASX_2 5 "register_operand")])]
+- "ISA_HAS_LASX
+- && (GET_MODE_NUNITS (<LASX:MODE>mode)
+- == GET_MODE_NUNITS (<LASX_2:MODE>mode))"
+-{
+- loongarch_expand_vec_cond_expr (<LASX:MODE>mode, <LASX:VIMODE256>mode,
+- operands);
+- DONE;
+-})
+-
+ ;; Same as vcond_
+ (define_expand "vcond_mask_<mode><mode256_i>"
+ [(match_operand:LASX 0 "register_operand")
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
+index c7480aafd..5cb5bc61f 100644
+--- a/gcc/config/loongarch/lsx.md
++++ b/gcc/config/loongarch/lsx.md
+@@ -186,9 +186,6 @@
+ ;; All vector modes with 128 bits.
+ (define_mode_iterator LSX [V2DF V4SF V2DI V4SI V8HI V16QI])
+
+-;; Same as LSX. Used by vcond to iterate two modes.
+-(define_mode_iterator LSX_2 [V2DF V4SF V2DI V4SI V8HI V16QI])
+-
+ ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d.
+ (define_mode_iterator LSX_D [V2DI V2DF])
+
+@@ -533,34 +530,6 @@
+ DONE;
+ })
+
+-(define_expand "vcondu<LSX:mode><ILSX:mode>"
+- [(match_operand:LSX 0 "register_operand")
+- (match_operand:LSX 1 "reg_or_m1_operand")
+- (match_operand:LSX 2 "reg_or_0_operand")
+- (match_operator 3 ""
+- [(match_operand:ILSX 4 "register_operand")
+- (match_operand:ILSX 5 "register_operand")])]
+- "ISA_HAS_LSX
+- && (GET_MODE_NUNITS (<LSX:MODE>mode) == GET_MODE_NUNITS (<ILSX:MODE>mode))"
+-{
+- loongarch_expand_vec_cond_expr (<LSX:MODE>mode, <LSX:VIMODE>mode, operands);
+- DONE;
+-})
+-
+-(define_expand "vcond<LSX:mode><LSX_2:mode>"
+- [(match_operand:LSX 0 "register_operand")
+- (match_operand:LSX 1 "reg_or_m1_operand")
+- (match_operand:LSX 2 "reg_or_0_operand")
+- (match_operator 3 ""
+- [(match_operand:LSX_2 4 "register_operand")
+- (match_operand:LSX_2 5 "register_operand")])]
+- "ISA_HAS_LSX
+- && (GET_MODE_NUNITS (<LSX:MODE>mode) == GET_MODE_NUNITS (<LSX_2:MODE>mode))"
+-{
+- loongarch_expand_vec_cond_expr (<LSX:MODE>mode, <LSX:VIMODE>mode, operands);
+- DONE;
+-})
+-
+ (define_expand "vcond_mask_<mode><mode_i>"
+ [(match_operand:LSX 0 "register_operand")
+ (match_operand:LSX 1 "reg_or_m1_operand")
+--
+2.43.0
+
View file
_service:tar_scm:0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch
Added
@@ -0,0 +1,51 @@ +From 82bbe6513987a7656150110164e25f44fe410796 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 12 Sep 2023 16:05:05 +0100
+Subject: [PATCH 085/157] [Backport][SME] aarch64: Explicitly handle frames
+ with no saved registers
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c601c918c9ac01ef8315774a642ff924f77c85e5
+
+If a frame has no saved registers, it can be allocated in one go.
+There is no need to treat the areas below and above the saved
+registers as separate.
+
+And if we allocate the frame in one go, it should be allocated
+as the initial_adjust rather than the final_adjust. This allows the
+frame size to grow to guard_size - guard_used_by_caller before a stack
+probe is needed. (A frame with no register saves is necessarily a
+leaf frame.)
+
+This is a no-op as things stand, since a leaf function will have
+no outgoing arguments, and so all the frame will be above where
+the saved registers normally go.
+
+gcc/
+ * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly
+ allocate the frame in one go if there are no saved registers.
+---
+ gcc/config/aarch64/aarch64.cc | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index fbd7a079a..c59af6b1c 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8978,9 +8978,11 @@ aarch64_layout_frame (void)
+
+ HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
+ HOST_WIDE_INT const_saved_regs_size;
+- if (frame.frame_size.is_constant (&const_size)
+- && const_size < max_push_offset
+- && known_eq (frame.hard_fp_offset, const_size))
++ if (known_eq (frame.saved_regs_size, 0))
++ frame.initial_adjust = frame.frame_size;
++ else if (frame.frame_size.is_constant (&const_size)
++ && const_size < max_push_offset
++ && known_eq (frame.hard_fp_offset, const_size))
+ {
+ /* Simple, small frame with no outgoing arguments:
+
+--
+2.33.0
+
View file
_service:tar_scm:0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch
Added
@@ -0,0 +1,220 @@ +From d9ce0e85c8cba331413c6a521987a1ecbd94df1c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 8 Aug 2024 09:59:28 +0800
+Subject: [PATCH 184/188] LoongArch: Provide ashr lshr and ashl RTL pattern for
+ vectors.
+
+We support vashr vlshr and vashl. However, in r15-1638 support optimize
+x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into (unsigned) x >> 31.
+To support this optimization, vector ashr lshr and ashl need to be implemented.
+
+gcc/ChangeLog:
+
+ * config/loongarch/loongarch.md (insn): Added rotatert rotr pairs.
+ * config/loongarch/simd.md (rotr<mode>3): Remove to ...
+ (<optab><mode>3): This.
+
+gcc/testsuite/ChangeLog:
+
+ * g++.target/loongarch/vect-ashr-lshr.C: New test.
+---
+ gcc/config/loongarch/loongarch.md | 1 +
+ gcc/config/loongarch/simd.md | 13 +-
+ .../g++.target/loongarch/vect-ashr-lshr.C | 147 ++++++++++++++++++
+ 3 files changed, 155 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 58c8f28ed..867977b36 100644
+--- a/gcc/config/loongarch/loongarch.md
++++ b/gcc/config/loongarch/loongarch.md
+@@ -559,6 +559,7 @@
+ (define_code_attr insn [(ashift "sll")
+ (ashiftrt "sra")
+ (lshiftrt "srl")
++ (rotatert "rotr")
+ (ior "or")
+ (xor "xor")
+ (and "and")
+diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
+index 00d4c7831..c28b95282 100644
+--- a/gcc/config/loongarch/simd.md
++++ b/gcc/config/loongarch/simd.md
+@@ -306,14 +306,15 @@
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ });
+
+-;; <x>vrotri.{b/h/w/d}
++;; <x>v{rotr/sll/sra/srl}i.{b/h/w/d}
+
+-(define_insn "rotr<mode>3"
++(define_insn "<optab><mode>3"
+ [(set (match_operand:IVEC 0 "register_operand" "=f")
+- (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
+- (match_operand:SI 2 "const_<bitimm>_operand")))]
+- ""
+- "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2";
++ (shift_w:IVEC
++ (match_operand:IVEC 1 "register_operand" "f")
++ (match_operand:SI 2 "const_<bitimm>_operand")))]
++ "ISA_HAS_LSX"
++ "<x>v<insn>i.<simdfmt>\t%<wu>0,%<wu>1,%2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
+new file mode 100644
+index 000000000..bcef985fa
+--- /dev/null
++++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
+@@ -0,0 +1,147 @@
++/* { dg-do compile } */
++/* { dg-options "-mlasx -O2" } */
++/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */
++/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */
++/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */
++/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */
++/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */
++/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */
++/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */
++/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */
++
++typedef signed char v16qi __attribute__((vector_size(16)));
++typedef signed char v32qi __attribute__((vector_size(32)));
++typedef short v8hi __attribute__((vector_size(16)));
++typedef short v16hi __attribute__((vector_size(32)));
++typedef int v4si __attribute__((vector_size(16)));
++typedef int v8si __attribute__((vector_size(32)));
++typedef long long v2di __attribute__((vector_size(16)));
++typedef long long v4di __attribute__((vector_size(32)));
++
++v16qi
++foo (v16qi a)
++{
++ v16qi const1_op = 
__extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo2 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo3 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo4 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo5 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){1,1,1,1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8si ++foo6 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo7 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){1,1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo8 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){1,1,1,1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16qi ++foo9 (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo10 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo11 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo12 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo13 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){-1,-1,-1,-1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8si ++foo14 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo15 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){-1,-1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo16 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){-1,-1,-1,-1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} +-- +2.43.0 +
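The r15-1638 optimization cited in the commit message above has a simple scalar analogue; the sketch below is illustrative only and not part of the patch:

/* x < 0 ? -1 : 0 folds to an arithmetic right shift of the sign bit,
   and x < 0 ? 1 : 0 to a logical right shift.  The patch adds the
   vector ashr/lshr/ashl patterns so the vectorized forms of these
   selects can fold the same way on LoongArch.  */

int
sign_mask (int x)
{
  return x < 0 ? -1 : 0;   /* becomes x >> 31 (signed)  */
}

int
sign_bit (int x)
{
  return x < 0 ? 1 : 0;    /* becomes (unsigned) x >> 31  */
}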
_service:tar_scm:0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
Added
@@ -0,0 +1,236 @@ +From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:05 +0100 +Subject: PATCH 086/157 BackportSME aarch64: Add bytes_below_saved_regs + to frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976 + +The frame layout code currently hard-codes the assumption that +the number of bytes below the saved registers is equal to the +size of the outgoing arguments. This patch abstracts that +value into a new field of aarch64_frame. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, + and use it instead of crtl->outgoing_args_size. + (aarch64_get_separate_components): Use bytes_below_saved_regs instead + of outgoing_args_size. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- + gcc/config/aarch64/aarch64.h | 5 +++ + 2 files changed, 41 insertions(+), 35 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index c59af6b1c..5533dd85b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + ++ frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small + offset range. These saves happen below the hard frame pointer. */ +@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void) + + poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; + +- poly_int64 above_outgoing_args ++ poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + + frame.hard_fp_offset +- = above_outgoing_args - frame.below_hard_fp_saved_regs_size; ++ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +- gcc_assert (multiple_p (crtl->outgoing_args_size, ++ gcc_assert (multiple_p (frame.bytes_below_saved_regs, + STACK_BOUNDARY / BITS_PER_UNIT)); +- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; ++ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + + frame.locals_offset = frame.saved_varargs_size; + +@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; +@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void) + && const_size < max_push_offset + && known_eq (frame.hard_fp_offset, const_size)) + { +- /* Simple, small frame with no outgoing arguments: ++ /* Simple, small frame with no data below the saved registers. + + stp reg1, reg2, sp, -frame_size! 
+ stp reg3, reg4, sp, 16 */ + frame.callee_adjust = const_size; + } +- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) ++ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) + && frame.saved_regs_size.is_constant (&const_saved_regs_size) +- && const_outgoing_args_size + const_saved_regs_size < 512 +- /* We could handle this case even with outgoing args, provided +- that the number of args left us with valid offsets for all +- predicate and vector save slots. It's such a rare case that +- it hardly seems worth the effort though. */ +- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) ++ && const_below_saved_regs + const_saved_regs_size < 512 ++ /* We could handle this case even with data below the saved ++ registers, provided that that data left us with valid offsets ++ for all predicate and vector save slots. It's such a rare ++ case that it hardly seems worth the effort though. */ ++ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca + && frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset)) + { +- /* Frame with small outgoing arguments: ++ /* Frame with small area below the saved registers: + + sub sp, sp, frame_size +- stp reg1, reg2, sp, outgoing_args_size +- stp reg3, reg4, sp, outgoing_args_size + 16 */ ++ stp reg1, reg2, sp, bytes_below_saved_regs ++ stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_outgoing_args_size; ++ frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void) + + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = (frame.hard_fp_offset + + frame.below_hard_fp_saved_regs_size); +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else if (frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset) + { +- /* Frame with large outgoing arguments or SVE saves, but with +- a small local area: ++ /* Frame with large area below the saved registers, or with SVE saves, ++ but with a small area above: + + stp reg1, reg2, sp, -hard_fp_offset! 
+ stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.callee_adjust = const_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + else + { +- /* Frame with large local area and outgoing arguments or SVE saves, +- using frame pointer: ++ /* General case: + + sub sp, sp, hard_fp_offset + stp x29, x30, sp, 0 +@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void) + stp reg3, reg4, sp, 16 + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP +- sub sp, sp, outgoing_args_size */ ++ sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = frame.hard_fp_offset; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; +- frame.final_adjust = crtl->outgoing_args_size; ++ frame.final_adjust = frame.bytes_below_saved_regs; + } + + /* Make sure the individual adjustments add up to the full frame size. */ +@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset -= frame.below_hard_fp_saved_regs_size; + else +- offset += crtl->outgoing_args_size; ++ offset += frame.bytes_below_saved_regs; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (frame_pointer_needed) + offset2 -= frame.below_hard_fp_saved_regs_size; + else +- offset2 += crtl->outgoing_args_size; ++ offset2 += frame.bytes_below_saved_regs; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + registers. If POLY_SIZE is not large enough to require a probe this function + will only adjust the stack. When allocating the stack space + FRAME_RELATED_P is then used to indicate if the allocation is frame related. +- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing +- arguments. If we are then we ensure that any allocation larger than the ABI +- defined buffer needs a probe so that the invariant of having a 1KB buffer is +- maintained. ++ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below ++ the saved registers. If we are then we ensure that any allocation ++ larger than the ABI defined buffer needs a probe so that the ++ invariant of having a 1KB buffer is maintained. + + We emit barriers after each stack adjustment to prevent optimizations from + breaking the invariant that we never drop the stack more than a page. This +@@ -10132,7 +10133,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to + be probed. This maintains the requirement that each page is probed at + least once. 
For initial probing we probe only if the allocation is +- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe ++ more than GUARD_SIZE - buffer, and below the saved registers we probe + if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == + GUARD_SIZE. This works that for any allocation that is large enough to + trigger a probe here, we'll have at least one, and if they're not large +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 8f0ac2cde..9e0ca380e 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -801,6 +801,11 @@ struct GTY (()) aarch64_frame + /* The size of the callee-save registers with a slot in REG_OFFSET. */ + poly_int64 saved_regs_size; + ++ /* The number of bytes between the bottom of the static frame (the bottom ++ of the outgoing arguments) and the bottom of the register save area. ++ This value is always a multiple of STACK_BOUNDARY. */ ++ poly_int64 bytes_below_saved_regs; ++ + /* The size of the callee-save registers with a slot in REG_OFFSET that + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; +-- +2.33.0 +
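As a concrete illustration of the new field (my example, not from the patch): under AAPCS64 only the first eight integer arguments travel in x0-x7, so a call that passes more than eight leaves outgoing stack arguments at the bottom of the caller's frame, and that region is exactly what bytes_below_saved_regs now measures.

/* Illustrative only.  The ninth and tenth arguments of g are passed
   on the stack, so f's frame keeps an outgoing-argument area below
   its register saves:

       high addresses:  locals
                        callee-saved registers
       low addresses:   outgoing args for g   <- bytes_below_saved_regs
                                              <- sp at the call site  */
extern long g (long, long, long, long, long, long, long, long,
               long, long);

long
f (long x)
{
  return g (x, x, x, x, x, x, x, x, x, x);
}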
_service:tar_scm:0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch
Added
@@ -0,0 +1,203 @@ +From 7e8e122306feaecf8d7b520b4e7c0b9908ca6fd2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 4 Jul 2024 02:49:28 +0800 +Subject: PATCH 185/188 LoongArch: Implement scalar isinf, isnormal, and + isfinite via fclass + +Doing so can avoid loading FP constants from the memory. It also +partially fixes PR 66262 as fclass does not signal on sNaN. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (extendsidi2): Add ("=r", "f") + alternative and use movfr2gr.s for it. The spec clearly states + movfr2gr.s sign extends the value to GRLEN. + (fclass_<fmt>): Make the result SImode instead of a floating + mode. The fclass results are really not FP values. + (FCLASS_MASK): New define_int_iterator. + (fclass_optab): New define_int_attr. + (<FCLASS_MASK:fclass_optab><ANYF:mode>): New define_expand + template. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/fclass-compile.c: New test. + * gcc.target/loongarch/fclass-run.c: New test. +--- + gcc/config/loongarch/loongarch.md | 53 ++++++++++++++++--- + .../gcc.target/loongarch/fclass-compile.c | 20 +++++++ + .../gcc.target/loongarch/fclass-run.c | 53 +++++++++++++++++++ + 3 files changed, 119 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-compile.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-run.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 867977b36..15960a79f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1851,16 +1851,17 @@ + ;; .................... + + (define_insn "extendsidi2" +- (set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k"))) ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k,f"))) + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + ldptr.w\t%0,%1 + ld.w\t%0,%1 +- ldx.w\t%0,%1" +- (set_attr "move_type" "sll0,load,load,load") ++ ldx.w\t%0,%1 ++ movfr2gr.s\t%0,%1" ++ (set_attr "move_type" "sll0,load,load,load,mftg") + (set_attr "mode" "DI")) + + (define_insn "extend<SHORT:mode><GPR:mode>2" +@@ -4110,14 +4111,52 @@ + "movgr2fcsr\t$r%0,%1") + + (define_insn "fclass_<fmt>" +- (set (match_operand:ANYF 0 "register_operand" "=f") +- (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") +- UNSPEC_FCLASS)) ++ (set (match_operand:SI 0 "register_operand" "=f") ++ (unspec:SI (match_operand:ANYF 1 "register_operand" "f") ++ UNSPEC_FCLASS)) + "TARGET_HARD_FLOAT" + "fclass.<fmt>\t%0,%1" + (set_attr "type" "unknown") + (set_attr "mode" "<MODE>")) + ++(define_int_iterator FCLASS_MASK 68 136 952) ++(define_int_attr fclass_optab ++ (68 "isinf") ++ (136 "isnormal") ++ (952 "isfinite")) ++ ++(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++ (match_operand:SI 0 "register_operand" "=r") ++ (match_operand:ANYF 1 "register_operand" " f") ++ (const_int FCLASS_MASK) ++ "TARGET_HARD_FLOAT" ++ { ++ rtx ft0 = gen_reg_rtx (SImode); ++ rtx t0 = gen_reg_rtx (word_mode); ++ rtx mask = GEN_INT (<FCLASS_MASK>); ++ ++ emit_insn (gen_fclass_<ANYF:fmt> (ft0, operands1)); ++ ++ if (TARGET_64BIT) ++ emit_insn (gen_extend_insn (t0, ft0, DImode, SImode, 0)); ++ else ++ emit_move_insn (t0, ft0); ++ ++ emit_move_insn (t0, gen_rtx_AND (word_mode, t0, mask)); ++ emit_move_insn (t0, gen_rtx_NE (word_mode, t0, const0_rtx)); ++ ++ if (TARGET_64BIT) ++ { ++ t0 = lowpart_subreg (SImode, t0, DImode); ++ SUBREG_PROMOTED_VAR_P (t0) = 1; ++ 
SUBREG_PROMOTED_SET (t0, SRP_SIGNED); ++ } ++ ++ emit_move_insn (operands0, t0); ++ ++ DONE; ++ }) ++ + (define_insn "bytepick_w_<bytepick_imm>" + (set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-compile.c b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +new file mode 100644 +index 000000000..9c24d6e26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler-times "fclass\\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "fclass\\.d" 1 } } */ ++ ++__attribute__ ((noipa)) int ++test_fclass_f (float f) ++{ ++ return __builtin_isinf (f) ++ | __builtin_isnormal (f) << 1 ++ | __builtin_isfinite (f) << 2; ++} ++ ++__attribute__ ((noipa)) int ++test_fclass_d (double d) ++{ ++ return __builtin_isinf (d) ++ | __builtin_isnormal (d) << 1 ++ | __builtin_isfinite (d) << 2; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-run.c b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +new file mode 100644 +index 000000000..e5585f9d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +@@ -0,0 +1,53 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fsignaling-nans -D_GNU_SOURCE -std=c23" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include <fenv.h> ++#include "fclass-compile.c" ++ ++#define ASSERT_EQ(x, y) (void)(x == y || (__builtin_abort (), 1)) ++ ++int ++main (void) ++{ ++ volatile float f_inf = __builtin_inff (); ++ volatile float f_zero = 0; ++ volatile float f_normal = 114.514; ++ volatile float f_subnormal = 1e-40; ++ volatile float f_qnan = __builtin_nanf (""); ++ volatile float f_snan = __builtin_nansf (""); ++ volatile double d_inf = __builtin_inf (); ++ volatile double d_zero = 0; ++ volatile double d_normal = 1919.810; ++ volatile double d_subnormal = 1e-320; ++ volatile double d_qnan = __builtin_nan (""); ++ volatile double d_snan = __builtin_nans (""); ++ ++#if __loongarch_frlen >= 64 ++ /* With fclass.{s/d} we shouldn't signal, even if the input is sNaN. ++ PR 66462. */ ++ feenableexcept (FE_INVALID); ++#endif ++ ++ ASSERT_EQ (test_fclass_f (f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (-f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (-f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (f_qnan), 0); ++ ASSERT_EQ (test_fclass_f (f_snan), 0); ++ ++ ASSERT_EQ (test_fclass_d (d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (-d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (-d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (d_qnan), 0); ++ ASSERT_EQ (test_fclass_d (d_snan), 0); ++} +-- +2.43.0 +
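The FCLASS_MASK constants 68, 136 and 952 decode as sums of single-class bits in the fclass.{s,d} result. The bit positions below are my reading of the LoongArch ISA manual's fclass encoding, not something this patch spells out, so treat them as an assumption; the three masks do sum up consistently.

/* Assumed fclass.{s,d} result bits, one per class.  */
enum {
  FCLS_SNAN     = 1 << 0,   /*   1 */
  FCLS_QNAN     = 1 << 1,   /*   2 */
  FCLS_NEG_INF  = 1 << 2,   /*   4 */
  FCLS_NEG_NORM = 1 << 3,   /*   8 */
  FCLS_NEG_SUB  = 1 << 4,   /*  16 */
  FCLS_NEG_ZERO = 1 << 5,   /*  32 */
  FCLS_POS_INF  = 1 << 6,   /*  64 */
  FCLS_POS_NORM = 1 << 7,   /* 128 */
  FCLS_POS_SUB  = 1 << 8,   /* 256 */
  FCLS_POS_ZERO = 1 << 9    /* 512 */
};

/* isinf:    4 + 64                        = 68
   isnormal: 8 + 128                       = 136
   isfinite: 8 + 16 + 32 + 128 + 256 + 512 = 952
   Packs the three predicates the same way as the testcase above.  */
static int
classify (int fclass_bits)
{
  int is_inf    = (fclass_bits & (FCLS_NEG_INF | FCLS_POS_INF)) != 0;
  int is_normal = (fclass_bits & (FCLS_NEG_NORM | FCLS_POS_NORM)) != 0;
  int is_finite = (fclass_bits & 952) != 0;
  return is_inf | is_normal << 1 | is_finite << 2;
}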
_service:tar_scm:0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch
Added
@@ -0,0 +1,87 @@ +From bd5299017c233bcdf0fcc3dd7217eec1641411fe Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:06 +0100 +Subject: PATCH 087/157 BackportSME aarch64: Add bytes_below_hard_fp to + frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=99305f306246079cc57d30dae7c32107f02ff3e8 + +Following on from the previous bytes_below_saved_regs patch, this one +records the number of bytes that are below the hard frame pointer. +This eventually replaces below_hard_fp_saved_regs_size. + +If a frame pointer is not needed, the epilogue adds final_adjust +to the stack pointer before restoring registers: + + aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); + +Therefore, if the epilogue needs to restore the stack pointer from +the hard frame pointer, the directly corresponding offset is: + + -bytes_below_hard_fp + final_adjust + +i.e. go from the hard frame pointer to the bottom of the frame, +then add the same amount as if we were using the stack pointer +from the outset. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New + field. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. + (aarch64_expand_epilogue): Use it instead of + below_hard_fp_saved_regs_size. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 5 +++++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5533dd85b..2bb49b9b0 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8869,6 +8869,7 @@ aarch64_layout_frame (void) + of the callee save area. */ + bool saves_below_hard_fp_p = maybe_ne (offset, 0); + frame.below_hard_fp_saved_regs_size = offset; ++ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -10456,8 +10457,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + poly_int64 final_adjust = frame.final_adjust; + poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; + unsigned reg2 = frame.wb_pop_candidate2; + unsigned int last_gpr = (frame.is_scs_enabled +@@ -10515,7 +10515,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + is restored on the instruction doing the writeback. */ + aarch64_add_offset (Pmode, stack_pointer_rtx, + hard_frame_pointer_rtx, +- -callee_offset - below_hard_fp_saved_regs_size, ++ -bytes_below_hard_fp + final_adjust, + tmp1_rtx, tmp0_rtx, callee_adjust == 0); + else + /* The case where we need to re-use the register here is very rare, so +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 9e0ca380e..dedc5b32f 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -810,6 +810,11 @@ struct GTY (()) aarch64_frame + are saved below the hard frame pointer. */ + poly_int64 below_hard_fp_saved_regs_size; + ++ /* The number of bytes between the bottom of the static frame (the bottom ++ of the outgoing arguments) and the hard frame pointer. This value is ++ always a multiple of STACK_BOUNDARY. */ ++ poly_int64 bytes_below_hard_fp; ++ + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. 
This value is always a multiple of + STACK_BOUNDARY. */ +-- +2.33.0 +
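A worked instance of the epilogue identity above, with invented numbers and plain integers standing in for poly_int64 so it can be checked by running it: subtracting bytes_below_hard_fp from the hard frame pointer reaches the bottom of the frame, and adding final_adjust lands where the stack pointer must sit before the register restores.

#include <assert.h>

int
main (void)
{
  /* Invented frame: 32 bytes of outgoing args below the saved regs,
     16 bytes of SVE saves below the hard FP, final_adjust == 32.  */
  long bytes_below_saved_regs = 32;
  long sve_saves_below_hard_fp = 16;
  long bytes_below_hard_fp = bytes_below_saved_regs
                             + sve_saves_below_hard_fp;   /* 48 */
  long final_adjust = 32;

  long hard_fp = 1024;                                /* arbitrary */
  long frame_bottom = hard_fp - bytes_below_hard_fp;  /* 976 */

  /* SP before the restores, computed the patch's way.  */
  long sp = hard_fp + (-bytes_below_hard_fp + final_adjust);
  assert (sp == frame_bottom + final_adjust && sp == 1008);
  return 0;
}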
_service:tar_scm:0186-LoongArch-Add-support-to-annotate-tablejump.patch
Added
@@ -0,0 +1,155 @@ +From 5079c41ada379bd8d1bdb92dd2b91e72e9496ea6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Thu, 11 Jul 2024 19:43:48 +0800 +Subject: PATCH 186/188 LoongArch: Add support to annotate tablejump + +This is per the request from the kernel developers. For generating the +ORC unwind info, the objtool program needs to analysis the control flow +of a .o file. If a jump table is used, objtool has to correlate the +jump instruction with the table. + +On x86 (where objtool was initially developed) it's simple: a relocation +entry natrually correlates them because one single instruction is used +for table-based jump. But on an RISC machine objtool would have to +reconstruct the data flow if it must find out the correlation on its +own. + +So, emit an additional section to store the correlation info as pairs of +addresses, each pair contains the address of a jump instruction (jr) and +the address of the jump table. This is very trivial to implement in +GCC. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in + (mannotate-tablejump): New option. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.md (tablejump<mode>): Emit + additional correlation info between the jump instruction and the + jump table, if -mannotate-tablejump. + * doc/invoke.texi: Document -mannotate-tablejump. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/jump-table-annotate.c: New test. + +Suggested-by: Tiezhu Yang <yangtiezhu@loongson.cn> +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ + gcc/config/loongarch/loongarch.md | 12 +++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++++ + gcc/doc/invoke.texi | 13 ++++++++++++- + .../gcc.target/loongarch/jump-table-annotate.c | 15 +++++++++++++++ + 5 files changed, 46 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 0ecd10922..20795f6bd 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -301,3 +301,7 @@ default value is 4. + ; CPUCFG independently, so we use bit flags to specify them. + TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 ++ ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 15960a79f..66236a7c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3496,12 +3496,22 @@ + DONE; + }) + ++(define_mode_attr mode_size (DI "8") (SI "4")) ++ + (define_insn "@tablejump<mode>" + (set (pc) + (match_operand:P 0 "register_operand" "e")) + (use (label_ref (match_operand 1 "" ""))) + "" +- "jr\t%0" ++ { ++ return TARGET_ANNOTATE_TABLEJUMP ++ ? "1:jr\t%0\n\t" ++ ".pushsection\t.discard.tablejump_annotate\n\t" ++ "\t.<mode_size>byte\t1b\n\t" ++ "\t.<mode_size>byte\t%1\n\t" ++ ".popsection" ++ : "jr\t%0"; ++ } + (set_attr "type" "jump") + (set_attr "mode" "none")) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 69b3b965c..16fed6ec3 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -310,6 +310,10 @@ default value is 4. 
+ TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 + ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++ + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f6d59317b..d2c52cdf4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1011,7 +1011,7 @@ Objective-C and Objective-C++ Dialects}. + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol + -mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol +--mtls-dialect=@var{opt}} ++-mtls-dialect=@var{opt} -mannotate-tablejump -mno-annotate-tablejump} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24750,6 +24750,17 @@ Whether a load-load barrier (@code{dbar 0x700}) is needed. When build with + This option controls which tls dialect may be used for general dynamic and + local dynamic TLS models. + ++@opindex mannotate-tablejump ++@opindex mno-annotate-tablejump ++@item -mannotate-tablejump ++@itemx -mno-annotate-tablejump ++Create an annotation section @code{.discard.tablejump_annotate} to ++correlate the @code{jirl} instruction and the jump table when a jump ++table is used to optimize the @code{switch} statement. Some external ++tools, for example @file{objtool} of the Linux kernel building system, ++need the annotation to analysis the control flow. The default is ++@option{-mno-annotate-tablejump}. ++ + @table @samp + @item trad + Use traditional TLS. This is the default. +diff --git a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +new file mode 100644 +index 000000000..9d58e60e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-mannotate-tablejump" } */ ++ ++extern void asdf(int); ++void foo(int x) { ++ switch (x) { ++ case 0: asdf(10); break; ++ case 1: asdf(11); break; ++ case 2: asdf(12); break; ++ case 3: asdf(13); break; ++ case 4: asdf(14); break; ++ } ++} ++ ++/* { dg-final { scan-assembler "\\.discard\\.tablejump_annotate" } } */ +-- +2.43.0 +
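From the emitted directives, each record in .discard.tablejump_annotate is a pair of pointer-sized values (the mode_size attribute picks .8byte on 64-bit and .4byte on 32-bit targets): the address of the "1: jr <reg>" instruction, then the address of its jump table. The consumer sketch below is hypothetical; the struct and function names are mine, only the record layout comes from the patch, and it assumes an LP64 target.

#include <stdint.h>
#include <stddef.h>

/* One record of .discard.tablejump_annotate on an LP64 target.  */
struct tablejump_annotation
{
  uint64_t jump_insn;    /* address of the "1: jr <reg>" instruction */
  uint64_t jump_table;   /* address of the matching jump table */
};

/* Walk the section contents, objtool-style.  */
static void
visit_annotations (const struct tablejump_annotation *recs, size_t n,
                   void (*visit) (uint64_t insn, uint64_t table))
{
  for (size_t i = 0; i < n; i++)
    visit (recs[i].jump_insn, recs[i].jump_table);
}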
_service:tar_scm:0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
Added
@@ -0,0 +1,126 @@ +From 4dc3e578d958ceb73f973483f42247c3d33210dc Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 20 Jun 2023 21:48:38 +0100 +Subject: PATCH 088/157 BackportSME aarch64: Robustify stack tie handling + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=580b74a79146e51268dd11192d3870645adb0bbb + +The SVE handling of stack clash protection copied the stack +pointer to X11 before the probe and set up X11 as the CFA +for unwind purposes: + + /* This is done to provide unwinding information for the stack + adjustments we're about to do, however to prevent the optimizers + from removing the R11 move and leaving the CFA note (which would be + very wrong) we tie the old and new stack pointer together. + The tie will expand to nothing but the optimizers will not touch + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); + emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. */ + add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy); + RTX_FRAME_RELATED_P (insn) = 1; + +-fcprop-registers is now smart enough to realise that X11 = SP, +replace X11 with SP in the stack tie, and delete the instruction +created above. + +This patch tries to prevent that by making stack_tie fussy about +the register numbers. It fixes failures in +gcc.target/aarch64/sve/pcs/stack_clash*.c. + +gcc/ + * config/aarch64/aarch64.md (stack_tie): Hard-code the first + register operand to the stack pointer. Require the second register + operand to have the number specified in a separate const_int operand. + * config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function. + (aarch64_allocate_and_probe_stack_space): Use it. + (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 18 ++++++++++++++---- + gcc/config/aarch64/aarch64.md | 7 ++++--- + 2 files changed, 18 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 2bb49b9b0..4d505c6fc 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9917,6 +9917,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void) + return STACK_CLASH_CALLER_GUARD; + } + ++/* Emit a stack tie that acts as a scheduling barrier for all previous and ++ subsequent memory accesses and that requires the stack pointer and REG ++ to have their current values. REG can be stack_pointer_rtx if no ++ other register's value needs to be fixed. */ ++ ++static void ++aarch64_emit_stack_tie (rtx reg) ++{ ++ emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode))); ++} + + /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch + registers. If POLY_SIZE is not large enough to require a probe this function +@@ -10030,7 +10040,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + the instruction. */ + rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); + emit_move_insn (stack_ptr_copy, stack_pointer_rtx); +- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); ++ aarch64_emit_stack_tie (stack_ptr_copy); + + /* We want the CFA independent of the stack pointer for the + duration of the loop. 
*/ +@@ -10398,7 +10408,7 @@ aarch64_expand_prologue (void) + aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], + hard_frame_pointer_rtx, 0); + } +- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); ++ aarch64_emit_stack_tie (hard_frame_pointer_rtx); + } + + aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, +@@ -10501,7 +10511,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + || cfun->calls_alloca + || crtl->calls_eh_return) + { +- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ aarch64_emit_stack_tie (stack_pointer_rtx); + need_barrier_p = false; + } + +@@ -10540,7 +10550,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + callee_adjust != 0, &cfi_ops); + + if (need_barrier_p) +- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ aarch64_emit_stack_tie (stack_pointer_rtx); + + if (callee_adjust != 0) + aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops); +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 2becc888e..2ce123255 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -7088,10 +7088,11 @@ + + (define_insn "stack_tie" + [(set (mem:BLK (scratch)) +- (unspec:BLK [(match_operand:DI 0 "register_operand" "rk") +- (match_operand:DI 1 "register_operand" "rk")] ++ (unspec:BLK [(reg:DI SP_REGNUM) ++ (match_operand:DI 0 "register_operand" "rk") ++ (match_operand:DI 1 "const_int_operand")] + UNSPEC_PRLG_STK))] +- "" ++ "REGNO (operands[0]) == INTVAL (operands[1])" + "" + [(set_attr "length" "0")] + ) +-- +2.33.0 +
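The mechanism in one line: because the intended register number now rides along as a const_int, any pass that rewrites the register operand (x11 becoming sp under -fcprop-registers) produces an insn whose condition is false, so the substitution is rejected and the CFA register copy survives. A toy restatement of the condition, with plain integers standing in for GCC's rtx accessors:

/* Mirrors "REGNO (operands[0]) == INTVAL (operands[1])" from the
   stack_tie condition above; illustrative, not GCC code.  */
static int
stack_tie_still_valid (unsigned long reg_operand_regno, long regno_const)
{
  /* If cprop rewrites x11 -> sp, reg_operand_regno changes but
     regno_const does not, the condition fails, and recog refuses
     the rewritten insn.  */
  return reg_operand_regno == (unsigned long) regno_const;
}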
_service:tar_scm:0187-LoongArch-Fix-up-r15-4130.patch
Added
@@ -0,0 +1,32 @@ +From 8cdf96cd61612746262a811b8a091ecab27bd3a1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao <xry111@xry111.site> +Date: Wed, 10 Jul 2024 12:15:23 +0800 +Subject: [PATCH 187/188] LoongArch: Fix up r15-4130 + +An earlier version of the patch (lacking the regeneration of some files) +was pushed. Fix it up now. + +gcc/ChangeLog: + + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Regenerate. +--- + gcc/config/loongarch/loongarch.opt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 16fed6ec3..f9c7bd446 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -312,7 +312,7 @@ HOST_WIDE_INT la_isa_evolution = 0 + + mannotate-tablejump + Target Mask(ANNOTATE_TABLEJUMP) Save +-Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. + + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) +-- +2.43.0 +
_service:tar_scm:0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
Added
@@ -0,0 +1,228 @@ +From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:06 +0100 +Subject: PATCH 089/157 BackportSME aarch64: Tweak + aarch64_save/restore_callee_saves + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db + +aarch64_save_callee_saves and aarch64_restore_callee_saves took +a parameter called start_offset that gives the offset of the +bottom of the saved register area from the current stack pointer. +However, it's more convenient for later patches if we use the +bottom of the entire frame as the reference point, rather than +the bottom of the saved registers. + +Doing that removes the need for the callee_offset field. +Other than that, this is not a win on its own. It only really +makes sense in combination with the follow-on patches. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove + callee_offset handling. + (aarch64_save_callee_saves): Replace the start_offset parameter + with a bytes_below_sp parameter. + (aarch64_restore_callee_saves): Likewise. + (aarch64_expand_prologue): Update accordingly. + (aarch64_expand_epilogue): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ + gcc/config/aarch64/aarch64.h | 4 --- + 2 files changed, 28 insertions(+), 32 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4d505c6fc..a0a4c7ac3 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void) + frame.final_adjust = 0; + frame.callee_adjust = 0; + frame.sve_callee_adjust = 0; +- frame.callee_offset = 0; + + frame.wb_pop_candidate1 = frame.wb_push_candidate1; + frame.wb_pop_candidate2 = frame.wb_push_candidate2; +@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void) + stp reg1, reg2, sp, bytes_below_saved_regs + stp reg3, reg4, sp, bytes_below_saved_regs + 16 */ + frame.initial_adjust = frame.frame_size; +- frame.callee_offset = const_below_saved_regs; + } + else if (saves_below_hard_fp_p + && known_eq (frame.saved_regs_size, +@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, + } + + /* Emit code to save the callee-saved registers from register number START +- to LIMIT to the stack at the location starting at offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P +- is true if the hard frame pointer has been set up. */ ++ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard ++ frame pointer has been set up. 
*/ + + static void +-aarch64_save_callee_saves (poly_int64 start_offset, ++aarch64_save_callee_saves (poly_int64 bytes_below_sp, + unsigned start, unsigned limit, bool skip_wb, + bool hard_fp_valid_p) + { +@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, + else if (GP_REGNUM_P (regno) + && (!offset.is_constant (&const_offset) || const_offset >= 512)) + { +- gcc_assert (known_eq (start_offset, 0)); +- poly_int64 fp_offset +- = frame.below_hard_fp_saved_regs_size; ++ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; + if (hard_fp_valid_p) + base_rtx = hard_frame_pointer_rtx; + else +@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, + } + + /* Emit code to restore the callee registers from register number START +- up to and including LIMIT. Restore from the stack offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. Write the +- appropriate REG_CFA_RESTORE notes into CFI_OPS. */ ++ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP ++ bytes above the bottom of the static frame. Skip any write-back ++ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE ++ notes into CFI_OPS. */ + + static void +-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, ++aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { + aarch64_frame &frame = cfun->machine->frame; +@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = start_offset + frame.reg_offsetregno; ++ offset = (frame.reg_offsetregno ++ + frame.bytes_below_saved_regs ++ - bytes_below_sp); + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void) + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; +- poly_int64 below_hard_fp_saved_regs_size +- = frame.below_hard_fp_saved_regs_size; + unsigned reg1 = frame.wb_push_candidate1; + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; +@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void) + - frame.hard_fp_offset); + gcc_assert (known_ge (chain_offset, 0)); + +- /* The offset of the bottom of the save area from the current SP. */ +- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; ++ /* The offset of the current SP from the bottom of the static frame. 
*/ ++ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + + if (emit_frame_chain) + { +@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void) + { + reg1 = R29_REGNUM; + reg2 = R30_REGNUM; +- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, ++ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, + false, false); + } + else +@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void) + aarch64_emit_stack_tie (hard_frame_pointer_rtx); + } + +- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + if (maybe_ne (sve_callee_adjust, 0)) +@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void) + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, + sve_callee_adjust, + !frame_pointer_needed, false); +- saved_regs_offset += sve_callee_adjust; ++ bytes_below_sp -= sve_callee_adjust; + } +- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, + false, emit_frame_chain); +- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, + callee_adjust != 0 || emit_frame_chain, + emit_frame_chain); + + /* We may need to probe the final adjustment if it is larger than the guard + that is assumed by the called. */ ++ gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); + } +@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + poly_int64 initial_adjust = frame.initial_adjust; + HOST_WIDE_INT callee_adjust = frame.callee_adjust; + poly_int64 final_adjust = frame.final_adjust; +- poly_int64 callee_offset = frame.callee_offset; + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; + poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; + unsigned reg1 = frame.wb_pop_candidate1; +@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + + /* Restore the vector registers before the predicate registers, + so that we can use P4 as a temporary for big-endian SVE frames. */ +- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, + callee_adjust != 0, &cfi_ops); +- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, ++ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, + false, &cfi_ops); + if (maybe_ne (sve_callee_adjust, 0)) + aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); +@@ -10545,7 +10545,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, we don't need to restore x30 again in the traditional + way. */ +- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, ++ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust, + R0_REGNUM, last_gpr, + callee_adjust != 0, &cfi_ops); + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index dedc5b32f..a1db4f689 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -837,10 +837,6 @@ struct GTY (()) aarch64_frame + It is zero when no push is used. */ + HOST_WIDE_INT callee_adjust; + +- /* The offset from SP to the callee-save registers after initial_adjust. 
+- It may be non-zero if no push is used (ie. callee_adjust == 0). */ +- poly_int64 callee_offset; +- + /* The size of the stack adjustment before saving or after restoring + SVE registers. */ + poly_int64 sve_callee_adjust; +-- +2.33.0 +
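Since both routines now measure from the bottom of the static frame, the shared computation is offset = reg_offset + bytes_below_saved_regs - bytes_below_sp. A runnable toy check with invented numbers:

#include <assert.h>

int
main (void)
{
  long bytes_below_saved_regs = 32;  /* outgoing args (invented) */
  long reg_offset = 16;              /* slot within the save area */
  long bytes_below_sp = 48;          /* SP height above frame bottom */

  long sp_relative_offset = reg_offset + bytes_below_saved_regs
                            - bytes_below_sp;
  assert (sp_relative_offset == 0);  /* this slot sits right at SP */
  return 0;
}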
_service:tar_scm:0188-libphobos-Update-build-scripts-for-LoongArch64.patch
Added
@@ -0,0 +1,304 @@ +From 46e279e1c79086e930965c9a15d08b70a2c06a80 Mon Sep 17 00:00:00 2001 +From: Yang Yujie <yangyujie@loongson.cn> +Date: Mon, 28 Oct 2024 01:53:57 +0000 +Subject: PATCH 188/188 libphobos: Update build scripts for LoongArch64. + +libphobos/ChangeLog: + + * m4/druntime/cpu.m4: Support loongarch* targets. + * libdruntime/Makefile.am: Same. + * libdruntime/Makefile.in: Regenerate. + * configure: Regenerate. +--- + libphobos/configure | 21 ++++++- + libphobos/libdruntime/Makefile.am | 3 + + libphobos/libdruntime/Makefile.in | 94 +++++++++++++++++++------------ + libphobos/m4/druntime/cpu.m4 | 5 ++ + 4 files changed, 85 insertions(+), 38 deletions(-) + +diff --git a/libphobos/configure b/libphobos/configure +index 9da06f087..6acb2dd89 100755 +--- a/libphobos/configure ++++ b/libphobos/configure +@@ -696,6 +696,8 @@ DRUNTIME_CPU_POWERPC_FALSE + DRUNTIME_CPU_POWERPC_TRUE + DRUNTIME_CPU_MIPS_FALSE + DRUNTIME_CPU_MIPS_TRUE ++DRUNTIME_CPU_LOONGARCH_FALSE ++DRUNTIME_CPU_LOONGARCH_TRUE + DRUNTIME_CPU_ARM_FALSE + DRUNTIME_CPU_ARM_TRUE + DRUNTIME_CPU_AARCH64_FALSE +@@ -11750,7 +11752,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11753 "configure" ++#line 11755 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -11856,7 +11858,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11859 "configure" ++#line 11861 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -14137,6 +14139,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + ;; + mips*) druntime_target_cpu_parsed="mips" + ;; ++ loongarch*) ++ druntime_target_cpu_parsed="loongarch" ++ ;; + powerpc*) + druntime_target_cpu_parsed="powerpc" + ;; +@@ -14174,6 +14179,14 @@ else + DRUNTIME_CPU_MIPS_FALSE= + fi + ++ if test "$druntime_target_cpu_parsed" = "loongarch"; then ++ DRUNTIME_CPU_LOONGARCH_TRUE= ++ DRUNTIME_CPU_LOONGARCH_FALSE='#' ++else ++ DRUNTIME_CPU_LOONGARCH_TRUE='#' ++ DRUNTIME_CPU_LOONGARCH_FALSE= ++fi ++ + if test "$druntime_target_cpu_parsed" = "powerpc"; then + DRUNTIME_CPU_POWERPC_TRUE= + DRUNTIME_CPU_POWERPC_FALSE='#' +@@ -15738,6 +15751,10 @@ if test -z "${DRUNTIME_CPU_MIPS_TRUE}" && test -z "${DRUNTIME_CPU_MIPS_FALSE}"; + as_fn_error $? "conditional \"DRUNTIME_CPU_MIPS\" was never defined. + Usually this means the macro was only invoked conditionally." "$LINENO" 5 + fi ++if test -z "${DRUNTIME_CPU_LOONGARCH_TRUE}" && test -z "${DRUNTIME_CPU_LOONGARCH_FALSE}"; then ++ as_fn_error $? "conditional \"DRUNTIME_CPU_LOONGARCH\" was never defined. ++Usually this means the macro was only invoked conditionally." "$LINENO" 5 ++fi + if test -z "${DRUNTIME_CPU_POWERPC_TRUE}" && test -z "${DRUNTIME_CPU_POWERPC_FALSE}"; then + as_fn_error $? "conditional \"DRUNTIME_CPU_POWERPC\" was never defined. + Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am +index 6ca4012b7..65e3f1b44 100644 +--- a/libphobos/libdruntime/Makefile.am ++++ b/libphobos/libdruntime/Makefile.am +@@ -86,6 +86,9 @@ endif + if DRUNTIME_CPU_MIPS + DRUNTIME_SOURCES_CONFIGURED += config/mips/switchcontext.S + endif ++if DRUNTIME_CPU_LOONGARCH ++ DRUNTIME_SOURCES_CONFIGURED += config/loongarch/switchcontext.S ++endif + if DRUNTIME_CPU_POWERPC + DRUNTIME_SOURCES_CONFIGURED += config/powerpc/switchcontext.S + endif +diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in +index f7f78d71f..91cd65362 100644 +--- a/libphobos/libdruntime/Makefile.in ++++ b/libphobos/libdruntime/Makefile.in +@@ -124,12 +124,13 @@ target_triplet = @target@ + # CPU specific sources + @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S + @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S +-@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S +-@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S +-@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S ++@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S ++@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S ++@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S + subdir = libdruntime + ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -474,45 +475,49 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \ + @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22) + @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo + @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo +-am__objects_32 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo 
++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo ++am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_21) \ + $(am__objects_23) $(am__objects_24) $(am__objects_25) \ + $(am__objects_26) $(am__objects_27) $(am__objects_28) \ +- $(am__objects_29) $(am__objects_30) $(am__objects_31) +-am__objects_33 = gcc/config.lo gcc/libbacktrace.lo +-am__objects_34 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ +- $(am__objects_32) $(am__objects_33) +-am_libgdruntime_la_OBJECTS = $(am__objects_34) ++ $(am__objects_29) $(am__objects_30) $(am__objects_31) \ ++ $(am__objects_32) ++am__objects_34 = gcc/config.lo gcc/libbacktrace.lo ++am__objects_35 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ ++ $(am__objects_33) $(am__objects_34) ++am_libgdruntime_la_OBJECTS = $(am__objects_35) + libgdruntime_la_OBJECTS = $(am_libgdruntime_la_OBJECTS) + am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +-am__objects_35 = core/stdc/libgdruntime_convenience_la-errno_.lo +-@DRUNTIME_OS_MINGW_TRUE@am__objects_36 = $(am__objects_20) \ ++am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo ++@DRUNTIME_OS_MINGW_TRUE@am__objects_37 = $(am__objects_20) \ + @DRUNTIME_OS_MINGW_TRUE@ config/mingw/libgdruntime_convenience_la-msvc.lo +-@DRUNTIME_CPU_AARCH64_TRUE@am__objects_37 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_ARM_TRUE@am__objects_38 = config/arm/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_39 = config/mips/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_40 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_41 = config/mingw/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_42 = config/x86/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_43 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_44 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo +-am__objects_45 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo ++am__objects_47 = 
$(am__objects_5) $(am__objects_7) $(am__objects_9) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_36) \ + $(am__objects_23) $(am__objects_37) $(am__objects_38) \ + $(am__objects_39) $(am__objects_40) $(am__objects_41) \ +- $(am__objects_42) $(am__objects_43) $(am__objects_44) +-am__objects_46 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ +- $(am__objects_45) $(am__objects_33) +-am__objects_47 = $(am__objects_46) +-am_libgdruntime_convenience_la_OBJECTS = $(am__objects_47) ++ $(am__objects_42) $(am__objects_43) $(am__objects_44) \ ++ $(am__objects_45) $(am__objects_46) ++am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ ++ $(am__objects_47) $(am__objects_33) ++am__objects_49 = $(am__objects_48) ++am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49) + libgdruntime_convenience_la_OBJECTS = \ + $(am_libgdruntime_convenience_la_OBJECTS) + AM_V_P = $(am__v_P_@AM_V@) +@@ -787,7 +792,7 @@ DRUNTIME_SOURCES_CONFIGURED = $(am__append_1) $(am__append_2) \ + $(am__append_9) $(am__append_10) $(am__append_11) \ + $(am__append_12) $(am__append_13) $(am__append_14) \ + $(am__append_15) $(am__append_16) $(am__append_17) \ +- $(am__append_18) ++ $(am__append_18) $(am__append_19) + + # Provide __start_minfo, __stop_minfo if linker doesn't. + @DRUNTIME_OS_MINFO_BRACKETING_FALSE@DRTSTUFF = gcc/drtbegin.o gcc/drtend.o +@@ -1900,6 +1905,11 @@ config/arm/$(am__dirstamp): + @: > config/arm/$(am__dirstamp) + config/arm/libgdruntime_la-switchcontext.lo: \ + config/arm/$(am__dirstamp) ++config/loongarch/$(am__dirstamp): ++ @$(MKDIR_P) config/loongarch ++ @: > config/loongarch/$(am__dirstamp) ++config/loongarch/libgdruntime_la-switchcontext.lo: \ ++ config/loongarch/$(am__dirstamp) + config/mips/$(am__dirstamp): + @$(MKDIR_P) config/mips + @: > config/mips/$(am__dirstamp) +@@ -1940,6 +1950,8 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: \ + config/aarch64/$(am__dirstamp) + config/arm/libgdruntime_convenience_la-switchcontext.lo: \ + config/arm/$(am__dirstamp) ++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \ ++ config/loongarch/$(am__dirstamp) + config/mips/libgdruntime_convenience_la-switchcontext.lo: \ + config/mips/$(am__dirstamp) + config/powerpc/libgdruntime_convenience_la-switchcontext.lo: \ +@@ -1964,6 +1976,8 @@ mostlyclean-compile: + -rm -f config/arm/*.lo + -rm -f config/mingw/*.$(OBJEXT) + -rm -f config/mingw/*.lo ++ -rm -f config/loongarch/*.$(OBJEXT) ++ -rm -f config/loongarch/*.lo + -rm -f config/mips/*.$(OBJEXT) + -rm -f config/mips/*.lo + -rm -f config/powerpc/*.$(OBJEXT) +@@ -2087,7 +2101,10 @@ config/aarch64/libgdruntime_la-switchcontext.lo: config/aarch64/switchcontext.S + config/arm/libgdruntime_la-switchcontext.lo: config/arm/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S + +-config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S ++config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S ++ $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) ++ 
++config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S + + config/powerpc/libgdruntime_la-switchcontext.lo: config/powerpc/switchcontext.S +@@ -2111,6 +2128,9 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: config/aarch64/switchcontext.S + config/arm/libgdruntime_convenience_la-switchcontext.lo: config/arm/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S + ++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S ++ $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S ++ + config/mips/libgdruntime_convenience_la-switchcontext.lo: config/mips/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S + +@@ -2158,6 +2178,7 @@ clean-libtool: + -rm -rf config/aarch64/.libs config/aarch64/_libs + -rm -rf config/arm/.libs config/arm/_libs + -rm -rf config/mingw/.libs config/mingw/_libs ++ -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/mips/.libs config/mips/_libs + -rm -rf config/powerpc/.libs config/powerpc/_libs + -rm -rf config/s390/.libs config/s390/_libs +@@ -2319,6 +2340,7 @@ distclean-generic: + -rm -f config/aarch64/$(am__dirstamp) + -rm -f config/arm/$(am__dirstamp) + -rm -f config/mingw/$(am__dirstamp) ++ -rm -f config/loongarch/$(am__dirstamp) + -rm -f config/mips/$(am__dirstamp) + -rm -f config/powerpc/$(am__dirstamp) + -rm -f config/s390/$(am__dirstamp) +diff --git a/libphobos/m4/druntime/cpu.m4 b/libphobos/m4/druntime/cpu.m4 +index db3a92c15..3461b2d3c 100644 +--- a/libphobos/m4/druntime/cpu.m4 ++++ b/libphobos/m4/druntime/cpu.m4 +@@ -15,6 +15,9 @@ AC_DEFUN([DRUNTIME_CPU_SOURCES], + ;; + arm*) druntime_target_cpu_parsed="arm" + ;; ++ loongarch*) ++ druntime_target_cpu_parsed="loongarch" ++ ;; + mips*) druntime_target_cpu_parsed="mips" + ;; + powerpc*) +@@ -34,6 +37,8 @@ AC_DEFUN([DRUNTIME_CPU_SOURCES], + [test "$druntime_target_cpu_parsed" = "aarch64"]) + AM_CONDITIONAL([DRUNTIME_CPU_ARM], + [test "$druntime_target_cpu_parsed" = "arm"]) ++ AM_CONDITIONAL([DRUNTIME_CPU_LOONGARCH], ++ [test "$druntime_target_cpu_parsed" = "loongarch"]) + AM_CONDITIONAL([DRUNTIME_CPU_MIPS], + [test "$druntime_target_cpu_parsed" = "mips"]) + AM_CONDITIONAL([DRUNTIME_CPU_POWERPC], +-- +2.43.0 +
_service:tar_scm:0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch
Added
@@ -0,0 +1,47 @@ +From c8768dd861538817db8c1955dcce4b6d8ce17c48 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:07 +0100 +Subject: PATCH 090/157 BackportSME aarch64: Only calculate chain_offset + if there is a chain + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aa8b57ee0206e8e5ac7078692ee67fb6ead05645 + +After previous patches, it is no longer necessary to calculate +a chain_offset in cases where there is no chain record. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the + calculation of chain_offset into the emit_frame_chain block. +--- + gcc/config/aarch64/aarch64.cc | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index a0a4c7ac3..bef6a658b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10357,16 +10357,16 @@ aarch64_expand_prologue (void) + if (callee_adjust != 0) + aarch64_push_regs (reg1, reg2, callee_adjust); + +- /* The offset of the frame chain record (if any) from the current SP. */ +- poly_int64 chain_offset = (initial_adjust + callee_adjust +- - frame.hard_fp_offset); +- gcc_assert (known_ge (chain_offset, 0)); +- + /* The offset of the current SP from the bottom of the static frame. */ + poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; + + if (emit_frame_chain) + { ++ /* The offset of the frame chain record (if any) from the current SP. */ ++ poly_int64 chain_offset = (initial_adjust + callee_adjust ++ - frame.hard_fp_offset); ++ gcc_assert (known_ge (chain_offset, 0)); ++ + if (callee_adjust == 0) + { + reg1 = R29_REGNUM; +-- +2.33.0 +
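Editor's note: the restructuring in this hunk is simple but worth spelling out — a derived quantity whose non-negativity is asserted is now computed only on the path that consumes it. A minimal C++ sketch of the same shape, using invented stand-ins for the aarch64_frame fields (this is not the actual GCC code):

  #include <cassert>

  // Hypothetical stand-ins for the frame fields named in the hunk above.
  struct frame_info
  {
    long initial_adjust;
    long callee_adjust;
    long hard_fp_offset;
  };

  void expand_prologue (const frame_info &frame, bool emit_frame_chain)
  {
    // Before the patch, chain_offset was computed and asserted even on
    // paths that never emit a frame chain record.  Now the value only
    // exists where it is consumed.
    if (emit_frame_chain)
      {
        long chain_offset = frame.initial_adjust + frame.callee_adjust
                            - frame.hard_fp_offset;
        assert (chain_offset >= 0);
        // ... set up the frame chain record at sp + chain_offset ...
      }
  }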
_service:tar_scm:0189-LoongArch-fix-building-errors.patch
Added
@@ -0,0 +1,273 @@ +From 142ae446cab26f1beb81a53a7da3c477ce42df40 Mon Sep 17 00:00:00 2001 +From: Peng Fan <fanpeng@loongson.cn> +Date: Mon, 28 Oct 2024 09:02:51 +0000 +Subject: PATCH LoongArch: fix building errors. + +--- + config/mt-loongarch-mlib | 2 +- + gcc/config/loongarch/loongarch-evolution.h | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 1 + + gcc/config/loongarch/loongarch-str.h | 11 +++--- + gcc/config/loongarch/loongarch.cc | 9 +---- + gcc/config/loongarch/loongarch.md | 44 ++++++++++++++++------ + gcc/config/loongarch/simd.md | 15 +++++--- + gcc/doc/invoke.texi | 3 +- + 8 files changed, 53 insertions(+), 34 deletions(-) + +diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib +index 4cfe568f1..bbbba277f 100644 +--- a/config/mt-loongarch-mlib ++++ b/config/mt-loongarch-mlib +@@ -1 +1 @@ +-FLAGS_FOR_TARGET += -fmultiflags ++FLAGS_FOR_TARGET += +diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h +index d64996481..7e8e602c7 100644 +--- a/gcc/config/loongarch/loongarch-evolution.h ++++ b/gcc/config/loongarch/loongarch-evolution.h +@@ -1,7 +1,7 @@ + /* Generated automatically by "genstr" from "isa-evolution.in". + Please do not edit this file directly. + +- Copyright (C) 2023 Free Software Foundation, Inc. ++ Copyright (C) 2023-2024 Free Software Foundation, Inc. + + This file is part of GCC. + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 735daeb7c..1d08bb6a1 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -1071,6 +1071,7 @@ loongarch_init_misc_options (struct gcc_options *opts, + + #undef INIT_TARGET_FLAG + ++#define TARGET_DIRECT_EXTERN_ACCESS_OPTS_P(opts) (((opts->x_target_flags) & MASK_DIRECT_EXTERN_ACCESS) != 0) + /* Set mexplicit-relocs default. */ + if (opts->x_la_opt_explicit_relocs == M_OPT_UNSET) + opts->x_la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 3cbe12f7b..13d161a8c 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -66,9 +66,10 @@ along with GCC; see the file COPYING3. If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_FRECIPE "frecipe" +-#define OPTSTR_DIV32 "div32" +-#define OPTSTR_LAM_BH "lam-bh" +-#define OPTSTR_LAMCAS "lamcas" +-#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++#define OPTSTR_FRECIPE "frecipe" ++#define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" ++#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 53bd8d7ec..6be0d80b3 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -764,14 +764,7 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- +- /* For a C23 variadic function w/o any named argument, and w/o an +- artifical argument for large return value, skip advancing args. +- There is such an artifical argument iff. arg.type is non-NULL +- (PR 114175). 
*/ +- if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) +- || arg.type != NULL_TREE) +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); + + /* Found out how many registers we need to save. */ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 66236a7c7..d8d444c7a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -32,6 +32,7 @@ + UNSPEC_FCLASS + UNSPEC_FMAX + UNSPEC_FMIN ++ UNSPEC_COPYSIGN + UNSPEC_FTINT + UNSPEC_FTINTRM + UNSPEC_FTINTRP +@@ -415,11 +416,13 @@ + + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. +-(define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) ++;; (define_mode_iterator LD_AT_LEAST_32_BIT GPR ANYF) ++(define_mode_iterator LD_AT_LEAST_32_BIT (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")) + + ;; A mode for anything able to be stored with the same addressing mode as + ;; st.w. +-(define_mode_iterator ST_ANY QHWD ANYF) ++;; (define_mode_iterator ST_ANY QHWD ANYF) ++(define_mode_iterator ST_ANY (QI "") (HI "") (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")) + + ;; A mode for anything legal as a input of a div or mod instruction. + (define_mode_iterator DIV (DI "TARGET_64BIT") +@@ -590,6 +593,10 @@ + (define_code_attr sel (eq "masknez") (ne "maskeqz")) + (define_code_attr selinv (eq "maskeqz") (ne "masknez")) + ++(define_int_attr lrint_allow_inexact (UNSPEC_FTINT "1") ++ (UNSPEC_FTINTRM "0") ++ (UNSPEC_FTINTRP "0")) ++ + ;; Iterator and attributes for floating-point to fixed-point conversion + ;; instructions. + (define_int_iterator LRINT UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP) +@@ -625,7 +632,8 @@ + ;; so the redundant sign extension can be removed if the output is used as + ;; an input of a bitwise operation. Note plus, rotl, and div are handled + ;; separately. 
+-(define_code_iterator shift_w any_shift rotatert) ++;; (define_code_iterator shift_w any_shift rotatert) ++(define_code_iterator shift_w ashift ashiftrt lshiftrt rotatert) + (define_code_iterator arith_w minus mult) + + (define_expand "<optab><mode>3" +@@ -1324,8 +1332,9 @@ + + (define_insn "copysign<mode>3" + (set (match_operand:ANYF 0 "register_operand" "=f") +- (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f"))) ++ (unspec:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ UNSPEC_COPYSIGN)) + "TARGET_HARD_FLOAT" + "fcopysign.<fmt>\t%0,%1,%2" + (set_attr "type" "fcopysign") +@@ -2722,12 +2731,13 @@ + (set_attr "mode" "<MODE>")) + + ;; Convert floating-point numbers to integers ++;; (<LRINT> == UNSPEC_FTINT + (define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2" + (set (match_operand:ANYFI 0 "register_operand" "=f") + (unspec:ANYFI (match_operand:ANYF 1 "register_operand" "f") + LRINT)) + "TARGET_HARD_FLOAT && +- (<LRINT> == UNSPEC_FTINT ++ (<lrint_allow_inexact> + || flag_fp_int_builtin_inexact + || !flag_trapping_math)" + "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1" +@@ -4135,15 +4145,26 @@ + (136 "isnormal") + (952 "isfinite")) + +-(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++;;(define_expand "<FCLASS_MASK:fclass_optab><ANYF:mode>2" ++;; (match_operand:SI 0 "register_operand" "=r") ++;; (match_operand:ANYF 1 "register_operand" " f") ++;; (const_int FCLASS_MASK) ++;; "TARGET_HARD_FLOAT" ++;; { ++;; rtx ft0 = gen_reg_rtx (SImode); ++;; rtx t0 = gen_reg_rtx (word_mode); ++;; rtx mask = GEN_INT (<FCLASS_MASK>); ++ ++(define_expand "fclass_optab<ANYF:mode>2" ++ (unspec:ANYF + (match_operand:SI 0 "register_operand" "=r") +- (match_operand:ANYF 1 "register_operand" " f") +- (const_int FCLASS_MASK) ++ (match_operand:ANYF 1 "register_operand" " f") ++ UNSPEC_FCLASS) + "TARGET_HARD_FLOAT" + { + rtx ft0 = gen_reg_rtx (SImode); + rtx t0 = gen_reg_rtx (word_mode); +- rtx mask = GEN_INT (<FCLASS_MASK>); ++ rtx mask = GEN_INT (GET_MODE_MASK (<MODE>mode)); + + emit_insn (gen_fclass_<ANYF:fmt> (ft0, operands1)); + +@@ -4165,7 +4186,8 @@ + emit_move_insn (operands0, t0); + + DONE; +- }) ++ } ++ (set_attr "mode" "<ANYF:MODE>")) + + (define_insn "bytepick_w_<bytepick_imm>" + (set (match_operand:SI 0 "register_operand" "=r") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index c28b95282..9e4c08196 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -30,10 +30,13 @@ + (define_mode_iterator FLASX V4DF V8SF) + + ;; All integer modes available +-(define_mode_iterator IVEC (ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")) ++;; (define_mode_iterator IVEC (ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")) ++(define_mode_iterator IVEC (V2DI "ISA_HAS_LSX") (V4SI "ISA_HAS_LSX") (V8HI "ISA_HAS_LSX") (V16QI "ISA_HAS_LSX") ++ (V4DI "ISA_HAS_LASX") (V8SI "ISA_HAS_LASX") (V16HI "ISA_HAS_LASX") (V32QI "ISA_HAS_LASX")) + + ;; All FP modes available +-(define_mode_iterator FVEC (FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")) ++(define_mode_iterator FVEC (V2DF "ISA_HAS_LSX") (V4SF "ISA_HAS_LSX") ++ (V4DF "ISA_HAS_LASX") (V8SF "ISA_HAS_LASX")) + + ;; Mnemonic prefix, "x" for LASX modes. + (define_mode_attr x (V2DI "") (V4SI "") (V8HI "") (V16QI "") +@@ -162,12 +165,12 @@ + ;; Expand the standard-named patterns to <x>vfrint instructions if + ;; raising inexact exception is allowed. 
+ ++;; "<SIMD_FRINT> == UNSPEC_SIMD_FRINT || + (define_expand "<simd_frint_pattern><mode>2" + (set (match_operand:FVEC 0 "register_operand" "=f") + (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") + SIMD_FRINT)) +- "<SIMD_FRINT> == UNSPEC_SIMD_FRINT || +- flag_fp_int_builtin_inexact || ++ "flag_fp_int_builtin_inexact || + !flag_trapping_math") + + ;; ftrunc is like btrunc, but it's allowed to raise inexact exception +@@ -221,13 +224,13 @@ + ;; Expand the standard-named patterns to <x>vftint instructions if + ;; raising inexact exception. + ++;; "<SIMD_FRINT> == UNSPEC_SIMD_FRINT || + (define_expand "l<simd_frint_pattern><mode><vimode>2" + (set (match_operand:<VIMODE> 0 "register_operand" "=f") + (fix:<VIMODE> + (unspec:FVEC (match_operand:FVEC 1 "register_operand" "f") + SIMD_FRINT))) +- "<SIMD_FRINT> == UNSPEC_SIMD_FRINT || +- flag_fp_int_builtin_inexact || ++ "flag_fp_int_builtin_inexact || + !flag_trapping_math") + + ;; fix_trunc is allowed to raise inexact exception even if +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index d2c52cdf4..8a09938fc 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1006,8 +1006,7 @@ Objective-C and Objective-C++ Dialects}. + -mcond-move-float -mno-cond-move-float @gol + -memcpy -mno-memcpy -mstrict-align -mno-strict-align @gol + -mmax-inline-memcpy-size=@var{n} @gol +--mexplicit-relocs -mno-explicit-relocs @gol +--mdirect-extern-access -mno-direct-extern-access @gol ++-mexplicit-relocs=@var{style} -mexplicit-relocs -mno-explicit-relocs @gol + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol + -mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol +-- +2.45.2 +
_service:tar_scm:0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch
Added
@@ -0,0 +1,94 @@ +From 43dc03de6d608e10d83cc7994d127e3764bfbcf7 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:07 +0100 +Subject: PATCH 091/157 BackportSME aarch64: Rename locals_offset to + bytes_above_locals +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=28034dbb5e32711d5f4d655576f2499e6f57f854 + +locals_offset was described as: + + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. This value is always a multiple of + STACK_BOUNDARY. */ + +This is implicitly an “upside down” view of the frame: the incoming +SP is at offset 0, and anything N bytes below the incoming SP is at +offset N (rather than -N). + +However, reg_offset instead uses a “right way up” view; that is, +it views offsets in address terms. Something above X is at a +positive offset from X and something below X is at a negative +offset from X. + +Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, +target-independent code views offsets in address terms too: +locals are allocated at negative offsets to virtual_stack_vars. + +It seems confusing to have *_offset fields of the same structure +using different polarities like this. This patch tries to avoid +that by renaming locals_offset to bytes_above_locals. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... + (aarch64_frame::bytes_above_locals): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_initial_elimination_offset): Update accordingly. +--- + gcc/config/aarch64/aarch64.cc | 6 +++--- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index bef6a658b..992f71bbd 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8937,7 +8937,7 @@ aarch64_layout_frame (void) + STACK_BOUNDARY / BITS_PER_UNIT)); + frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; + +- frame.locals_offset = frame.saved_varargs_size; ++ frame.bytes_above_locals = frame.saved_varargs_size; + + frame.initial_adjust = 0; + frame.final_adjust = 0; +@@ -13047,13 +13047,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + return frame.hard_fp_offset; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.locals_offset; ++ return frame.hard_fp_offset - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) +- return frame.frame_size - frame.locals_offset; ++ return frame.frame_size - frame.bytes_above_locals; + } + + return frame.frame_size; +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index a1db4f689..2acff9a96 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -815,10 +815,10 @@ struct GTY (()) aarch64_frame + always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_hard_fp; + +- /* Offset from the base of the frame (incomming SP) to the +- top of the locals area. This value is always a multiple of ++ /* The number of bytes between the top of the locals area and the top ++ of the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 locals_offset; ++ poly_int64 bytes_above_locals; + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. 
This value is always a multiple of +-- +2.33.0 +
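Editor's note: the polarity argument in this commit message can be made concrete with a few lines of arithmetic. In the illustrative C++ check below (invented values, not GCC code), the same slot is described in both views and the two agree once the sign convention is applied:

  #include <cassert>
  #include <cstdint>

  int main ()
  {
    uint64_t incoming_sp = 0x7fff0000;  // hypothetical incoming SP
    int64_t bytes_above_locals = 32;    // distance from top of locals up to SP

    // "Upside-down" view: a positive value N names the address SP - N.
    uint64_t top_of_locals = incoming_sp - bytes_above_locals;

    // "Address" view (reg_offset's polarity): below SP is a negative offset.
    int64_t address_view_offset = -bytes_above_locals;
    assert (top_of_locals == incoming_sp + address_view_offset);
  }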
_service:tar_scm:0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch
Added
@@ -0,0 +1,119 @@ +From 13dfb01e5c30c3bd09333ac79d6ff96a617fea67 Mon Sep 17 00:00:00 2001 +From: Richard Biener <rguenther@suse.de> +Date: Thu, 3 Aug 2023 13:11:12 +0200 +Subject: [PATCH] tree-optimization/110702 - avoid zero-based memory references + in IVOPTs + +Sometimes IVOPTs chooses a weird induction variable which downstream +leads to issues. Most of the times we can fend those off during costing +by rejecting the candidate but it looks like the address description +costing synthesizes is different from what we end up generating so +the following fixes things up at code generation time. Specifically +we avoid the create_mem_ref_raw fallback which uses a literal zero +address base with the actual base in index2. For the case in question +we have the address + + type = unsigned long + offset = 0 + elements = { + [0] = &e * -3, + [1] = (sizetype) a.9_30 * 232, + [2] = ivtmp.28_44 * 4 + } + +from which we code generate the problematical + + _3 = MEM[(long int *)0B + ivtmp.36_9 + ivtmp.28_44 * 4]; + +which references the object at address zero. The patch below +recognizes the fallback after the fact and transforms the +TARGET_MEM_REF memory reference into a LEA for which this form +isn't problematic: + + _24 = &MEM[(long int *)0B + ivtmp.36_34 + ivtmp.28_44 * 4]; + _3 = *_24; + +hereby avoiding the correctness issue. We'd later conclude the +program terminates at the null pointer dereference and make the +function pure, miscompling the main function of the testcase. + + PR tree-optimization/110702 + * tree-ssa-loop-ivopts.cc (rewrite_use_address): When + we created a NULL pointer based access rewrite that to + a LEA. + + * gcc.dg/torture/pr110702.c: New testcase. +--- + gcc/testsuite/gcc.dg/torture/pr110702.c | 31 +++++++++++++++++++++++++ + gcc/tree-ssa-loop-ivopts.cc | 17 +++++++++++++- + 2 files changed, 47 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/torture/pr110702.c + +diff --git a/gcc/testsuite/gcc.dg/torture/pr110702.c b/gcc/testsuite/gcc.dg/torture/pr110702.c +new file mode 100644 +index 00000000000..aab9c7d923e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr110702.c +@@ -0,0 +1,31 @@ ++/* { dg-do run } */ ++ ++void abort (void); ++ ++int a, b, c, d; ++long e[9][7][4]; ++ ++void f() ++{ ++ for (; a >= 0; a--) ++ { ++ b = 0; ++ for (; b <= 3; b++) ++ { ++ c = 0; ++ for (; c <= 3; c++) ++ { ++ int *g = &d; ++ *g = e[0][0][b] | e[a][b][a]; ++ } ++ } ++ } ++} ++ ++int main() ++{ ++ f(); ++ if (a != -1) ++ abort (); ++ return 0; ++} +diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc +index 92fc1c7d734..934897af691 100644 +--- a/gcc/tree-ssa-loop-ivopts.cc ++++ b/gcc/tree-ssa-loop-ivopts.cc +@@ -7630,7 +7630,22 @@ rewrite_use_address (struct ivopts_data *data, + true, GSI_SAME_STMT); + } + else +- copy_ref_info (ref, *use->op_p); ++ { ++ /* When we end up confused enough and have no suitable base but ++ stuffed everything to index2 use a LEA for the address and ++ create a plain MEM_REF to avoid basing a memory reference ++ on address zero which create_mem_ref_raw does as fallback. */ ++ if (TREE_CODE (ref) == TARGET_MEM_REF ++ && TMR_INDEX2 (ref) != NULL_TREE ++ && integer_zerop (TREE_OPERAND (ref, 0))) ++ { ++ ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref); ++ ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE, ++ true, GSI_SAME_STMT); ++ ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type)); ++ } ++ copy_ref_info (ref, *use->op_p); ++ } + + *use->op_p = ref; + } +-- +2.45.2 +
_service:tar_scm:0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
Added
@@ -0,0 +1,151 @@ +From e33aa6e25334fd94e1e4f2d8b6c8247029657a54 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:08 +0100 +Subject: PATCH 092/157 BackportSME aarch64: Rename hard_fp_offset to + bytes_above_hard_fp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ed61c87f044f5460109c197855b316641db3c6c6 + +Similarly to the previous locals_offset patch, hard_fp_offset +was described as: + + /* Offset from the base of the frame (incomming SP) to the + hard_frame_pointer. This value is always a multiple of + STACK_BOUNDARY. */ + poly_int64 hard_fp_offset; + +which again took an “upside-down” view: higher offsets meant lower +addresses. This patch renames the field to bytes_above_hard_fp instead. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename + to... + (aarch64_frame::bytes_above_hard_fp): ...this. + * config/aarch64/aarch64.cc (aarch64_layout_frame) + (aarch64_expand_prologue): Update accordingly. + (aarch64_initial_elimination_offset): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- + gcc/config/aarch64/aarch64.h | 6 +++--- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 992f71bbd..67199a026 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8929,7 +8929,7 @@ aarch64_layout_frame (void) + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.hard_fp_offset ++ frame.bytes_above_hard_fp + = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; + + /* Both these values are already aligned. */ +@@ -8978,13 +8978,13 @@ aarch64_layout_frame (void) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; ++ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; + if (known_eq (frame.saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; + else if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +- && known_eq (frame.hard_fp_offset, const_size)) ++ && known_eq (frame.bytes_above_hard_fp, const_size)) + { + /* Simple, small frame with no data below the saved registers. + +@@ -9001,8 +9001,8 @@ aarch64_layout_frame (void) + case that it hardly seems worth the effort though. 
*/ + && (!saves_below_hard_fp_p || const_below_saved_regs == 0) + && !(cfun->calls_alloca +- && frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset)) ++ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset)) + { + /* Frame with small area below the saved registers: + +@@ -9020,12 +9020,12 @@ aarch64_layout_frame (void) + sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = (frame.hard_fp_offset ++ frame.initial_adjust = (frame.bytes_above_hard_fp + + frame.below_hard_fp_saved_regs_size); + frame.final_adjust = frame.bytes_below_saved_regs; + } +- else if (frame.hard_fp_offset.is_constant (&const_fp_offset) +- && const_fp_offset < max_push_offset) ++ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ && const_above_fp < max_push_offset) + { + /* Frame with large area below the saved registers, or with SVE saves, + but with a small area above: +@@ -9035,7 +9035,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.callee_adjust = const_fp_offset; ++ frame.callee_adjust = const_above_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -9050,7 +9050,7 @@ aarch64_layout_frame (void) + sub sp, sp, below_hard_fp_saved_regs_size + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = frame.hard_fp_offset; ++ frame.initial_adjust = frame.bytes_above_hard_fp; + frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } +@@ -10364,7 +10364,7 @@ aarch64_expand_prologue (void) + { + /* The offset of the frame chain record (if any) from the current SP. */ + poly_int64 chain_offset = (initial_adjust + callee_adjust +- - frame.hard_fp_offset); ++ - frame.bytes_above_hard_fp); + gcc_assert (known_ge (chain_offset, 0)); + + if (callee_adjust == 0) +@@ -13044,10 +13044,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) +- return frame.hard_fp_offset; ++ return frame.bytes_above_hard_fp; + + if (from == FRAME_POINTER_REGNUM) +- return frame.hard_fp_offset - frame.bytes_above_locals; ++ return frame.bytes_above_hard_fp - frame.bytes_above_locals; + } + + if (to == STACK_POINTER_REGNUM) +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 2acff9a96..0f7822c3d 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -820,10 +820,10 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + poly_int64 bytes_above_locals; + +- /* Offset from the base of the frame (incomming SP) to the +- hard_frame_pointer. This value is always a multiple of ++ /* The number of bytes between the hard_frame_pointer and the top of ++ the frame (the incomming SP). This value is always a multiple of + STACK_BOUNDARY. */ +- poly_int64 hard_fp_offset; ++ poly_int64 bytes_above_hard_fp; + + /* The size of the frame. This value is the offset from base of the + frame (incomming SP) to the stack_pointer. This value is always +-- +2.33.0 +
_service:tar_scm:0191-LoongArch-Change-OSDIR-for-distribution.patch
Added
@@ -0,0 +1,25 @@ +From 25423cf92026221b7c8798533c40d3e6269a1d7c Mon Sep 17 00:00:00 2001 +From: Peng Fan <fanpeng@loongson.cn> +Date: Thu, 31 Oct 2024 02:01:49 +0000 +Subject: PATCH LoongArch: Change OSDIR for distribution + +Signed-off-by: Peng Fan <fanpeng@loongson.cn> +--- + gcc/config/loongarch/t-linux | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux +index 7cd7cde25..1d1f42596 100644 +--- a/gcc/config/loongarch/t-linux ++++ b/gcc/config/loongarch/t-linux +@@ -28,4 +28,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),) + MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f) + MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s) + ++else ++ MULTILIB_OSDIRNAMES := ../lib64 ++ + endif +-- +2.45.2 +
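Editor's note: this pins the multilib OS directory so that, in the distribution's single-ABI layout, libraries resolve to ../lib64 relative to GCC's install directory rather than the upstream lp64d multilib names. On a toolchain built with this patch, GCC's standard query option should report the new value; the output below is what such a build is expected to print, not a captured log:

  $ gcc -print-multi-os-directory
  ../lib64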
_service:tar_scm:0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch
Added
@@ -0,0 +1,37 @@ +From 6aa0db727b6e3a7fed95b014f25f3f022d1f46e2 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:08 +0100 +Subject: PATCH 093/157 BackportSME aarch64: Tweak frame_size comment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=492b60670e69b0a7f11345b69a3c922c20d5d8c3 + +This patch fixes another case in which a value was described with +an “upside-down” view. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. +--- + gcc/config/aarch64/aarch64.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 0f7822c3d..39abca051 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -825,8 +825,8 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + poly_int64 bytes_above_hard_fp; + +- /* The size of the frame. This value is the offset from base of the +- frame (incomming SP) to the stack_pointer. This value is always ++ /* The size of the frame, i.e. the number of bytes between the bottom ++ of the outgoing arguments and the incoming SP. This value is always + a multiple of STACK_BOUNDARY. */ + poly_int64 frame_size; + +-- +2.33.0 +
_service:tar_scm:0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
Added
@@ -0,0 +1,198 @@ +From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:09 +0100 +Subject: PATCH 094/157 BackportSME aarch64: Measure reg_offset from the + bottom of the frame + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54 + +reg_offset was measured from the bottom of the saved register area. +This made perfect sense with the original layout, since the bottom +of the saved register area was also the hard frame pointer address. +It became slightly less obvious with SVE, since we save SVE +registers below the hard frame pointer, but it still made sense. + +However, if we want to allow different frame layouts, it's more +convenient and obvious to measure reg_offset from the bottom of +the frame. After previous patches, it's also a slight simplification +in its own right. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame): Add comment above + reg_offset. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets + from the bottom of the frame, rather than the bottom of the saved + register area. Measure reg_offset from the bottom of the frame + rather than the bottom of the saved register area. + (aarch64_save_callee_saves): Update accordingly. + (aarch64_restore_callee_saves): Likewise. + (aarch64_get_separate_components): Likewise. + (aarch64_process_components): Likewise. +--- + gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- + gcc/config/aarch64/aarch64.h | 3 ++ + 2 files changed, 27 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 67199a026..df8a83b04 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void) + static void + aarch64_layout_frame (void) + { +- poly_int64 offset = 0; + int regno, last_fp_reg = INVALID_REGNUM; + machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); +@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void) + gcc_assert (crtl->is_leaf + || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + +- frame.bytes_below_saved_regs = crtl->outgoing_args_size; ++ poly_int64 offset = crtl->outgoing_args_size; ++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ frame.bytes_below_saved_regs = offset; + + /* Now assign stack slots for the registers. 
Start with the predicate + registers, since predicate LDR and STR have a relatively small +@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void) + offset += BYTES_PER_SVE_PRED; + } + +- if (maybe_ne (offset, 0)) ++ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; ++ if (maybe_ne (saved_prs_size, 0)) + { + /* If we have any vector registers to save above the predicate registers, + the offset of the vector register save slots need to be a multiple +@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + else + { +- if (known_le (offset, vector_save_size)) +- offset = vector_save_size; +- else if (known_le (offset, vector_save_size * 2)) +- offset = vector_save_size * 2; ++ if (known_le (saved_prs_size, vector_save_size)) ++ offset = frame.bytes_below_saved_regs + vector_save_size; ++ else if (known_le (saved_prs_size, vector_save_size * 2)) ++ offset = frame.bytes_below_saved_regs + vector_save_size * 2; + else + gcc_unreachable (); + } +@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void) + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ +- bool saves_below_hard_fp_p = maybe_ne (offset, 0); +- frame.below_hard_fp_saved_regs_size = offset; +- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; ++ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; ++ bool saves_below_hard_fp_p ++ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ frame.bytes_below_hard_fp = offset; + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void) + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.saved_regs_size = offset; ++ frame.saved_regs_size = offset - frame.bytes_below_saved_regs; + +- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; ++ poly_int64 varargs_and_saved_regs_size ++ = frame.saved_regs_size + frame.saved_varargs_size; + + poly_int64 saved_regs_and_above + = aligned_upper_bound (varargs_and_saved_regs_size +@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, + + machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); +- offset = (frame.reg_offsetregno +- + frame.bytes_below_saved_regs +- - bytes_below_sp); ++ offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, +@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void) + it as a stack probe for -fstack-clash-protection. */ + if (flag_stack_clash_protection + && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) +- && known_eq (offset, 0)) ++ && known_eq (offset, frame.bytes_below_saved_regs)) + continue; + + /* Get the offset relative to the register we'll use. 
*/ + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + rtx reg = gen_rtx_REG (mode, regno); + poly_int64 offset = frame.reg_offsetregno; + if (frame_pointer_needed) +- offset -= frame.below_hard_fp_saved_regs_size; +- else +- offset += frame.bytes_below_saved_regs; ++ offset -= frame.bytes_below_hard_fp; + + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); +@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) + /* REGNO2 can be saved/restored in a pair with REGNO. */ + rtx reg2 = gen_rtx_REG (mode, regno2); + if (frame_pointer_needed) +- offset2 -= frame.below_hard_fp_saved_regs_size; +- else +- offset2 += frame.bytes_below_saved_regs; ++ offset2 -= frame.bytes_below_hard_fp; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) +@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p + && known_eq (frame.below_hard_fp_saved_regs_size, 0)) + { +- poly_int64 lr_offset = frame.reg_offsetLR_REGNUM; ++ poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM ++ - frame.bytes_below_saved_regs); + if (known_ge (lr_offset, 0)) + min_probe_threshold -= lr_offset.to_constant (); + else +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 39abca051..f340237d0 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune; + #ifdef HAVE_POLY_INT_H + struct GTY (()) aarch64_frame + { ++ /* The offset from the bottom of the static frame (the bottom of the ++ outgoing arguments) of each register save slot, or -2 if no save is ++ needed. */ + poly_int64 reg_offsetLAST_SAVED_REGNUM + 1; + + /* The number of extra stack bytes taken up by register varargs. +-- +2.33.0 +
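Editor's note: since every reg_offset now already includes bytes_below_saved_regs, each use site drops that term and the computed address is unchanged. A self-contained C++ check of that identity with made-up sizes (not GCC code):

  #include <cassert>

  int main ()
  {
    long bytes_below_saved_regs = 16;  // outgoing-argument area
    long old_reg_offset = 8;           // from the bottom of the save area
    long new_reg_offset = old_reg_offset + bytes_below_saved_regs;

    long bytes_below_sp = 16;  // SP sits at the bottom of the save area here
    // aarch64_save_callee_saves, before vs. after: the same address.
    assert (old_reg_offset + bytes_below_saved_regs - bytes_below_sp
            == new_reg_offset - bytes_below_sp);
  }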
_service:tar_scm:0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch
Added
@@ -0,0 +1,58 @@ +From 4b8f3f194e68d0d411eaa6692699d8e5e2b4217d Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:09 +0100 +Subject: PATCH 095/157 BackportSME aarch64: Simplify top of frame + allocation + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bc9dcdde80915d7585a21daa2b69f4adf4a1e3c1 + +After previous patches, it no longer really makes sense to allocate +the top of the frame in terms of varargs_and_saved_regs_size and +saved_regs_and_above. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify + the allocation of the top of the frame. +--- + gcc/config/aarch64/aarch64.cc | 23 ++++++++--------------- + 1 file changed, 8 insertions(+), 15 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index df8a83b04..3329aa364 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8925,23 +8925,16 @@ aarch64_layout_frame (void) + + frame.saved_regs_size = offset - frame.bytes_below_saved_regs; + +- poly_int64 varargs_and_saved_regs_size +- = frame.saved_regs_size + frame.saved_varargs_size; +- +- poly_int64 saved_regs_and_above +- = aligned_upper_bound (varargs_and_saved_regs_size +- + get_frame_size (), +- STACK_BOUNDARY / BITS_PER_UNIT); +- +- frame.bytes_above_hard_fp +- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; ++ offset += get_frame_size (); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ auto top_of_locals = offset; + +- /* Both these values are already aligned. */ +- gcc_assert (multiple_p (frame.bytes_below_saved_regs, +- STACK_BOUNDARY / BITS_PER_UNIT)); +- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; ++ offset += frame.saved_varargs_size; ++ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ frame.frame_size = offset; + +- frame.bytes_above_locals = frame.saved_varargs_size; ++ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; ++ frame.bytes_above_locals = frame.frame_size - top_of_locals; + + frame.initial_adjust = 0; + frame.final_adjust = 0; +-- +2.33.0 +
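Editor's note: the rewritten layout walks one running offset from the bottom of the frame — past the saved registers, past the (aligned) locals, then past the varargs save area — ending at frame_size. A toy recomputation with illustrative numbers (not GCC code) confirms that bytes_above_locals still comes out as the varargs size, matching the deleted top-down formula:

  #include <cassert>

  int main ()
  {
    long offset = 96;               // bottom of frame .. top of saved regs
    long locals = 40;               // get_frame_size ()
    long saved_varargs_size = 32;   // already 16-byte aligned

    offset += locals;
    offset = (offset + 15) & -16;   // aligned_upper_bound (offset, 16)
    long top_of_locals = offset;

    offset += saved_varargs_size;
    long frame_size = offset;

    assert (frame_size - top_of_locals == saved_varargs_size);
  }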
_service:tar_scm:0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch
Added
@@ -0,0 +1,41 @@ +From 0ab484f5de7d28c0a7166439d403e0983834b120 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:10 +0100 +Subject: PATCH 096/157 BackportSME aarch64: Minor initial adjustment + tweak + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ee5466ff4faca2076cc61f1f120d0b5062c8111c + +This patch just changes a calculation of initial_adjust +to one that makes it slightly more obvious that the total +adjustment is frame.frame_size. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak + calculation of initial_adjust for frames in which all saves + are SVE saves. +--- + gcc/config/aarch64/aarch64.cc | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 3329aa364..72604dd9d 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9014,11 +9014,10 @@ aarch64_layout_frame (void) + { + /* Frame in which all saves are SVE saves: + +- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size ++ sub sp, sp, frame_size - bytes_below_saved_regs + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ +- frame.initial_adjust = (frame.bytes_above_hard_fp +- + frame.below_hard_fp_saved_regs_size); ++ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; + frame.final_adjust = frame.bytes_below_saved_regs; + } + else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) +-- +2.33.0 +
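Editor's note: the old and new expressions for initial_adjust are equal by the identities this series establishes — bytes_above_hard_fp = frame_size - bytes_below_hard_fp, and bytes_below_hard_fp = below_hard_fp_saved_regs_size + bytes_below_saved_regs. A quick numeric check with arbitrary values (not GCC code):

  #include <cassert>

  int main ()
  {
    long bytes_below_saved_regs = 32;
    long below_hard_fp_saved_regs_size = 48;  // SVE save area
    long bytes_above_hard_fp = 64;

    long bytes_below_hard_fp
      = below_hard_fp_saved_regs_size + bytes_below_saved_regs;
    long frame_size = bytes_above_hard_fp + bytes_below_hard_fp;

    assert (bytes_above_hard_fp + below_hard_fp_saved_regs_size  // old
            == frame_size - bytes_below_saved_regs);             // new
  }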
_service:tar_scm:0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
Added
@@ -0,0 +1,128 @@ +From b4581d1e6a7b94dfbd58871dad51d3f12889081f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:10 +0100 +Subject: PATCH 097/157 BackportSME aarch64: Tweak stack clash boundary + condition + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1785b8077cc03214ebd1db953c870172fcf15966 + +The AArch64 ABI says that, when stack clash protection is used, +there can be a maximum of 1KiB of unprobed space at sp on entry +to a function. Therefore, we need to probe when allocating +>= guard_size - 1KiB of data (>= rather than >). This is what +GCC does. + +If an allocation is exactly guard_size bytes, it is enough to allocate +those bytes and probe once at offset 1024. It isn't possible to use a +single probe at any other offset: higher would conmplicate later code, +by leaving more unprobed space than usual, while lower would risk +leaving an entire page unprobed. For simplicity, the code probes all +allocations at offset 1024. + +Some register saves also act as probes. If we need to allocate +more space below the last such register save probe, we need to +probe the allocation if it is > 1KiB. Again, this allocation is +then sometimes (but not always) probed at offset 1024. This sort of +allocation is currently only used for outgoing arguments, which are +rarely this big. + +However, the code also probed if this final outgoing-arguments +allocation was == 1KiB, rather than just > 1KiB. This isn't +necessary, since the register save then probes at offset 1024 +as required. Continuing to probe allocations of exactly 1KiB +would complicate later patches. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): + Don't probe final allocations that are exactly 1KiB in size (after + unprobed space above the final allocation has been deducted). + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-17.c: New test. +--- + gcc/config/aarch64/aarch64.cc | 4 +- + .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++ + 2 files changed, 58 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 72604dd9d..ba92a23a7 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -9943,9 +9943,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + HOST_WIDE_INT guard_size + = 1 << param_stack_clash_protection_guard_size; + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; ++ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; ++ gcc_assert (multiple_p (poly_size, byte_sp_alignment)); + HOST_WIDE_INT min_probe_threshold + = (final_adjustment_p +- ? guard_used_by_caller ++ ? guard_used_by_caller + byte_sp_alignment + : guard_size - guard_used_by_caller); + /* When doing the final adjustment for the outgoing arguments, take into + account any unprobed space there is above the current SP. 
There are +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +new file mode 100644 +index 000000000..0d8a25d73 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +@@ -0,0 +1,55 @@ ++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void f(int, ...); ++void g(); ++ ++/* ++** test1: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1024 ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test1(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test2: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1040 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test2(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x); ++ } ++ g(); ++ return 1; ++} +-- +2.33.0 +
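Editor's note: the revised boundary condition lines up with the two new testcases — test1's final allocation is exactly 1KiB and now relies on the register save's probe at offset 1024, while test2's 1040-byte allocation still gets its own probe. The arithmetic, spelled out with the guard size the tests select (an illustrative check, not GCC code):

  #include <cassert>

  int main ()
  {
    long guard_size = 1L << 12;        // --param stack-clash-protection-guard-size=12
    long guard_used_by_caller = 1024;  // STACK_CLASH_CALLER_GUARD
    long byte_sp_alignment = 16;       // STACK_BOUNDARY / BITS_PER_UNIT

    // Final (outgoing-args) allocations: the patched threshold.
    long min_probe_threshold = guard_used_by_caller + byte_sp_alignment;

    assert (1024 < min_probe_threshold);   // test1: exactly 1KiB, no probe
    assert (1040 >= min_probe_threshold);  // test2: 1040 bytes, probed
  }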
_service:tar_scm:0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
Added
@@ -0,0 +1,409 @@ +From ffd483dc6a2a4af495d56cf5ebdbbb3b9ca58820 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:11 +0100 +Subject: PATCH 098/157 BackportSME aarch64: Put LR save probe in first + 16 bytes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fee0a18abfdd4874194abd149943fa7c77a29b7c + +-fstack-clash-protection uses the save of LR as a probe for the next +allocation. The next allocation could be: + +* another part of the static frame, e.g. when allocating SVE save slots + or outgoing arguments + +* an alloca in the same function + +* an allocation made by a callee function + +However, when -fomit-frame-pointer is used, the LR save slot is placed +above the other GPR save slots. It could therefore be up to 80 bytes +above the base of the GPR save area (which is also the hard fp address). + +aarch64_allocate_and_probe_stack_space took this into account when +deciding how much subsequent space could be allocated without needing +a probe. However, it interacted badly with: + + /* If doing a small final adjustment, we always probe at offset 0. + This is done to avoid issues when LR is not at position 0 or when + the final adjustment is smaller than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + +which forces any allocation that is smaller than the guard page size +to be probed at offset 0 rather than the usual offset 1024. It was +therefore possible to construct cases in which we had: + +* a probe using LR at SP + 80 bytes (or some other value >= 16) +* an allocation of the guard page size - 16 bytes +* a probe at SP + 0 + +which allocates guard page size + 64 consecutive unprobed bytes. + +This patch requires the LR probe to be in the first 16 bytes of the +save area when stack clash protection is active. Doing it +unconditionally would cause code-quality regressions. + +Putting LR before other registers prevents push/pop allocation +when shadow call stacks are enabled, since LR is restored +separately from the other callee-saved registers. + +The new comment doesn't say that the probe register is required +to be LR, since a later patch removes that restriction. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that + the LR save slot is in the first 16 bytes of the register save area. + Only form STP/LDP push/pop candidates if both registers are valid. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + when LR was not in the first 16 bytes. + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-18.c: New test. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 72 ++++++------- + .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++ + .../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++ + .../aarch64/stack-check-prologue-20.c | 3 + + 4 files changed, 233 insertions(+), 42 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index ba92a23a7..1ba4c2f89 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8873,26 +8873,34 @@ aarch64_layout_frame (void) + bool saves_below_hard_fp_p + = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); + frame.bytes_below_hard_fp = offset; ++ ++ auto allocate_gpr_slot = &(unsigned int regno) ++ { ++ frame.reg_offsetregno = offset; ++ if (frame.wb_push_candidate1 == INVALID_REGNUM) ++ frame.wb_push_candidate1 = regno; ++ else if (frame.wb_push_candidate2 == INVALID_REGNUM) ++ frame.wb_push_candidate2 = regno; ++ offset += UNITS_PER_WORD; ++ }; ++ + if (frame.emit_frame_chain) + { + /* FP and LR are placed in the linkage record. */ +- frame.reg_offsetR29_REGNUM = offset; +- frame.wb_push_candidate1 = R29_REGNUM; +- frame.reg_offsetR30_REGNUM = offset + UNITS_PER_WORD; +- frame.wb_push_candidate2 = R30_REGNUM; +- offset += 2 * UNITS_PER_WORD; ++ allocate_gpr_slot (R29_REGNUM); ++ allocate_gpr_slot (R30_REGNUM); + } ++ else if (flag_stack_clash_protection ++ && known_eq (frame.reg_offsetR30_REGNUM, SLOT_REQUIRED)) ++ /* Put the LR save slot first, since it makes a good choice of probe ++ for stack clash purposes. The idea is that the link register usually ++ has to be saved before a call anyway, and so we lose little by ++ stopping it from being individually shrink-wrapped. */ ++ allocate_gpr_slot (R30_REGNUM); + + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) +- { +- frame.reg_offsetregno = offset; +- if (frame.wb_push_candidate1 == INVALID_REGNUM) +- frame.wb_push_candidate1 = regno; +- else if (frame.wb_push_candidate2 == INVALID_REGNUM) +- frame.wb_push_candidate2 = regno; +- offset += UNITS_PER_WORD; +- } ++ allocate_gpr_slot (regno); + + poly_int64 max_int_offset = offset; + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +@@ -8970,10 +8978,13 @@ aarch64_layout_frame (void) + max_push_offset to 0, because no registers are popped at this time, + so callee_adjust cannot be adjusted. */ + HOST_WIDE_INT max_push_offset = 0; +- if (frame.wb_pop_candidate2 != INVALID_REGNUM) +- max_push_offset = 512; +- else if (frame.wb_pop_candidate1 != INVALID_REGNUM) +- max_push_offset = 256; ++ if (frame.wb_pop_candidate1 != INVALID_REGNUM) ++ { ++ if (frame.wb_pop_candidate2 != INVALID_REGNUM) ++ max_push_offset = 512; ++ else ++ max_push_offset = 256; ++ } + + HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; +@@ -9949,29 +9960,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + = (final_adjustment_p + ? guard_used_by_caller + byte_sp_alignment + : guard_size - guard_used_by_caller); +- /* When doing the final adjustment for the outgoing arguments, take into +- account any unprobed space there is above the current SP. 
There are +- two cases: +- +- - When saving SVE registers below the hard frame pointer, we force +- the lowest save to take place in the prologue before doing the final +- adjustment (i.e. we don't allow the save to be shrink-wrapped). +- This acts as a probe at SP, so there is no unprobed space. +- +- - When there are no SVE register saves, we use the store of the link +- register as a probe. We can't assume that LR was saved at position 0 +- though, so treat any space below it as unprobed. */ +- if (final_adjustment_p +- && known_eq (frame.below_hard_fp_saved_regs_size, 0)) +- { +- poly_int64 lr_offset = (frame.reg_offsetLR_REGNUM +- - frame.bytes_below_saved_regs); +- if (known_ge (lr_offset, 0)) +- min_probe_threshold -= lr_offset.to_constant (); +- else +- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); +- } +- + poly_int64 frame_size = frame.frame_size; + + /* We should always have a positive probe threshold. */ +@@ -10151,8 +10139,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p && rounded_size != 0) + min_probe_threshold = 0; + /* If doing a small final adjustment, we always probe at offset 0. +- This is done to avoid issues when LR is not at position 0 or when +- the final adjustment is smaller than the probing offset. */ ++ This is done to avoid issues when the final adjustment is smaller ++ than the probing offset. */ + else if (final_adjustment_p && rounded_size == 0) + residual_probe_offset = 0; + +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +new file mode 100644 +index 000000000..82447d20f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +@@ -0,0 +1,100 @@ ++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void f(int, ...); ++void g(); ++ ++/* ++** test1: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #4064 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++** str x26, \sp, #?4128\ ++** ... ++*/ ++int test1(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test2: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1040 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... 
++*/ ++int test2(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test3: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1024 ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test3(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +new file mode 100644 +index 000000000..73ac3e4e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +@@ -0,0 +1,100 @@ ++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void f(int, ...); ++void g(); ++ ++/* ++** test1: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #4064 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++** str x26, \sp, #?4128\ ++** ... ++*/ ++int test1(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test2: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1040 ++** str xzr, \sp\ ++** cbnz w0, .* ++** bl g ++** ... ++*/ ++int test2(int z) { ++ __uint128_t x = 0; ++ int y0x400; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x); ++ } ++ g(); ++ return 1; ++} ++ ++/* ++** test3: ++** ... ++** str x30, \sp\ ++** sub sp, sp, #1024 ++** cbnz w0, .* ++** bl g ++** ... 
++*/ ++int test3(int z) { ++ __uint128_t x = 0; ++ int y[0x400]; ++ if (z) ++ { ++ asm volatile ("" ::: ++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); ++ f(0, 0, 0, 0, 0, 0, 0, &y, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, ++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); ++ } ++ g(); ++ return 1; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c +new file mode 100644 +index 000000000..690aae8df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c +@@ -0,0 +1,3 @@ ++/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ ++ ++#include "stack-check-prologue-19.c" +-- +2.33.0 +
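The three test files above all rely on the same trick, which is easy to reproduce outside the testsuite. The standalone sketch below is illustrative only (the function names are invented, not taken from the patch): a local array larger than the 4KiB guard page configured by --param stack-clash-protection-guard-size=12 forces the prologue to emit explicit probe stores.

void sink(char *);            /* hypothetical consumer, keeps buf live */

void needs_probes(void)
{
  char buf[8192];             /* bigger than one 4KiB guard page */
  sink(buf);
}

/* Compile with:
     gcc -O2 -fstack-clash-protection -fomit-frame-pointer \
         --param stack-clash-protection-guard-size=12 -S probe.c
   and look for "str xzr, [sp, ...]" probe stores in needs_probes.  */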
_service:tar_scm:0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
Added
@@ -0,0 +1,126 @@ +From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:11 +0100 +Subject: PATCH 099/157 BackportSME aarch64: Simplify probe of final + frame allocation + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae + +Previous patches ensured that the final frame allocation only needs +a probe when the size is strictly greater than 1KiB. It's therefore +safe to use the normal 1024 probe offset in all cases. + +The main motivation for doing this is to simplify the code and +remove the number of special cases. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): + Always probe the residual allocation at offset 1024, asserting + that that is in range. + +gcc/testsuite/ + * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe + to be at offset 1024 rather than offset 0. + * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 12 ++++-------- + .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +- + .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++-- + .../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++-- + 4 files changed, 9 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 1ba4c2f89..6d835dc8f 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + are still safe. */ + if (residual) + { +- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller; ++ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size); ++ + /* If we're doing final adjustments, and we've done any full page + allocations then any residual needs to be probed. */ + if (final_adjustment_p && rounded_size != 0) + min_probe_threshold = 0; +- /* If doing a small final adjustment, we always probe at offset 0. +- This is done to avoid issues when the final adjustment is smaller +- than the probing offset. */ +- else if (final_adjustment_p && rounded_size == 0) +- residual_probe_offset = 0; + + aarch64_sub_sp (temp1, temp2, residual, frame_related_p); + if (residual >= min_probe_threshold) +@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required." + "\n", residual); + +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- residual_probe_offset)); ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ guard_used_by_caller)); + emit_insn (gen_blockage ()); + } + } +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +index 0d8a25d73..f0ec13897 100644 +--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +@@ -33,7 +33,7 @@ int test1(int z) { + ** ... + ** str x30, \sp\ + ** sub sp, sp, #1040 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... 
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +index 82447d20f..6383bec5e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +@@ -9,7 +9,7 @@ void g(); + ** ... + ** str x30, \sp\ + ** sub sp, sp, #4064 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +@@ -50,7 +50,7 @@ int test1(int z) { + ** ... + ** str x30, \sp\ + ** sub sp, sp, #1040 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +index 73ac3e4e4..562039b5e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +@@ -9,7 +9,7 @@ void g(); + ** ... + ** str x30, \sp\ + ** sub sp, sp, #4064 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +@@ -50,7 +50,7 @@ int test1(int z) { + ** ... + ** str x30, \sp\ + ** sub sp, sp, #1040 +-** str xzr, \sp\ ++** str xzr, \sp, #?1024\ + ** cbnz w0, .* + ** bl g + ** ... +-- +2.33.0 +
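In case the rule is easier to read outside the compiler, here is a minimal model of the residual-probe decision after this patch, based only on the commit message above; it is not GCC source, and the threshold logic is simplified to the single claim the commit makes.

#include <stdio.h>

#define GUARD_USED_BY_CALLER 1024   /* bytes a callee may use unprobed */

/* Probe offset for a residual final allocation of RESIDUAL bytes,
   or -1 if no probe is required.  Per the commit message, a final
   allocation only needs a probe when it is strictly larger than 1KiB,
   so the fixed 1KiB offset is always inside the allocation.  */
long residual_probe_offset(long residual)
{
  if (residual <= GUARD_USED_BY_CALLER)
    return -1;                      /* no probe needed */
  return GUARD_USED_BY_CALLER;      /* i.e. probe at [sp, #1024] */
}

int main(void)
{
  /* 1040-byte frame probes at 1024; a 1024-byte frame needs no probe.  */
  printf("%ld %ld\n", residual_probe_offset(1040), residual_probe_offset(1024));
  return 0;
}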
_service:tar_scm:0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
Added
@@ -0,0 +1,280 @@ +From 1bf3e9a04411b483c89d2e2f9096ab66800c3b3f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:12 +0100 +Subject: PATCH 100/157 BackportSME aarch64: Explicitly record probe + registers in frame info + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce957484eea15f09503fcffa4dfdfb70ad82f8f + +The stack frame is currently divided into three areas: + +A: the area above the hard frame pointer +B: the SVE saves below the hard frame pointer +C: the outgoing arguments + +If the stack frame is allocated in one chunk, the allocation needs a +probe if the frame size is >= guard_size - 1KiB. In addition, if the +function is not a leaf function, it must probe an address no more than +1KiB above the outgoing SP. We ensured the second condition by + +(1) using single-chunk allocations for non-leaf functions only if + the link register save slot is within 512 bytes of the bottom + of the frame; and + +(2) using the link register save as a probe (meaning, for instance, + that it can't be individually shrink wrapped) + +If instead the stack is allocated in multiple chunks, then: + +* an allocation involving only the outgoing arguments (C above) requires + a probe if the allocation size is > 1KiB + +* any other allocation requires a probe if the allocation size + is >= guard_size - 1KiB + +* second and subsequent allocations require the previous allocation + to probe at the bottom of the allocated area, regardless of the size + of that previous allocation + +The final point means that, unlike for single allocations, +it can be necessary to have both a non-SVE register probe and +an SVE register probe. For example: + +* allocate A, probe using a non-SVE register save +* allocate B, probe using an SVE register save +* allocate C + +The non-SVE register used in this case was again the link register. +It was previously used even if the link register save slot was some +bytes above the bottom of the non-SVE register saves, but an earlier +patch avoided that by putting the link register save slot first. + +As a belt-and-braces fix, this patch explicitly records which +probe registers we're using and allows the non-SVE probe to be +whichever register comes first (as for SVE). + +The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) + (aarch64_frame::hard_fp_save_and_probe): New fields. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. + Rather than asserting that a leaf function saves LR, instead assert + that a leaf function saves something. + (aarch64_get_separate_components): Prevent the chosen probe + registers from being individually shrink-wrapped. + (aarch64_allocate_and_probe_stack_space): Remove workaround for + probe registers that aren't at the bottom of the previous allocation. + +gcc/testsuite/ + * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. 
+--- + gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++---- + gcc/config/aarch64/aarch64.h | 8 +++ + .../aarch64/sve/pcs/stack_clash_3.c | 6 +- + 3 files changed, 64 insertions(+), 18 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 6d835dc8f..dd80ceba8 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8810,15 +8810,11 @@ aarch64_layout_frame (void) + && !crtl->abi->clobbers_full_reg_p (regno)) + frame.reg_offsetregno = SLOT_REQUIRED; + +- /* With stack-clash, LR must be saved in non-leaf functions. The saving of +- LR counts as an implicit probe which allows us to maintain the invariant +- described in the comment at expand_prologue. */ +- gcc_assert (crtl->is_leaf +- || maybe_ne (frame.reg_offsetR30_REGNUM, SLOT_NOT_REQUIRED)); + + poly_int64 offset = crtl->outgoing_args_size; + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); + frame.bytes_below_saved_regs = offset; ++ frame.sve_save_and_probe = INVALID_REGNUM; + + /* Now assign stack slots for the registers. Start with the predicate + registers, since predicate LDR and STR have a relatively small +@@ -8826,6 +8822,8 @@ aarch64_layout_frame (void) + for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ if (frame.sve_save_and_probe == INVALID_REGNUM) ++ frame.sve_save_and_probe = regno; + frame.reg_offsetregno = offset; + offset += BYTES_PER_SVE_PRED; + } +@@ -8863,6 +8861,8 @@ aarch64_layout_frame (void) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ if (frame.sve_save_and_probe == INVALID_REGNUM) ++ frame.sve_save_and_probe = regno; + frame.reg_offsetregno = offset; + offset += vector_save_size; + } +@@ -8872,10 +8872,18 @@ aarch64_layout_frame (void) + frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; + bool saves_below_hard_fp_p + = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ gcc_assert (!saves_below_hard_fp_p ++ || (frame.sve_save_and_probe != INVALID_REGNUM ++ && known_eq (frame.reg_offsetframe.sve_save_and_probe, ++ frame.bytes_below_saved_regs))); ++ + frame.bytes_below_hard_fp = offset; ++ frame.hard_fp_save_and_probe = INVALID_REGNUM; + + auto allocate_gpr_slot = &(unsigned int regno) + { ++ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) ++ frame.hard_fp_save_and_probe = regno; + frame.reg_offsetregno = offset; + if (frame.wb_push_candidate1 == INVALID_REGNUM) + frame.wb_push_candidate1 = regno; +@@ -8909,6 +8917,8 @@ aarch64_layout_frame (void) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ if (frame.hard_fp_save_and_probe == INVALID_REGNUM) ++ frame.hard_fp_save_and_probe = regno; + /* If there is an alignment gap between integer and fp callee-saves, + allocate the last fp register to it if possible. */ + if (regno == last_fp_reg +@@ -8932,6 +8942,17 @@ aarch64_layout_frame (void) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + + frame.saved_regs_size = offset - frame.bytes_below_saved_regs; ++ gcc_assert (known_eq (frame.saved_regs_size, ++ frame.below_hard_fp_saved_regs_size) ++ || (frame.hard_fp_save_and_probe != INVALID_REGNUM ++ && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, ++ frame.bytes_below_hard_fp))); ++ ++ /* With stack-clash, a register must be saved in non-leaf functions. 
++ The saving of the bottommost register counts as an implicit probe, ++ which allows us to maintain the invariant described in the comment ++ at expand_prologue. */ ++ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); + + offset += get_frame_size (); + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +@@ -9062,6 +9083,25 @@ aarch64_layout_frame (void) + frame.final_adjust = frame.bytes_below_saved_regs; + } + ++ /* The frame is allocated in pieces, with each non-final piece ++ including a register save at offset 0 that acts as a probe for ++ the following piece. In addition, the save of the bottommost register ++ acts as a probe for callees and allocas. Roll back any probes that ++ aren't needed. ++ ++ A probe isn't needed if it is associated with the final allocation ++ (including callees and allocas) that happens before the epilogue is ++ executed. */ ++ if (crtl->is_leaf ++ && !cfun->calls_alloca ++ && known_eq (frame.final_adjust, 0)) ++ { ++ if (maybe_ne (frame.sve_callee_adjust, 0)) ++ frame.sve_save_and_probe = INVALID_REGNUM; ++ else ++ frame.hard_fp_save_and_probe = INVALID_REGNUM; ++ } ++ + /* Make sure the individual adjustments add up to the full frame size. */ + gcc_assert (known_eq (frame.initial_adjust + + frame.callee_adjust +@@ -9639,13 +9679,6 @@ aarch64_get_separate_components (void) + + poly_int64 offset = frame.reg_offsetregno; + +- /* If the register is saved in the first SVE save slot, we use +- it as a stack probe for -fstack-clash-protection. */ +- if (flag_stack_clash_protection +- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) +- && known_eq (offset, frame.bytes_below_saved_regs)) +- continue; +- + /* Get the offset relative to the register we'll use. */ + if (frame_pointer_needed) + offset -= frame.bytes_below_hard_fp; +@@ -9680,6 +9713,13 @@ aarch64_get_separate_components (void) + + bitmap_clear_bit (components, LR_REGNUM); + bitmap_clear_bit (components, SP_REGNUM); ++ if (flag_stack_clash_protection) ++ { ++ if (frame.sve_save_and_probe != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.sve_save_and_probe); ++ if (frame.hard_fp_save_and_probe != INVALID_REGNUM) ++ bitmap_clear_bit (components, frame.hard_fp_save_and_probe); ++ } + + return components; + } +@@ -10226,8 +10266,8 @@ aarch64_epilogue_uses (int regno) + When probing is needed, we emit a probe at the start of the prologue + and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter. + +- We have to track how much space has been allocated and the only stores +- to the stack we track as implicit probes are the FP/LR stores. ++ We can also use register saves as probes. These are stored in ++ sve_save_and_probe and hard_fp_save_and_probe. + + For outgoing arguments we probe if the size is larger than 1KB, such that + the ABI specified buffer is maintained for the next callee. +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index f340237d0..af480d9e8 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -887,6 +887,14 @@ struct GTY (()) aarch64_frame + This is the register they should use. */ + unsigned spare_pred_reg; + ++ /* An SVE register that is saved below the hard frame pointer and that acts ++ as a probe for later allocations, or INVALID_REGNUM if none. */ ++ unsigned sve_save_and_probe; ++ ++ /* A register that is saved at the hard frame pointer and that acts ++ as a probe for later allocations, or INVALID_REGNUM if none. 
*/ ++ unsigned hard_fp_save_and_probe; ++ + bool laid_out; + + /* True if shadow call stack should be enabled for the current function. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +index 3e01ec36c..3530a0d50 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +@@ -11,11 +11,10 @@ + ** mov x11, sp + ** ... + ** sub sp, sp, x13 +-** str p4, \[sp\] + ** cbz w0, [^\n]* ++** str p4, \[sp\] + ** ... + ** ptrue p0\.b, all +-** ldr p4, \[sp\] + ** addvl sp, sp, #1 + ** ldr x24, \[sp\], 32 + ** ret +@@ -39,13 +38,12 @@ test_1 (int n) + ** mov x11, sp + ** ... + ** sub sp, sp, x13 +-** str p4, \[sp\] + ** cbz w0, [^\n]* ++** str p4, \[sp\] + ** str p5, \[sp, #1, mul vl\] + ** str p6, \[sp, #2, mul vl\] + ** ... + ** ptrue p0\.b, all +-** ldr p4, \[sp\] + ** addvl sp, sp, #1 + ** ldr x24, \[sp\], 32 + ** ret +-- +2.33.0 +
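A simplified model of the new bookkeeping may help. The sketch below is invented code, not GCC source: it shows how the first register saved in each region, in offset order, becomes that region's probe register, which is why both fields must then be excluded from shrink-wrapping.

#include <stdio.h>

#define INVALID_REGNUM (~0u)

struct frame_probes {
  unsigned sve_save_and_probe;     /* bottommost SVE save, if any */
  unsigned hard_fp_save_and_probe; /* bottommost non-SVE save, if any */
};

/* Only the first (lowest) save in a region acts as its probe.  */
static void record_probe(unsigned *slot, unsigned regno)
{
  if (*slot == INVALID_REGNUM)
    *slot = regno;
}

int main(void)
{
  struct frame_probes fp = { INVALID_REGNUM, INVALID_REGNUM };
  unsigned sve_saves[] = { 68, 69 };      /* hypothetical register numbers */
  unsigned gpr_saves[] = { 30, 29, 24 };  /* LR first in offset order */

  for (unsigned i = 0; i < 2; i++)
    record_probe(&fp.sve_save_and_probe, sve_saves[i]);
  for (unsigned i = 0; i < 3; i++)
    record_probe(&fp.hard_fp_save_and_probe, gpr_saves[i]);

  printf("probes: sve=%u hard_fp=%u\n",
         fp.sve_save_and_probe, fp.hard_fp_save_and_probe);
  return 0;
}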
_service:tar_scm:0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
Added
@@ -0,0 +1,160 @@ +From 5c33afb2173f68a0166bd180977cd1e547df22dc Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:12 +0100 +Subject: PATCH 101/157 BackportSME aarch64: Remove + below_hard_fp_saved_regs_size + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2abfc867d3ba025ac2146bb21b92a93e6325dec1 + +After previous patches, it's no longer necessary to store +saved_regs_size and below_hard_fp_saved_regs_size in the frame info. +All measurements instead use the top or bottom of the frame as +reference points. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size) + (aarch64_frame::below_hard_fp_saved_regs_size): Delete. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly. +--- + gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++------------------- + gcc/config/aarch64/aarch64.h | 7 ------ + 2 files changed, 21 insertions(+), 31 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index dd80ceba8..0894ed325 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8869,9 +8869,8 @@ aarch64_layout_frame (void) + + /* OFFSET is now the offset of the hard frame pointer from the bottom + of the callee save area. */ +- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; +- bool saves_below_hard_fp_p +- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); ++ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; ++ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0); + gcc_assert (!saves_below_hard_fp_p + || (frame.sve_save_and_probe != INVALID_REGNUM + && known_eq (frame.reg_offsetframe.sve_save_and_probe, +@@ -8941,9 +8940,8 @@ aarch64_layout_frame (void) + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- frame.saved_regs_size = offset - frame.bytes_below_saved_regs; +- gcc_assert (known_eq (frame.saved_regs_size, +- frame.below_hard_fp_saved_regs_size) ++ auto saved_regs_size = offset - frame.bytes_below_saved_regs; ++ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) + || (frame.hard_fp_save_and_probe != INVALID_REGNUM + && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, + frame.bytes_below_hard_fp))); +@@ -8952,7 +8950,7 @@ aarch64_layout_frame (void) + The saving of the bottommost register counts as an implicit probe, + which allows us to maintain the invariant described in the comment + at expand_prologue. 
*/ +- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); ++ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); + + offset += get_frame_size (); + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +@@ -9009,7 +9007,7 @@ aarch64_layout_frame (void) + + HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; + HOST_WIDE_INT const_saved_regs_size; +- if (known_eq (frame.saved_regs_size, 0)) ++ if (known_eq (saved_regs_size, 0)) + frame.initial_adjust = frame.frame_size; + else if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +@@ -9022,7 +9020,7 @@ aarch64_layout_frame (void) + frame.callee_adjust = const_size; + } + else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) +- && frame.saved_regs_size.is_constant (&const_saved_regs_size) ++ && saved_regs_size.is_constant (&const_saved_regs_size) + && const_below_saved_regs + const_saved_regs_size < 512 + /* We could handle this case even with data below the saved + registers, provided that that data left us with valid offsets +@@ -9041,8 +9039,7 @@ aarch64_layout_frame (void) + frame.initial_adjust = frame.frame_size; + } + else if (saves_below_hard_fp_p +- && known_eq (frame.saved_regs_size, +- frame.below_hard_fp_saved_regs_size)) ++ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size)) + { + /* Frame in which all saves are SVE saves: + +@@ -9064,7 +9061,7 @@ aarch64_layout_frame (void) + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ + frame.callee_adjust = const_above_fp; +- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; ++ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } + else +@@ -9079,7 +9076,7 @@ aarch64_layout_frame (void) + save SVE registers relative to SP + sub sp, sp, bytes_below_saved_regs */ + frame.initial_adjust = frame.bytes_above_hard_fp; +- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; ++ frame.sve_callee_adjust = below_hard_fp_saved_regs_size; + frame.final_adjust = frame.bytes_below_saved_regs; + } + +@@ -10231,17 +10228,17 @@ aarch64_epilogue_uses (int regno) + | local variables | <-- frame_pointer_rtx + | | + +-------------------------------+ +- | padding | \ +- +-------------------------------+ | +- | callee-saved registers | | frame.saved_regs_size +- +-------------------------------+ | +- | LR' | | +- +-------------------------------+ | +- | FP' | | +- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) +- | SVE vector registers | | \ +- +-------------------------------+ | | below_hard_fp_saved_regs_size +- | SVE predicate registers | / / ++ | padding | ++ +-------------------------------+ ++ | callee-saved registers | ++ +-------------------------------+ ++ | LR' | ++ +-------------------------------+ ++ | FP' | ++ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned) ++ | SVE vector registers | ++ +-------------------------------+ ++ | SVE predicate registers | + +-------------------------------+ + | dynamic allocation | + +-------------------------------+ +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index af480d9e8..292ef2eec 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -801,18 +801,11 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + HOST_WIDE_INT saved_varargs_size; + +- /* The size of the callee-save registers with a slot in REG_OFFSET. 
*/ +- poly_int64 saved_regs_size; +- + /* The number of bytes between the bottom of the static frame (the bottom + of the outgoing arguments) and the bottom of the register save area. + This value is always a multiple of STACK_BOUNDARY. */ + poly_int64 bytes_below_saved_regs; + +- /* The size of the callee-save registers with a slot in REG_OFFSET that +- are saved below the hard frame pointer. */ +- poly_int64 below_hard_fp_saved_regs_size; +- + /* The number of bytes between the bottom of the static frame (the bottom + of the outgoing arguments) and the hard frame pointer. This value is + always a multiple of STACK_BOUNDARY. */ +-- +2.33.0 +
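The sketch below restates the simplification in standalone C; it is a paraphrase of the commit message, not GCC source. The deleted fields become trivial derivations from the reference points that the frame struct keeps.

#include <stdio.h>

struct frame_model {             /* stand-in for aarch64_frame */
  long bytes_below_saved_regs;   /* bottom of the register save area */
  long bytes_below_hard_fp;      /* offset of the hard frame pointer */
};

/* What the deleted below_hard_fp_saved_regs_size field used to cache.  */
long below_hard_fp_saved_regs_size(const struct frame_model *f)
{
  return f->bytes_below_hard_fp - f->bytes_below_saved_regs;
}

int main(void)
{
  struct frame_model f = { 32, 64 };                    /* invented offsets */
  printf("%ld\n", below_hard_fp_saved_regs_size(&f));   /* prints 32 */
  return 0;
}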
_service:tar_scm:0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
Added
@@ -0,0 +1,301 @@ +From b225443d64481bc225e29bf119d99b719c69cd3c Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Sep 2023 16:05:13 +0100 +Subject: PATCH 102/157 BackportSME aarch64: Make stack smash canary + protect saved registers + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3e4afea3b192c205c9a9da99f4cac65c68087eaf + +AArch64 normally puts the saved registers near the bottom of the frame, +immediately above any dynamic allocations. But this means that a +stack-smash attack on those dynamic allocations could overwrite the +saved registers without needing to reach as far as the stack smash +canary. + +The same thing could also happen for variable-sized arguments that are +passed by value, since those are allocated before a call and popped on +return. + +This patch avoids that by putting the locals (and thus the canary) below +the saved registers when stack smash protection is active. + +The patch fixes CVE-2023-4039. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p): + New function. + (aarch64_layout_frame): Use it to decide whether locals should + go above or below the saved registers. + (aarch64_expand_prologue): Update stack layout comment. + Emit a stack tie after the final adjustment. + +gcc/testsuite/ + * gcc.target/aarch64/stack-protector-8.c: New test. + * gcc.target/aarch64/stack-protector-9.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 46 +++++++-- + .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++ + .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++ + 3 files changed, 168 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 0894ed325..8d4dd2891 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8733,6 +8733,20 @@ aarch64_needs_frame_chain (void) + return aarch64_use_frame_pointer; + } + ++/* Return true if the current function should save registers above ++ the locals area, rather than below it. */ ++ ++static bool ++aarch64_save_regs_above_locals_p () ++{ ++ /* When using stack smash protection, make sure that the canary slot ++ comes between the locals and the saved registers. Otherwise, ++ it would be possible for a carefully sized smash attack to change ++ the saved registers (particularly LR and FP) without reaching the ++ canary. */ ++ return crtl->stack_protect_guard; ++} ++ + /* Mark the registers that need to be saved by the callee and calculate + the size of the callee-saved registers area and frame record (both FP + and LR may be omitted). 
*/ +@@ -8744,6 +8758,7 @@ aarch64_layout_frame (void) + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); + bool frame_related_fp_reg_p = false; + aarch64_frame &frame = cfun->machine->frame; ++ poly_int64 top_of_locals = -1; + + frame.emit_frame_chain = aarch64_needs_frame_chain (); + +@@ -8810,9 +8825,16 @@ aarch64_layout_frame (void) + && !crtl->abi->clobbers_full_reg_p (regno)) + frame.reg_offsetregno = SLOT_REQUIRED; + ++ bool regs_at_top_p = aarch64_save_regs_above_locals_p (); + + poly_int64 offset = crtl->outgoing_args_size; + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); ++ if (regs_at_top_p) ++ { ++ offset += get_frame_size (); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ top_of_locals = offset; ++ } + frame.bytes_below_saved_regs = offset; + frame.sve_save_and_probe = INVALID_REGNUM; + +@@ -8952,15 +8974,18 @@ aarch64_layout_frame (void) + at expand_prologue. */ + gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); + +- offset += get_frame_size (); +- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +- auto top_of_locals = offset; +- ++ if (!regs_at_top_p) ++ { ++ offset += get_frame_size (); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ top_of_locals = offset; ++ } + offset += frame.saved_varargs_size; + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); + frame.frame_size = offset; + + frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; ++ gcc_assert (known_ge (top_of_locals, 0)); + frame.bytes_above_locals = frame.frame_size - top_of_locals; + + frame.initial_adjust = 0; +@@ -10225,10 +10250,10 @@ aarch64_epilogue_uses (int regno) + | for register varargs | + | | + +-------------------------------+ +- | local variables | <-- frame_pointer_rtx ++ | local variables (1) | <-- frame_pointer_rtx + | | + +-------------------------------+ +- | padding | ++ | padding (1) | + +-------------------------------+ + | callee-saved registers | + +-------------------------------+ +@@ -10240,6 +10265,10 @@ aarch64_epilogue_uses (int regno) + +-------------------------------+ + | SVE predicate registers | + +-------------------------------+ ++ | local variables (2) | ++ +-------------------------------+ ++ | padding (2) | ++ +-------------------------------+ + | dynamic allocation | + +-------------------------------+ + | padding | +@@ -10249,6 +10278,9 @@ aarch64_epilogue_uses (int regno) + +-------------------------------+ + | | <-- stack_pointer_rtx (aligned) + ++ The regions marked (1) and (2) are mutually exclusive. (2) is used ++ when aarch64_save_regs_above_locals_p is true. ++ + Dynamic stack allocations via alloca() decrease stack_pointer_rtx + but leave frame_pointer_rtx and hard_frame_pointer_rtx + unchanged. +@@ -10444,6 +10476,8 @@ aarch64_expand_prologue (void) + gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, + !frame_pointer_needed, true); ++ if (emit_frame_chain && maybe_ne (final_adjust, 0)) ++ aarch64_emit_stack_tie (hard_frame_pointer_rtx); + } + + /* Return TRUE if we can use a simple_return insn. 
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c +new file mode 100644 +index 000000000..e71d820e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c +@@ -0,0 +1,95 @@ ++/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void g(void *); ++__SVBool_t *h(void *); ++ ++/* ++** test1: ++** sub sp, sp, #288 ++** stp x29, x30, \sp, #?272\ ++** add x29, sp, #?272 ++** mrs (x0-9+), tpidr2_el0 ++** ldr (x0-9+), \\1, #?16\ ++** str \2, \sp, #?264\ ++** mov \2, #?0 ++** add x0, sp, #?8 ++** bl g ++** ... ++** mrs .* ++** ... ++** bne .* ++** ... ++** ldp x29, x30, \sp, #?272\ ++** add sp, sp, #?288 ++** ret ++** bl __stack_chk_fail ++*/ ++int test1() { ++ int y0x40; ++ g(y); ++ return 1; ++} ++ ++/* ++** test2: ++** stp x29, x30, \sp, #?-16\! ++** mov x29, sp ++** sub sp, sp, #1040 ++** mrs (x0-9+), tpidr2_el0 ++** ldr (x0-9+), \\1, #?16\ ++** str \2, \sp, #?1032\ ++** mov \2, #?0 ++** add x0, sp, #?8 ++** bl g ++** ... ++** mrs .* ++** ... ++** bne .* ++** ... ++** add sp, sp, #?1040 ++** ldp x29, x30, \sp\, #?16 ++** ret ++** bl __stack_chk_fail ++*/ ++int test2() { ++ int y0x100; ++ g(y); ++ return 1; ++} ++ ++#pragma GCC target "+sve" ++ ++/* ++** test3: ++** stp x29, x30, \sp, #?-16\! ++** mov x29, sp ++** addvl sp, sp, #-18 ++** ... ++** str p4, \sp\ ++** ... ++** sub sp, sp, #272 ++** mrs (x0-9+), tpidr2_el0 ++** ldr (x0-9+), \\1, #?16\ ++** str \2, \sp, #?264\ ++** mov \2, #?0 ++** add x0, sp, #?8 ++** bl h ++** ... ++** mrs .* ++** ... ++** bne .* ++** ... ++** add sp, sp, #?272 ++** ... ++** ldr p4, \sp\ ++** ... ++** addvl sp, sp, #18 ++** ldp x29, x30, \sp\, #?16 ++** ret ++** bl __stack_chk_fail ++*/ ++__SVBool_t test3() { ++ int y0x40; ++ return *h(y); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c +new file mode 100644 +index 000000000..58f322aa4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c +@@ -0,0 +1,33 @@ ++/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++** main: ++** ... ++** stp x29, x30, \sp, #?-0-9+\! ++** ... ++** sub sp, sp, #0-9+ ++** ... ++** str x0-9+, \x29, #?-8\ ++** ... ++*/ ++int f(const char *); ++void g(void *); ++int main(int argc, char* argv) ++{ ++ int a; ++ int b; ++ char c2+f(argv1); ++ int d0x100; ++ char y; ++ ++ y=42; a=4; b=10; ++ c0 = 'h'; c1 = '\0'; ++ ++ cf(argv2) = '\0'; ++ ++ __builtin_printf("%d %d\n%s\n", a, b, c); ++ g(d); ++ ++ return 0; ++} +-- +2.33.0 +
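For readers tracking CVE-2023-4039, the invented function below shows the shape of code the fix targets; it is illustrative only and deliberately unsafe, not taken from the patch. Built with -fstack-protector-strong, the overflow now has to cross the canary before it can reach the saved LR/FP.

void fill(char *buf, int n);     /* hypothetical untrusted writer */

int victim(int n)
{
  char buf[n < 64 ? 64 : n];     /* dynamically sized local */
  fill(buf, n);                  /* may write past the end of buf */
  return buf[0];
}

/* Before the patch, buf sat directly below the saved registers, so an
   overflow could corrupt LR/FP without touching the canary.  After it,
   the canary slot lies between buf and the saves.  */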
_service:tar_scm:0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch
Added
@@ -0,0 +1,201 @@ +From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 17 Oct 2023 23:45:43 +0100 +Subject: PATCH 103/157 BackportSME Handle epilogues that contain jumps + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e + +The prologue/epilogue pass allows the prologue sequence to contain +jumps. The sequence is then partitioned into basic blocks using +find_many_sub_basic_blocks. + +This patch treats epilogues in a similar way. Since only one block +might need to be split, the patch (re)introduces a find_sub_basic_blocks +routine to handle a single block. + +The new routine hard-codes the assumption that split_block will chain +the new block immediately after the original block. The routine doesn't +try to replicate the fix for PR81030, since that was specific to +gimple->rtl expansion. + +The patch is needed for follow-on aarch64 patches that add conditional +code to the epilogue. The tests are part of those patches. + +gcc/ + * cfgbuild.h (find_sub_basic_blocks): Declare. + * cfgbuild.cc (update_profile_for_new_sub_basic_block): New function, + split out from... + (find_many_sub_basic_blocks): ...here. + (find_sub_basic_blocks): New function. + * function.cc (thread_prologue_and_epilogue_insns): Handle + epilogues that contain jumps. +--- + gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++---------------- + gcc/cfgbuild.h | 1 + + gcc/function.cc | 4 +++ + 3 files changed, 70 insertions(+), 30 deletions(-) + +diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc +index 646a06614..58b865f29 100644 +--- a/gcc/cfgbuild.cc ++++ b/gcc/cfgbuild.cc +@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b) + } + } + ++/* Update the profile information for BB, which was created by splitting ++ an RTL block that had a non-final jump. */ ++ ++static void ++update_profile_for_new_sub_basic_block (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ bool initialized_src = false, uninitialized_src = false; ++ bb->count = profile_count::zero (); ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (e->count ().initialized_p ()) ++ { ++ bb->count += e->count (); ++ initialized_src = true; ++ } ++ else ++ uninitialized_src = true; ++ } ++ /* When some edges are missing with read profile, this is ++ most likely because RTL expansion introduced loop. ++ When profile is guessed we may have BB that is reachable ++ from unlikely path as well as from normal path. ++ ++ TODO: We should handle loops created during BB expansion ++ correctly here. For now we assume all those loop to cycle ++ precisely once. */ ++ if (!initialized_src ++ || (uninitialized_src ++ && profile_status_for_fn (cfun) < PROFILE_GUESSED)) ++ bb->count = profile_count::uninitialized (); ++ ++ compute_outgoing_frequencies (bb); ++} ++ + /* Assume that some pass has inserted labels or control flow + instructions within a basic block. Split basic blocks as needed + and create edges. 
*/ +@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks) + if (profile_status_for_fn (cfun) != PROFILE_ABSENT) + FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb) + { +- edge e; +- edge_iterator ei; +- + if (STATE (bb) == BLOCK_ORIGINAL) + continue; + if (STATE (bb) == BLOCK_NEW) + { +- bool initialized_src = false, uninitialized_src = false; +- bb->count = profile_count::zero (); +- FOR_EACH_EDGE (e, ei, bb->preds) +- { +- if (e->count ().initialized_p ()) +- { +- bb->count += e->count (); +- initialized_src = true; +- } +- else +- uninitialized_src = true; +- } +- /* When some edges are missing with read profile, this is +- most likely because RTL expansion introduced loop. +- When profile is guessed we may have BB that is reachable +- from unlikely path as well as from normal path. +- +- TODO: We should handle loops created during BB expansion +- correctly here. For now we assume all those loop to cycle +- precisely once. */ +- if (!initialized_src +- || (uninitialized_src +- && profile_status_for_fn (cfun) < PROFILE_GUESSED)) +- bb->count = profile_count::uninitialized (); ++ update_profile_for_new_sub_basic_block (bb); ++ continue; + } +- /* If nothing changed, there is no need to create new BBs. */ +- else if (EDGE_COUNT (bb->succs) == n_succsbb->index) ++ /* If nothing changed, there is no need to create new BBs. */ ++ if (EDGE_COUNT (bb->succs) == n_succsbb->index) + { + /* In rare occassions RTL expansion might have mistakely assigned + a probabilities different from what is in CFG. This happens +@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks) + update_br_prob_note (bb); + continue; + } +- + compute_outgoing_frequencies (bb); + } + + FOR_EACH_BB_FN (bb, cfun) + SET_STATE (bb, 0); + } ++ ++/* Like find_many_sub_basic_blocks, but look only within BB. */ ++ ++void ++find_sub_basic_blocks (basic_block bb) ++{ ++ basic_block end_bb = bb->next_bb; ++ find_bb_boundaries (bb); ++ if (bb->next_bb == end_bb) ++ return; ++ ++ /* Re-scan and wire in all edges. This expects simple (conditional) ++ jumps at the end of each new basic blocks. */ ++ make_edges (bb, end_bb->prev_bb, 1); ++ ++ /* Update branch probabilities. Expect only (un)conditional jumps ++ to be created with only the forward edges. */ ++ if (profile_status_for_fn (cfun) != PROFILE_ABSENT) ++ { ++ compute_outgoing_frequencies (bb); ++ for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb) ++ update_profile_for_new_sub_basic_block (bb); ++ } ++} +diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h +index 85145da7f..53543bb75 100644 +--- a/gcc/cfgbuild.h ++++ b/gcc/cfgbuild.h +@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *); + extern bool control_flow_insn_p (const rtx_insn *); + extern void rtl_make_eh_edge (sbitmap, basic_block, rtx); + extern void find_many_sub_basic_blocks (sbitmap); ++extern void find_sub_basic_blocks (basic_block); + + #endif /* GCC_CFGBUILD_H */ +diff --git a/gcc/function.cc b/gcc/function.cc +index ddab43ca4..f4fc211a0 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void) + && returnjump_p (BB_END (e->src))) + e->flags &= ~EDGE_FALLTHRU; + } ++ ++ find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq)); + } + else if (next_active_insn (BB_END (exit_fallthru_edge->src))) + { +@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void) + set_insn_locations (seq, epilogue_location); + + emit_insn_before (seq, insn); ++ ++ find_sub_basic_blocks (BLOCK_FOR_INSN (insn)); + } + } + +-- +2.33.0 +
_service:tar_scm:0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
Added
@@ -0,0 +1,709 @@ +From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 17 Oct 2023 23:46:33 +0100 +Subject: PATCH 104/157 BackportSME aarch64: Use vecs to store register + save order + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0 + +aarch64_save/restore_callee_saves looped over registers in register +number order. This in turn meant that we could only use LDP and STP +for registers that were consecutive both number-wise and +offset-wise (after unsaved registers are excluded). + +This patch instead builds lists of the registers that we've decided to +save, in offset order. We can then form LDP/STP pairs regardless of +register number order, which in turn means that we can put the LR save +slot first without losing LDP/STP opportunities. + +gcc/ + * config/aarch64/aarch64.h (aarch64_frame): Add vectors that + store the list saved GPRs, FPRs and predicate registers. + * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize + the lists of saved registers. Use them to choose push candidates. + Invalidate pop candidates if we're not going to do a pop. + (aarch64_next_callee_save): Delete. + (aarch64_save_callee_saves): Take a list of registers, + rather than a range. Make !skip_wb select only write-back + candidates. + (aarch64_expand_prologue): Update calls accordingly. + (aarch64_restore_callee_saves): Take a list of registers, + rather than a range. Always skip pop candidates. Also skip + LR if shadow call stacks are enabled. + (aarch64_expand_epilogue): Update calls accordingly. + +gcc/testsuite/ + * gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores + to happen in offset order. + * gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise. + * gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise. + * gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise. + * gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise. + * gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 203 +++++++++--------- + gcc/config/aarch64/aarch64.h | 9 +- + .../aarch64/sve/pcs/stack_clash_2.c | 6 +- + .../aarch64/sve/pcs/stack_clash_2_1024.c | 6 +- + .../aarch64/sve/pcs/stack_clash_2_128.c | 6 +- + .../aarch64/sve/pcs/stack_clash_2_2048.c | 6 +- + .../aarch64/sve/pcs/stack_clash_2_256.c | 6 +- + .../aarch64/sve/pcs/stack_clash_2_512.c | 6 +- + 8 files changed, 128 insertions(+), 120 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 8d4dd2891..e10c9d763 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p () + static void + aarch64_layout_frame (void) + { +- int regno, last_fp_reg = INVALID_REGNUM; ++ unsigned regno, last_fp_reg = INVALID_REGNUM; + machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); + poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); + bool frame_related_fp_reg_p = false; + aarch64_frame &frame = cfun->machine->frame; + poly_int64 top_of_locals = -1; + ++ vec_safe_truncate (frame.saved_gprs, 0); ++ vec_safe_truncate (frame.saved_fprs, 0); ++ vec_safe_truncate (frame.saved_prs, 0); ++ + frame.emit_frame_chain = aarch64_needs_frame_chain (); + + /* Adjust the outgoing arguments size if required. 
Keep it in sync with what +@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void) + for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ vec_safe_push (frame.saved_prs, regno); + if (frame.sve_save_and_probe == INVALID_REGNUM) + frame.sve_save_and_probe = regno; + frame.reg_offsetregno = offset; +@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void) + If we don't have any vector registers to save, and we know how + big the predicate save area is, we can just round it up to the + next 16-byte boundary. */ +- if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ()) ++ if (last_fp_reg == INVALID_REGNUM && offset.is_constant ()) + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + else + { +@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void) + } + + /* If we need to save any SVE vector registers, add them next. */ +- if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) ++ if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { ++ vec_safe_push (frame.saved_fprs, regno); + if (frame.sve_save_and_probe == INVALID_REGNUM) + frame.sve_save_and_probe = regno; + frame.reg_offsetregno = offset; +@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void) + + auto allocate_gpr_slot = &(unsigned int regno) + { +- if (frame.hard_fp_save_and_probe == INVALID_REGNUM) +- frame.hard_fp_save_and_probe = regno; ++ vec_safe_push (frame.saved_gprs, regno); + frame.reg_offsetregno = offset; +- if (frame.wb_push_candidate1 == INVALID_REGNUM) +- frame.wb_push_candidate1 = regno; +- else if (frame.wb_push_candidate2 == INVALID_REGNUM) +- frame.wb_push_candidate2 = regno; + offset += UNITS_PER_WORD; + }; + +@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void) + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + { +- if (frame.hard_fp_save_and_probe == INVALID_REGNUM) +- frame.hard_fp_save_and_probe = regno; ++ vec_safe_push (frame.saved_fprs, regno); + /* If there is an alignment gap between integer and fp callee-saves, + allocate the last fp register to it if possible. */ + if (regno == last_fp_reg +@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void) + } + + frame.reg_offsetregno = offset; +- if (frame.wb_push_candidate1 == INVALID_REGNUM) +- frame.wb_push_candidate1 = regno; +- else if (frame.wb_push_candidate2 == INVALID_REGNUM +- && frame.wb_push_candidate1 >= V0_REGNUM) +- frame.wb_push_candidate2 = regno; + offset += vector_save_size; + } + + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); +- + auto saved_regs_size = offset - frame.bytes_below_saved_regs; +- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) +- || (frame.hard_fp_save_and_probe != INVALID_REGNUM +- && known_eq (frame.reg_offsetframe.hard_fp_save_and_probe, +- frame.bytes_below_hard_fp))); ++ ++ array_slice<unsigned int> push_regs = (!vec_safe_is_empty (frame.saved_gprs) ++ ? 
frame.saved_gprs ++ : frame.saved_fprs); ++ if (!push_regs.empty () ++ && known_eq (frame.reg_offsetpush_regs0, frame.bytes_below_hard_fp)) ++ { ++ frame.hard_fp_save_and_probe = push_regs0; ++ frame.wb_push_candidate1 = push_regs0; ++ if (push_regs.size () > 1) ++ frame.wb_push_candidate2 = push_regs1; ++ } ++ else ++ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)); + + /* With stack-clash, a register must be saved in non-leaf functions. + The saving of the bottommost register counts as an implicit probe, +@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void) + + frame.sve_callee_adjust + + frame.final_adjust, frame.frame_size)); + +- if (!frame.emit_frame_chain && frame.callee_adjust == 0) ++ if (frame.callee_adjust == 0) + { +- /* We've decided not to associate any register saves with the initial +- stack allocation. */ +- frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM; +- frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM; ++ /* We've decided not to do a "real" push and pop. However, ++ setting up the frame chain is treated as being essentially ++ a multi-instruction push. */ ++ frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM; ++ if (!frame.emit_frame_chain) ++ frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM; + } + + frame.laid_out = true; +@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno) + return known_ge (cfun->machine->frame.reg_offsetregno, 0); + } + +-/* Return the next register up from REGNO up to LIMIT for the callee +- to save. */ +- +-static unsigned +-aarch64_next_callee_save (unsigned regno, unsigned limit) +-{ +- while (regno <= limit && !aarch64_register_saved_on_entry (regno)) +- regno ++; +- return regno; +-} +- + /* Push the register number REGNO of mode MODE to the stack with write-back + adjusting the stack by ADJUSTMENT. */ + +@@ -9424,41 +9419,46 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, + add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); + } + +-/* Emit code to save the callee-saved registers from register number START +- to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP +- bytes above the bottom of the static frame. Skip any write-back +- candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard +- frame pointer has been set up. */ ++/* Emit code to save the callee-saved registers in REGS. Skip any ++ write-back candidates if SKIP_WB is true, otherwise consider only ++ write-back candidates. ++ ++ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom ++ of the static frame. HARD_FP_VALID_P is true if the hard frame pointer ++ has been set up. 
*/ + + static void + aarch64_save_callee_saves (poly_int64 bytes_below_sp, +- unsigned start, unsigned limit, bool skip_wb, ++ array_slice<unsigned int> regs, bool skip_wb, + bool hard_fp_valid_p) + { + aarch64_frame &frame = cfun->machine->frame; + rtx_insn *insn; +- unsigned regno; +- unsigned regno2; + rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; + +- for (regno = aarch64_next_callee_save (start, limit); +- regno <= limit; +- regno = aarch64_next_callee_save (regno + 1, limit)) ++ auto skip_save_p = &(unsigned int regno) ++ { ++ if (cfun->machine->reg_is_wrapped_separatelyregno) ++ return true; ++ ++ if (skip_wb == (regno == frame.wb_push_candidate1 ++ || regno == frame.wb_push_candidate2)) ++ return true; ++ ++ return false; ++ }; ++ ++ for (unsigned int i = 0; i < regs.size (); ++i) + { +- rtx reg, mem; ++ unsigned int regno = regsi; + poly_int64 offset; + bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + +- if (skip_wb +- && (regno == frame.wb_push_candidate1 +- || regno == frame.wb_push_candidate2)) +- continue; +- +- if (cfun->machine->reg_is_wrapped_separatelyregno) ++ if (skip_save_p (regno)) + continue; + + machine_mode mode = aarch64_reg_save_mode (regno); +- reg = gen_rtx_REG (mode, regno); ++ rtx reg = gen_rtx_REG (mode, regno); + offset = frame.reg_offsetregno - bytes_below_sp; + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; +@@ -9485,12 +9485,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + } + offset -= fp_offset; + } +- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); ++ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + bool need_cfa_note_p = (base_rtx != stack_pointer_rtx); + ++ unsigned int regno2; + if (!aarch64_sve_mode_p (mode) +- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit +- && !cfun->machine->reg_is_wrapped_separatelyregno2 ++ && i + 1 < regs.size () ++ && (regno2 = regsi + 1, !skip_save_p (regno2)) + && known_eq (GET_MODE_SIZE (mode), + frame.reg_offsetregno2 - frame.reg_offsetregno)) + { +@@ -9516,6 +9517,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + } + + regno = regno2; ++ ++i; + } + else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + { +@@ -9533,49 +9535,57 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + } + } + +-/* Emit code to restore the callee registers from register number START +- up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP +- bytes above the bottom of the static frame. Skip any write-back +- candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE +- notes into CFI_OPS. */ ++/* Emit code to restore the callee registers in REGS, ignoring pop candidates ++ and any other registers that are handled separately. Write the appropriate ++ REG_CFA_RESTORE notes into CFI_OPS. ++ ++ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom ++ of the static frame. 
*/
+
+ static void
+-aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
+-			      unsigned limit, bool skip_wb, rtx *cfi_ops)
++aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
++			      array_slice<unsigned int> regs, rtx *cfi_ops)
+ {
+   aarch64_frame &frame = cfun->machine->frame;
+-  unsigned regno;
+-  unsigned regno2;
+   poly_int64 offset;
+   rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
+
+-  for (regno = aarch64_next_callee_save (start, limit);
+-       regno <= limit;
+-       regno = aarch64_next_callee_save (regno + 1, limit))
++  auto skip_restore_p = [&](unsigned int regno)
+     {
+-      bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
+       if (cfun->machine->reg_is_wrapped_separately[regno])
+-	continue;
++	return true;
++
++      if (regno == frame.wb_pop_candidate1
++	  || regno == frame.wb_pop_candidate2)
++	return true;
+
+-      rtx reg, mem;
++      /* The shadow call stack code restores LR separately.  */
++      if (frame.is_scs_enabled && regno == LR_REGNUM)
++	return true;
+
+-      if (skip_wb
+-	  && (regno == frame.wb_pop_candidate1
+-	      || regno == frame.wb_pop_candidate2))
++      return false;
++    };
++
++  for (unsigned int i = 0; i < regs.size (); ++i)
++    {
++      unsigned int regno = regs[i];
++      bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
++      if (skip_restore_p (regno))
+ 	continue;
+
+       machine_mode mode = aarch64_reg_save_mode (regno);
+-      reg = gen_rtx_REG (mode, regno);
++      rtx reg = gen_rtx_REG (mode, regno);
+       offset = frame.reg_offset[regno] - bytes_below_sp;
+       rtx base_rtx = stack_pointer_rtx;
+       if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ 	aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
+ 					     offset, ptrue);
+-      mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
++      rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
+
++      unsigned int regno2;
+       if (!aarch64_sve_mode_p (mode)
+-	  && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
+-	  && !cfun->machine->reg_is_wrapped_separately[regno2]
++	  && i + 1 < regs.size ()
++	  && (regno2 = regs[i + 1], !skip_restore_p (regno2))
+ 	  && known_eq (GET_MODE_SIZE (mode),
+ 		       frame.reg_offset[regno2] - frame.reg_offset[regno]))
+ 	{
+@@ -9588,6 +9598,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
+
+ 	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
+ 	  regno = regno2;
++	  ++i;
+ 	}
+       else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
+ 	emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem));
+@@ -10409,13 +10420,10 @@ aarch64_expand_prologue (void)
+ 				 - frame.bytes_above_hard_fp);
+       gcc_assert (known_ge (chain_offset, 0));
+
++      gcc_assert (reg1 == R29_REGNUM && reg2 == R30_REGNUM);
+       if (callee_adjust == 0)
+-	{
+-	  reg1 = R29_REGNUM;
+-	  reg2 = R30_REGNUM;
+-	  aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
+-				     false, false);
+-	}
++	aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs,
++				   false, false);
+       else
+ 	gcc_assert (known_eq (chain_offset, 0));
+       aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
+@@ -10453,8 +10461,7 @@ aarch64_expand_prologue (void)
+ 	  aarch64_emit_stack_tie (hard_frame_pointer_rtx);
+     }
+
+-  aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
+-			     callee_adjust != 0 || emit_frame_chain,
++  aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true,
+ 			     emit_frame_chain);
+   if (maybe_ne (sve_callee_adjust, 0))
+     {
+@@ -10465,10 +10472,9 @@
+ 				     !frame_pointer_needed, false);
+       bytes_below_sp -= sve_callee_adjust;
+     }
+-  aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
+-			     false, emit_frame_chain);
+-  aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
+-			     callee_adjust != 0 || emit_frame_chain,
++  aarch64_save_callee_saves (bytes_below_sp, frame.saved_prs, true,
++			     emit_frame_chain);
++  aarch64_save_callee_saves (bytes_below_sp, frame.saved_fprs, true,
+ 			     emit_frame_chain);
+
+   /* We may need to probe the final adjustment if it is larger than the guard
+@@ -10514,8 +10520,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+   poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
+   unsigned reg1 = frame.wb_pop_candidate1;
+   unsigned reg2 = frame.wb_pop_candidate2;
+-  unsigned int last_gpr = (frame.is_scs_enabled
+-			   ? R29_REGNUM : R30_REGNUM);
+   rtx cfi_ops = NULL;
+   rtx_insn *insn;
+   /* A stack clash protection prologue may not have left EP0_REGNUM or
+@@ -10579,10 +10583,8 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+
+   /* Restore the vector registers before the predicate registers,
+      so that we can use P4 as a temporary for big-endian SVE frames.  */
+-  aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
+-				callee_adjust != 0, &cfi_ops);
+-  aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
+-				false, &cfi_ops);
++  aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops);
++  aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops);
+   if (maybe_ne (sve_callee_adjust, 0))
+     aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
+
+@@ -10590,8 +10592,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
+      restore x30, we don't need to restore x30 again in the traditional
+      way.  */
+   aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
+-				R0_REGNUM, last_gpr,
+-				callee_adjust != 0, &cfi_ops);
++				frame.saved_gprs, &cfi_ops);
+
+   if (need_barrier_p)
+     aarch64_emit_stack_tie (stack_pointer_rtx);
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 292ef2eec..1591cde8b 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -787,7 +787,7 @@ extern enum aarch64_processor aarch64_tune;
+
+ #define DEFAULT_PCC_STRUCT_RETURN 0
+
+-#ifdef HAVE_POLY_INT_H
++#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
+ struct GTY (()) aarch64_frame
+ {
+   /* The offset from the bottom of the static frame (the bottom of the
+@@ -795,6 +795,13 @@ struct GTY (()) aarch64_frame
+      needed.  */
+   poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
+
++  /* The list of GPRs, FPRs and predicate registers that have nonnegative
++     entries in reg_offset.  The registers are listed in order of
++     increasing offset (rather than increasing register number).  */
++  vec<unsigned, va_gc_atomic> *saved_gprs;
++  vec<unsigned, va_gc_atomic> *saved_fprs;
++  vec<unsigned, va_gc_atomic> *saved_prs;
++
+   /* The number of extra stack bytes taken up by register varargs.
+      This area is allocated by the callee at the very top of the
+      frame.  This value is rounded up to a multiple of
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
+index 4622a1eed..bbb45d266 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
+@@ -215,9 +215,9 @@ test_7 (void)
+ **	add	sp, sp, #?16
+ **	ldr	p4, \[sp\]
+ **	addvl	sp, sp, #1
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -283,9 +283,9 @@ test_9 (int n)
+ **	addvl	sp, x29, #-1
+ **	ldr	p4, \[sp\]
+ **	addvl	sp, sp, #1
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -319,9 +319,9 @@ test_10 (int n)
+ **	addvl	sp, x29, #-1
+ **	ldr	p4, \[sp\]
+ **	addvl	sp, sp, #1
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	add	sp, sp, #?3008
+ **	add	sp, sp, #?126976
+ **	ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
+index e31200fc2..9437c7a85 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
+@@ -176,9 +176,9 @@ test_7 (void)
+ **	add	sp, sp, #?16
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?128
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -234,9 +234,9 @@ test_9 (int n)
+ **	sub	sp, x29, #128
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?128
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -268,9 +268,9 @@ test_10 (int n)
+ **	sub	sp, x29, #128
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?128
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	add	sp, sp, #?3008
+ **	add	sp, sp, #?126976
+ **	ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
+index 41193b411..b4e1627fa 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
+@@ -176,9 +176,9 @@ test_7 (void)
+ **	add	sp, sp, #?16
+ **	ldr	p4, \[sp\]
+ **	add	sp, sp, #?16
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -234,9 +234,9 @@ test_9 (int n)
+ **	sub	sp, x29, #16
+ **	ldr	p4, \[sp\]
+ **	add	sp, sp, #?16
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -267,9 +267,9 @@ test_10 (int n)
+ **	sub	sp, x29, #16
+ **	ldr	p4, \[sp\]
+ **	add	sp, sp, #?16
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	add	sp, sp, #?3008
+ **	add	sp, sp, #?126976
+ **	ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
+index f63751678..921209379 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
+@@ -176,9 +176,9 @@ test_7 (void)
+ **	add	sp, sp, #?16
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?256
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -234,9 +234,9 @@ test_9 (int n)
+ **	sub	sp, x29, #256
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?256
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -268,9 +268,9 @@ test_10 (int n)
+ **	sub	sp, x29, #256
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?256
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	add	sp, sp, #?3008
+ **	add	sp, sp, #?126976
+ **	ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
+index 6bcbb5772..bd8bef0f0 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
+@@ -176,9 +176,9 @@ test_7 (void)
+ **	add	sp, sp, #?16
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?32
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -234,9 +234,9 @@ test_9 (int n)
+ **	sub	sp, x29, #32
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?32
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -267,9 +267,9 @@ test_10 (int n)
+ **	sub	sp, x29, #32
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?32
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	add	sp, sp, #?3008
+ **	add	sp, sp, #?126976
+ **	ret
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
+index dc7df8e6b..2c76ccecd 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
+@@ -176,9 +176,9 @@ test_7 (void)
+ **	add	sp, sp, #?16
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?64
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -234,9 +234,9 @@ test_9 (int n)
+ **	sub	sp, x29, #64
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?64
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	mov	x12, #?4144
+ **	add	sp, sp, x12
+ **	ret
+@@ -268,9 +268,9 @@ test_10 (int n)
+ **	sub	sp, x29, #64
+ **	ldr	z16, \[sp\]
+ **	add	sp, sp, #?64
++**	ldp	x29, x30, \[sp\]
+ **	ldp	x24, x25, \[sp, 16\]
+ **	ldr	x26, \[sp, 32\]
+-**	ldp	x29, x30, \[sp\]
+ **	add	sp, sp, #?3008
+ **	add	sp, sp, #?126976
+ **	ret
+--
+2.33.0
+
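The patch above is what makes the reordered epilogues in the stack_clash_2*.c expectations legal: once saves are walked in offset order (frame.saved_gprs and friends), LDP/STP formation depends only on two slots being adjacent, not on two register numbers being consecutive. A minimal standalone C++ sketch of just that pairing rule, with made-up register numbers and offsets rather than real GCC data structures:

#include <cstdio>
#include <vector>

struct save { unsigned regno; long offset; };  // one 8-byte GPR save slot

int main ()
{
  // Slots in increasing-offset order, x29/x30 first as in the tests above.
  std::vector<save> regs = {{29, 0}, {30, 8}, {24, 16}, {25, 24}, {26, 32}};

  for (std::size_t i = 0; i < regs.size (); ++i)
    if (i + 1 < regs.size () && regs[i + 1].offset == regs[i].offset + 8)
      {
        // Two consecutive slots -> one LDP; consume both entries,
        // mirroring the "regno = regno2; ++i;" step in the real loop.
        std::printf ("ldp\tx%u, x%u, [sp, %ld]\n",
                     regs[i].regno, regs[i + 1].regno, regs[i].offset);
        ++i;
      }
    else
      std::printf ("ldr\tx%u, [sp, %ld]\n", regs[i].regno, regs[i].offset);
  return 0;
}

The sketch prints ldp x29, x30 first and still pairs x24/x25, which is exactly the shape the updated scan patterns check for.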
View file
_service:tar_scm:0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch
Added
@@ -0,0 +1,107 @@
+From ccc3ca614bbaa242fe25ec82b903dfcac03fe2de Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 17 Oct 2023 23:46:33 +0100
+Subject: [PATCH 105/157] [Backport][SME] aarch64: Put LR save slot first in
+ more cases
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666
+
+Now that the prologue and epilogue code iterates over saved
+registers in offset order, we can put the LR save slot first
+without compromising LDP/STP formation.
+
+This isn't worthwhile when shadow call stacks are enabled, since the
+first two registers are also push/pop candidates, and LR cannot be
+popped when shadow call stacks are enabled.  (LR is instead loaded
+first and compared against the shadow stack's value.)
+
+But otherwise, it seems better to put the LR save slot first,
+to reduce unnecessary variation with the layout for stack clash
+protection.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make
+	the position of the LR save slot dependent on stack clash
+	protection unless shadow call stacks are enabled.
+
+gcc/testsuite/
+	* gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19.
+	* gcc.target/aarch64/test_frame_4.c: Likewise.
+	* gcc.target/aarch64/test_frame_7.c: Likewise.
+	* gcc.target/aarch64/test_frame_10.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.cc                    | 2 +-
+ gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_2.c  | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_4.c  | 4 ++--
+ gcc/testsuite/gcc.target/aarch64/test_frame_7.c  | 4 ++--
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index e10c9d763..1c127192d 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8920,7 +8920,7 @@ aarch64_layout_frame (void)
+       allocate_gpr_slot (R29_REGNUM);
+       allocate_gpr_slot (R30_REGNUM);
+     }
+-  else if (flag_stack_clash_protection
++  else if ((flag_stack_clash_protection || !frame.is_scs_enabled)
+ 	   && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
+     /* Put the LR save slot first, since it makes a good choice of probe
+        for stack clash purposes.  The idea is that the link register usually
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+index c19505082..c54ab2d0c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+@@ -14,6 +14,6 @@
+ t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
+ t_frame_run (test10)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+index 7e5df84cf..0d715314c 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19")
+ t_frame_run (test2)
+
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+index ed13487a0..b41229c42 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+@@ -13,6 +13,6 @@
+ t_frame_pattern (test4, 400, "x19")
+ t_frame_run (test4)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+index 964527949..5702656a5 100644
+--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
++++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+@@ -13,6 +13,6 @@
+ t_frame_pattern (test7, 700, "x19")
+ t_frame_run (test7)
+
+-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */
+-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */
++/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp]" 1 } } */
++/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */
+
+--
+2.33.0
+
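The whole behavioural change in this patch is the widened condition in aarch64_layout_frame. A hedged standalone C++ model of the decision follows (simplified: the real code also requires known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED) and runs after the frame-chain case has been handled):

#include <cstdio>

// LR goes first when stack-clash protection is on, or whenever shadow
// call stacks are off; SCS keeps the old layout because it loads LR
// separately from the other push/pop candidates.
static bool lr_slot_first (bool stack_clash, bool scs_enabled)
{
  return stack_clash || !scs_enabled;
}

int main ()
{
  for (int clash = 0; clash < 2; ++clash)
    for (int scs = 0; scs < 2; ++scs)
      std::printf ("stack_clash=%d scs=%d -> LR first: %s\n", clash, scs,
                   lr_slot_first (clash, scs) ? "yes" : "no");
  return 0;
}

The only "no" row the table prints is scs=1 with stack_clash=0, matching the commit message: shadow call stacks keep the previous layout, while every other configuration now places the LR save slot first.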
View file
_service:tar_scm:0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch
Added
@@ -0,0 +1,3270 @@
+From 88a41bc24eb793eee27aa9f4ef6b763b3c3e76e6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:25 +0000
+Subject: [PATCH 106/157] [Backport][SME] aarch64: Switch PSTATE.SM around
+ calls
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd8090f40079fa41ee58d9f76b2e50ed4f95c6bf
+
+This patch adds support for switching to the appropriate SME mode
+for each call.  Switching to streaming mode requires an SMSTART SM
+instruction and switching to non-streaming mode requires an SMSTOP SM
+instruction.  If the call is being made from streaming-compatible code,
+these switches are conditional on the current mode being the opposite
+of the one that the call needs.
+
+Since changing PSTATE.SM changes the vector length and effectively
+changes the ISA, the code to do the switching has to be emitted late.
+The patch does this using a new pass that runs next to late prologue/
+epilogue insertion.  (It doesn't use md_reorg because later additions
+need the CFG.)
+
+If a streaming-compatible function needs to switch mode for a call,
+it must restore the original mode afterwards.  The old mode must
+therefore be available immediately after the call.  The easiest
+way of ensuring this is to force the use of a hard frame pointer
+and ensure that the old state is saved at an in-range offset
+from there.
+
+Changing modes clobbers the Z and P registers, so we need to
+save and restore live Z and P state around each mode switch.
+However, mode switches are not expected to be performance
+critical, so it seemed better to err on the side of being
+correct rather than trying to optimise the save and restore
+with surrounding code.
+
+gcc/
+	* config/aarch64/aarch64-passes.def
+	(pass_late_thread_prologue_and_epilogue): New pass.
+	* config/aarch64/aarch64-sme.md: New file.
+	* config/aarch64/aarch64.md: Include it.
+	(*tb<optab><mode>1): Rename to...
+	(@aarch64_tb<optab><mode>): ...this.
+	(call, call_value, sibcall, sibcall_value): Don't require operand 2
+	to be a CONST_INT.
+	* config/aarch64/aarch64-protos.h (aarch64_emit_call_insn): Return
+	the insn.
+	(make_pass_switch_sm_state): Declare.
+	* config/aarch64/aarch64.h (TARGET_STREAMING_COMPATIBLE): New macro.
+	(CALL_USED_REGISTER): Mark VG as call-preserved.
+	(aarch64_frame::old_svcr_offset): New member variable.
+	(machine_function::call_switches_sm_state): Likewise.
+	(CUMULATIVE_ARGS::num_sme_mode_switch_args): Likewise.
+	(CUMULATIVE_ARGS::sme_mode_switch_args): Likewise.
+	* config/aarch64/aarch64.cc: Include tree-pass.h and cfgbuild.h.
+	(aarch64_cfun_incoming_pstate_sm): New function.
+	(aarch64_call_switches_pstate_sm): Likewise.
+	(aarch64_reg_save_mode): Return DImode for VG_REGNUM.
+	(aarch64_callee_isa_mode): New function.
+	(aarch64_insn_callee_isa_mode): Likewise.
+	(aarch64_guard_switch_pstate_sm): Likewise.
+	(aarch64_switch_pstate_sm): Likewise.
+	(aarch64_sme_mode_switch_regs): New class.
+	(aarch64_record_sme_mode_switch_args): New function.
+	(aarch64_finish_sme_mode_switch_args): Likewise.
+	(aarch64_function_arg): Handle the end marker by returning a
+	PARALLEL that contains the ABI cookie that we used previously
+	alongside the result of aarch64_finish_sme_mode_switch_args.
+	(aarch64_init_cumulative_args): Initialize num_sme_mode_switch_args.
+	(aarch64_function_arg_advance): If a call would switch SM state,
+	record all argument registers that would need to be saved around
+	the mode switch.
+	(aarch64_need_old_pstate_sm): New function.
+	(aarch64_layout_frame): Decide whether the frame needs to store the
+	incoming value of PSTATE.SM and allocate a save slot for it if so.
+	If a function switches SME state, arrange to save the old value
+	of the DWARF VG register.  Handle the case where this is the only
+	register save slot above the FP.
+	(aarch64_save_callee_saves): Handles saves of the DWARF VG register.
+	(aarch64_get_separate_components): Prevent such saves from being
+	shrink-wrapped.
+	(aarch64_old_svcr_mem): New function.
+	(aarch64_read_old_svcr): Likewise.
+	(aarch64_guard_switch_pstate_sm): Likewise.
+	(aarch64_expand_prologue): Handle saves of the DWARF VG register.
+	Initialize any SVCR save slot.
+	(aarch64_expand_call): Allow the cookie to be PARALLEL that contains
+	both the UNSPEC_CALLEE_ABI value and a list of registers that need
+	to be preserved across a change to PSTATE.SM.  If the call does
+	involve such a change to PSTATE.SM, record the registers that
+	would be clobbered by this process.  Also emit an instruction
+	to mark the temporary change in VG.  Update call_switches_pstate_sm.
+	(aarch64_emit_call_insn): Return the emitted instruction.
+	(aarch64_frame_pointer_required): New function.
+	(aarch64_conditional_register_usage): Prevent VG_REGNUM from being
+	treated as a register operand.
+	(aarch64_switch_pstate_sm_for_call): New function.
+	(pass_data_switch_pstate_sm): New pass variable.
+	(pass_switch_pstate_sm): New pass class.
+	(make_pass_switch_pstate_sm): New function.
+	(TARGET_FRAME_POINTER_REQUIRED): Define.
+	* config/aarch64/t-aarch64 (s-check-sve-md): Add aarch64-sme.md.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sme/call_sm_switch_1.c: New test.
+	* gcc.target/aarch64/sme/call_sm_switch_2.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_4.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_6.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_7.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_9.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_10.c: Likewise.
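Before the diff itself, the core decision this patch implements can be condensed into a small standalone C++ sketch. The flag values below are invented for illustration; GCC encodes the corresponding states as AARCH64_FL_SM_ON/AARCH64_FL_SM_OFF inside aarch64_feature_flags:

#include <cstdio>

enum : unsigned { SM_OFF = 1u << 0, SM_ON = 1u << 1,
                  SM_STATE = SM_OFF | SM_ON };

// A call must switch PSTATE.SM if the callee pins an SM state that the
// caller's mode does not already guarantee (cf. the patch's
// aarch64_call_switches_pstate_sm).
static bool switches_sm (unsigned caller_mode, unsigned callee_mode)
{
  return (callee_mode & ~caller_mode & SM_STATE) != 0;
}

// Entering streaming mode takes SMSTART SM; leaving it takes SMSTOP SM.
static const char *switch_insn (unsigned target_mode)
{
  return (target_mode & SM_ON) ? "smstart sm" : "smstop sm";
}

int main ()
{
  unsigned caller = SM_OFF;  // normal non-streaming function
  unsigned callee = SM_ON;   // callee requires streaming mode
  if (switches_sm (caller, callee))
    std::printf ("before call: %s\nafter call:  %s\n",
                 switch_insn (callee), switch_insn (caller));
  return 0;
}

A streaming-compatible caller has neither bit set, so switches_sm is true for any callee that pins a mode; in that case the patch emits the SMSTART/SMSTOP conditionally, branching around it on the saved SVCR value (the aarch64_guard_switch_pstate_sm labels in the code below).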
+--- + gcc/config/aarch64/aarch64-passes.def | 1 + + gcc/config/aarch64/aarch64-protos.h | 3 +- + gcc/config/aarch64/aarch64-sme.md | 171 ++++ + gcc/config/aarch64/aarch64.cc | 883 +++++++++++++++++- + gcc/config/aarch64/aarch64.h | 25 +- + gcc/config/aarch64/aarch64.md | 13 +- + gcc/config/aarch64/t-aarch64 | 5 +- + .../gcc.target/aarch64/sme/call_sm_switch_1.c | 233 +++++ + .../aarch64/sme/call_sm_switch_10.c | 37 + + .../gcc.target/aarch64/sme/call_sm_switch_2.c | 43 + + .../gcc.target/aarch64/sme/call_sm_switch_3.c | 166 ++++ + .../gcc.target/aarch64/sme/call_sm_switch_4.c | 43 + + .../gcc.target/aarch64/sme/call_sm_switch_5.c | 318 +++++++ + .../gcc.target/aarch64/sme/call_sm_switch_6.c | 45 + + .../gcc.target/aarch64/sme/call_sm_switch_7.c | 516 ++++++++++ + .../gcc.target/aarch64/sme/call_sm_switch_8.c | 87 ++ + .../gcc.target/aarch64/sme/call_sm_switch_9.c | 103 ++ + 17 files changed, 2668 insertions(+), 24 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-sme.md + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c + +diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def +index a2babc112..c6cbbf2ef 100644 +--- a/gcc/config/aarch64/aarch64-passes.def ++++ b/gcc/config/aarch64/aarch64-passes.def +@@ -20,6 +20,7 @@ + + INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering); + INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation); ++INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm); + INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance); + INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti); + INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion); +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 9b03410dc..737f47026 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -913,7 +913,7 @@ void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, + const_tree, unsigned, bool = false); + void aarch64_init_expanders (void); + void aarch64_init_simd_builtins (void); +-void aarch64_emit_call_insn (rtx); ++rtx_call_insn *aarch64_emit_call_insn (rtx); + void aarch64_register_pragmas (void); + void aarch64_relayout_simd_types (void); + void aarch64_reset_previous_fndecl (void); +@@ -1055,6 +1055,7 @@ rtl_opt_pass *make_pass_track_speculation (gcc::context *); + rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *); + rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt); + rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt); ++rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt); + + poly_uint64 aarch64_regmode_natural_size (machine_mode); + +diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md +new file 
mode 100644 +index 000000000..52427b4f1 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sme.md +@@ -0,0 +1,171 @@ ++;; Machine description for AArch64 SME. ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++;; The file is organised into the following sections (search for the full ++;; line): ++;; ++;; == State management ++;; ---- Test current state ++;; ---- PSTATE.SM management ++ ++;; ========================================================================= ++;; == State management ++;; ========================================================================= ++;; ++;; Many of the instructions in this section are only valid when SME is ++;; present. However, they don't have a TARGET_SME condition since ++;; (a) they are only emitted under direct control of aarch64 code and ++;; (b) they are sometimes used conditionally, particularly in streaming- ++;; compatible code. ++;; ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Test current state ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" ++ UNSPEC_OLD_VG_SAVED ++ UNSPEC_UPDATE_VG ++ UNSPEC_GET_SME_STATE ++ UNSPEC_READ_SVCR ++) ++ ++;; A marker instruction to say that the old value of the DWARF VG register ++;; has been saved to the stack, for CFI purposes. Operand 0 is the old ++;; value of the register and operand 1 is the save slot. ++(define_insn "aarch64_old_vg_saved" ++ (set (reg:DI VG_REGNUM) ++ (unspec:DI (match_operand 0) ++ (match_operand 1) UNSPEC_OLD_VG_SAVED)) ++ "" ++ "" ++ (set_attr "type" "no_insn") ++) ++ ++;; A marker to indicate places where a call temporarily changes VG. ++(define_insn "aarch64_update_vg" ++ (set (reg:DI VG_REGNUM) ++ (unspec:DI (reg:DI VG_REGNUM) UNSPEC_UPDATE_VG)) ++ "" ++ "" ++ (set_attr "type" "no_insn") ++) ++ ++(define_insn "aarch64_get_sme_state" ++ (set (reg:TI R0_REGNUM) ++ (unspec_volatile:TI (const_int 0) UNSPEC_GET_SME_STATE)) ++ (clobber (reg:DI R16_REGNUM)) ++ (clobber (reg:DI R17_REGNUM)) ++ (clobber (reg:DI R18_REGNUM)) ++ (clobber (reg:DI R30_REGNUM)) ++ (clobber (reg:CC CC_REGNUM)) ++ "" ++ "bl\t__arm_sme_state" ++) ++ ++(define_insn "aarch64_read_svcr" ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI (const_int 0) UNSPEC_READ_SVCR)) ++ "" ++ "mrs\t%0, svcr" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- PSTATE.SM management ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SMSTART SM ++;; - SMSTOP SM ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" ++ UNSPEC_SMSTART_SM ++ UNSPEC_SMSTOP_SM ++) ++ ++;; Turn on streaming mode. 
This clobbers all SVE state. ++;; ++;; Depend on VG_REGNUM to ensure that the VG save slot has already been ++;; initialized. ++(define_insn "aarch64_smstart_sm" ++ (unspec_volatile (const_int 0) UNSPEC_SMSTART_SM) ++ (use (reg:DI VG_REGNUM)) ++ (clobber (reg:V4x16QI V0_REGNUM)) ++ (clobber (reg:V4x16QI V4_REGNUM)) ++ (clobber (reg:V4x16QI V8_REGNUM)) ++ (clobber (reg:V4x16QI V12_REGNUM)) ++ (clobber (reg:V4x16QI V16_REGNUM)) ++ (clobber (reg:V4x16QI V20_REGNUM)) ++ (clobber (reg:V4x16QI V24_REGNUM)) ++ (clobber (reg:V4x16QI V28_REGNUM)) ++ (clobber (reg:VNx16BI P0_REGNUM)) ++ (clobber (reg:VNx16BI P1_REGNUM)) ++ (clobber (reg:VNx16BI P2_REGNUM)) ++ (clobber (reg:VNx16BI P3_REGNUM)) ++ (clobber (reg:VNx16BI P4_REGNUM)) ++ (clobber (reg:VNx16BI P5_REGNUM)) ++ (clobber (reg:VNx16BI P6_REGNUM)) ++ (clobber (reg:VNx16BI P7_REGNUM)) ++ (clobber (reg:VNx16BI P8_REGNUM)) ++ (clobber (reg:VNx16BI P9_REGNUM)) ++ (clobber (reg:VNx16BI P10_REGNUM)) ++ (clobber (reg:VNx16BI P11_REGNUM)) ++ (clobber (reg:VNx16BI P12_REGNUM)) ++ (clobber (reg:VNx16BI P13_REGNUM)) ++ (clobber (reg:VNx16BI P14_REGNUM)) ++ (clobber (reg:VNx16BI P15_REGNUM)) ++ "" ++ "smstart\tsm" ++) ++ ++;; Turn off streaming mode. This clobbers all SVE state. ++;; ++;; Depend on VG_REGNUM to ensure that the VG save slot has already been ++;; initialized. ++(define_insn "aarch64_smstop_sm" ++ (unspec_volatile (const_int 0) UNSPEC_SMSTOP_SM) ++ (use (reg:DI VG_REGNUM)) ++ (clobber (reg:V4x16QI V0_REGNUM)) ++ (clobber (reg:V4x16QI V4_REGNUM)) ++ (clobber (reg:V4x16QI V8_REGNUM)) ++ (clobber (reg:V4x16QI V12_REGNUM)) ++ (clobber (reg:V4x16QI V16_REGNUM)) ++ (clobber (reg:V4x16QI V20_REGNUM)) ++ (clobber (reg:V4x16QI V24_REGNUM)) ++ (clobber (reg:V4x16QI V28_REGNUM)) ++ (clobber (reg:VNx16BI P0_REGNUM)) ++ (clobber (reg:VNx16BI P1_REGNUM)) ++ (clobber (reg:VNx16BI P2_REGNUM)) ++ (clobber (reg:VNx16BI P3_REGNUM)) ++ (clobber (reg:VNx16BI P4_REGNUM)) ++ (clobber (reg:VNx16BI P5_REGNUM)) ++ (clobber (reg:VNx16BI P6_REGNUM)) ++ (clobber (reg:VNx16BI P7_REGNUM)) ++ (clobber (reg:VNx16BI P8_REGNUM)) ++ (clobber (reg:VNx16BI P9_REGNUM)) ++ (clobber (reg:VNx16BI P10_REGNUM)) ++ (clobber (reg:VNx16BI P11_REGNUM)) ++ (clobber (reg:VNx16BI P12_REGNUM)) ++ (clobber (reg:VNx16BI P13_REGNUM)) ++ (clobber (reg:VNx16BI P14_REGNUM)) ++ (clobber (reg:VNx16BI P15_REGNUM)) ++ "" ++ "smstop\tsm" ++) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 1c127192d..82f8e574e 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -82,6 +82,8 @@ + #include "tree-dfa.h" + #include "asan.h" + #include "aarch64-feature-deps.h" ++#include "tree-pass.h" ++#include "cfgbuild.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -4377,6 +4379,26 @@ aarch64_fndecl_isa_mode (const_tree fndecl) + return aarch64_fndecl_pstate_sm (fndecl); + } + ++/* Return the state of PSTATE.SM on entry to the current function. ++ This might be different from the state of PSTATE.SM in the function ++ body. */ ++ ++static aarch64_feature_flags ++aarch64_cfun_incoming_pstate_sm () ++{ ++ return aarch64_fntype_pstate_sm (TREE_TYPE (cfun->decl)); ++} ++ ++/* Return true if a call from the current function to a function with ++ ISA mode CALLEE_MODE would involve a change to PSTATE.SM around ++ the BL instruction. 
*/ ++ ++static bool ++aarch64_call_switches_pstate_sm (aarch64_feature_flags callee_mode) ++{ ++ return (callee_mode & ~AARCH64_ISA_MODE & AARCH64_FL_SM_STATE) != 0; ++} ++ + /* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ + + static bool +@@ -4400,7 +4422,7 @@ aarch64_emit_cfi_for_reg_p (unsigned int regno) + static machine_mode + aarch64_reg_save_mode (unsigned int regno) + { +- if (GP_REGNUM_P (regno)) ++ if (GP_REGNUM_P (regno) || regno == VG_REGNUM) + return DImode; + + if (FP_REGNUM_P (regno)) +@@ -4459,6 +4481,16 @@ aarch64_callee_abi (rtx cookie) + return function_abisUINTVAL (cookie) >> AARCH64_NUM_ISA_MODES; + } + ++/* COOKIE is a CONST_INT from an UNSPEC_CALLEE_ABI rtx. Return the ++ required ISA mode on entry to the callee, which is also the ISA ++ mode on return from the callee. */ ++ ++static aarch64_feature_flags ++aarch64_callee_isa_mode (rtx cookie) ++{ ++ return UINTVAL (cookie) & AARCH64_FL_ISA_MODES; ++} ++ + /* INSN is a call instruction. Return the CONST_INT stored in its + UNSPEC_CALLEE_ABI rtx. */ + +@@ -4481,6 +4513,15 @@ aarch64_insn_callee_abi (const rtx_insn *insn) + return aarch64_callee_abi (aarch64_insn_callee_cookie (insn)); + } + ++/* INSN is a call instruction. Return the required ISA mode on entry to ++ the callee, which is also the ISA mode on return from the callee. */ ++ ++static aarch64_feature_flags ++aarch64_insn_callee_isa_mode (const rtx_insn *insn) ++{ ++ return aarch64_callee_isa_mode (aarch64_insn_callee_cookie (insn)); ++} ++ + /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves + the lower 64 bits of a 128-bit register. Tell the compiler the callee + clobbers the top 64 bits when restoring the bottom 64 bits. */ +@@ -6645,6 +6686,437 @@ aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p, + temp1, temp2, frame_related_p, emit_move_imm); + } + ++/* A streaming-compatible function needs to switch temporarily to the known ++ PSTATE.SM mode described by LOCAL_MODE. The low bit of OLD_SVCR contains ++ the runtime state of PSTATE.SM in the streaming-compatible code, before ++ the start of the switch to LOCAL_MODE. ++ ++ Emit instructions to branch around the mode switch if PSTATE.SM already ++ matches LOCAL_MODE. Return the label that the branch jumps to. */ ++ ++static rtx_insn * ++aarch64_guard_switch_pstate_sm (rtx old_svcr, aarch64_feature_flags local_mode) ++{ ++ local_mode &= AARCH64_FL_SM_STATE; ++ gcc_assert (local_mode != 0); ++ auto already_ok_cond = (local_mode & AARCH64_FL_SM_ON ? NE : EQ); ++ auto *label = gen_label_rtx (); ++ auto *jump = emit_jump_insn (gen_aarch64_tb (already_ok_cond, DImode, DImode, ++ old_svcr, const0_rtx, label)); ++ JUMP_LABEL (jump) = label; ++ return label; ++} ++ ++/* Emit code to switch from the PSTATE.SM state in OLD_MODE to the PSTATE.SM ++ state in NEW_MODE. This is known to involve either an SMSTART SM or ++ an SMSTOP SM. */ ++ ++static void ++aarch64_switch_pstate_sm (aarch64_feature_flags old_mode, ++ aarch64_feature_flags new_mode) ++{ ++ old_mode &= AARCH64_FL_SM_STATE; ++ new_mode &= AARCH64_FL_SM_STATE; ++ gcc_assert (old_mode != new_mode); ++ ++ if ((new_mode & AARCH64_FL_SM_ON) ++ || (new_mode == 0 && (old_mode & AARCH64_FL_SM_OFF))) ++ emit_insn (gen_aarch64_smstart_sm ()); ++ else ++ emit_insn (gen_aarch64_smstop_sm ()); ++} ++ ++/* As a side-effect, SMSTART SM and SMSTOP SM clobber the contents of all ++ FP and predicate registers. This class emits code to preserve any ++ necessary registers around the mode switch. 
++ ++ The class uses four approaches to saving and restoring contents, enumerated ++ by group_type: ++ ++ - GPR: save and restore the contents of FP registers using GPRs. ++ This is used if the FP register contains no more than 64 significant ++ bits. The registers used are FIRST_GPR onwards. ++ ++ - MEM_128: save and restore 128-bit SIMD registers using memory. ++ ++ - MEM_SVE_PRED: save and restore full SVE predicate registers using memory. ++ ++ - MEM_SVE_DATA: save and restore full SVE vector registers using memory. ++ ++ The save slots within each memory group are consecutive, with the ++ MEM_SVE_PRED slots occupying a region below the MEM_SVE_DATA slots. ++ ++ There will only be two mode switches for each use of SME, so they should ++ not be particularly performance-sensitive. It's also rare for SIMD, SVE ++ or predicate registers to be live across mode switches. We therefore ++ don't preallocate the save slots but instead allocate them locally on ++ demand. This makes the code emitted by the class self-contained. */ ++ ++class aarch64_sme_mode_switch_regs ++{ ++public: ++ static const unsigned int FIRST_GPR = R10_REGNUM; ++ ++ void add_reg (machine_mode, unsigned int); ++ void add_call_args (rtx_call_insn *); ++ void add_call_result (rtx_call_insn *); ++ ++ void emit_prologue (); ++ void emit_epilogue (); ++ ++ /* The number of GPRs needed to save FP registers, starting from ++ FIRST_GPR. */ ++ unsigned int num_gprs () { return m_group_countGPR; } ++ ++private: ++ enum sequence { PROLOGUE, EPILOGUE }; ++ enum group_type { GPR, MEM_128, MEM_SVE_PRED, MEM_SVE_DATA, NUM_GROUPS }; ++ ++ /* Information about the save location for one FP, SIMD, SVE data, or ++ SVE predicate register. */ ++ struct save_location { ++ /* The register to be saved. */ ++ rtx reg; ++ ++ /* Which group the save location belongs to. */ ++ group_type group; ++ ++ /* A zero-based index of the register within the group. */ ++ unsigned int index; ++ }; ++ ++ unsigned int sve_data_headroom (); ++ rtx get_slot_mem (machine_mode, poly_int64); ++ void emit_stack_adjust (sequence, poly_int64); ++ void emit_mem_move (sequence, const save_location &, poly_int64); ++ ++ void emit_gpr_moves (sequence); ++ void emit_mem_128_moves (sequence); ++ void emit_sve_sp_adjust (sequence); ++ void emit_sve_pred_moves (sequence); ++ void emit_sve_data_moves (sequence); ++ ++ /* All save locations, in no particular order. */ ++ auto_vec<save_location, 12> m_save_locations; ++ ++ /* The number of registers in each group. */ ++ unsigned int m_group_countNUM_GROUPS = {}; ++}; ++ ++/* Record that (reg:MODE REGNO) needs to be preserved around the mode ++ switch. 
*/ ++ ++void ++aarch64_sme_mode_switch_regs::add_reg (machine_mode mode, unsigned int regno) ++{ ++ if (!FP_REGNUM_P (regno) && !PR_REGNUM_P (regno)) ++ return; ++ ++ unsigned int end_regno = end_hard_regno (mode, regno); ++ unsigned int vec_flags = aarch64_classify_vector_mode (mode); ++ gcc_assert ((vec_flags & VEC_STRUCT) || end_regno == regno + 1); ++ for (; regno < end_regno; regno++) ++ { ++ machine_mode submode = mode; ++ if (vec_flags & VEC_STRUCT) ++ { ++ if (vec_flags & VEC_SVE_DATA) ++ submode = SVE_BYTE_MODE; ++ else if (vec_flags & VEC_PARTIAL) ++ submode = V8QImode; ++ else ++ submode = V16QImode; ++ } ++ save_location loc; ++ loc.reg = gen_rtx_REG (submode, regno); ++ if (vec_flags == VEC_SVE_PRED) ++ { ++ gcc_assert (PR_REGNUM_P (regno)); ++ loc.group = MEM_SVE_PRED; ++ } ++ else ++ { ++ gcc_assert (FP_REGNUM_P (regno)); ++ if (known_le (GET_MODE_SIZE (submode), 8)) ++ loc.group = GPR; ++ else if (known_eq (GET_MODE_SIZE (submode), 16)) ++ loc.group = MEM_128; ++ else ++ loc.group = MEM_SVE_DATA; ++ } ++ loc.index = m_group_countloc.group++; ++ m_save_locations.quick_push (loc); ++ } ++} ++ ++/* Record that the arguments to CALL_INSN need to be preserved around ++ the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_args (rtx_call_insn *call_insn) ++{ ++ for (rtx node = CALL_INSN_FUNCTION_USAGE (call_insn); ++ node; node = XEXP (node, 1)) ++ { ++ rtx item = XEXP (node, 0); ++ if (GET_CODE (item) != USE) ++ continue; ++ item = XEXP (item, 0); ++ if (!REG_P (item)) ++ continue; ++ add_reg (GET_MODE (item), REGNO (item)); ++ } ++} ++ ++/* Record that the return value from CALL_INSN (if any) needs to be ++ preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) ++{ ++ rtx pat = PATTERN (call_insn); ++ gcc_assert (GET_CODE (pat) == PARALLEL); ++ pat = XVECEXP (pat, 0, 0); ++ if (GET_CODE (pat) == CALL) ++ return; ++ rtx dest = SET_DEST (pat); ++ if (GET_CODE (dest) == PARALLEL) ++ for (int i = 0; i < XVECLEN (dest, 0); ++i) ++ { ++ rtx x = XVECEXP (dest, 0, i); ++ gcc_assert (GET_CODE (x) == EXPR_LIST); ++ rtx reg = XEXP (x, 0); ++ add_reg (GET_MODE (reg), REGNO (reg)); ++ } ++ else ++ add_reg (GET_MODE (dest), REGNO (dest)); ++} ++ ++/* Emit code to save registers before the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_prologue () ++{ ++ emit_sve_sp_adjust (PROLOGUE); ++ emit_sve_pred_moves (PROLOGUE); ++ emit_sve_data_moves (PROLOGUE); ++ emit_mem_128_moves (PROLOGUE); ++ emit_gpr_moves (PROLOGUE); ++} ++ ++/* Emit code to restore registers after the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_epilogue () ++{ ++ emit_gpr_moves (EPILOGUE); ++ emit_mem_128_moves (EPILOGUE); ++ emit_sve_pred_moves (EPILOGUE); ++ emit_sve_data_moves (EPILOGUE); ++ emit_sve_sp_adjust (EPILOGUE); ++} ++ ++/* The SVE predicate registers are stored below the SVE data registers, ++ with the predicate save area being padded to a data-register-sized ++ boundary. Return the size of this padded area as a whole number ++ of data register slots. */ ++ ++unsigned int ++aarch64_sme_mode_switch_regs::sve_data_headroom () ++{ ++ return CEIL (m_group_countMEM_SVE_PRED, 8); ++} ++ ++/* Return a memory reference of mode MODE to OFFSET bytes from the ++ stack pointer. 
*/ ++ ++rtx ++aarch64_sme_mode_switch_regs::get_slot_mem (machine_mode mode, ++ poly_int64 offset) ++{ ++ rtx addr = plus_constant (Pmode, stack_pointer_rtx, offset); ++ return gen_rtx_MEM (mode, addr); ++} ++ ++/* Allocate or deallocate SIZE bytes of stack space: SEQ decides which. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_stack_adjust (sequence seq, ++ poly_int64 size) ++{ ++ if (seq == PROLOGUE) ++ size = -size; ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, size))); ++} ++ ++/* Save or restore the register in LOC, whose slot is OFFSET bytes from ++ the stack pointer. SEQ chooses between saving and restoring. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_mem_move (sequence seq, ++ const save_location &loc, ++ poly_int64 offset) ++{ ++ rtx mem = get_slot_mem (GET_MODE (loc.reg), offset); ++ if (seq == PROLOGUE) ++ emit_move_insn (mem, loc.reg); ++ else ++ emit_move_insn (loc.reg, mem); ++} ++ ++/* Emit instructions to save or restore the GPR group. SEQ chooses between ++ saving and restoring. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_gpr_moves (sequence seq) ++{ ++ for (auto &loc : m_save_locations) ++ if (loc.group == GPR) ++ { ++ gcc_assert (loc.index < 8); ++ rtx gpr = gen_rtx_REG (GET_MODE (loc.reg), FIRST_GPR + loc.index); ++ if (seq == PROLOGUE) ++ emit_move_insn (gpr, loc.reg); ++ else ++ emit_move_insn (loc.reg, gpr); ++ } ++} ++ ++/* Emit instructions to save or restore the MEM_128 group. SEQ chooses ++ between saving and restoring. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_mem_128_moves (sequence seq) ++{ ++ HOST_WIDE_INT count = m_group_countMEM_128; ++ if (count == 0) ++ return; ++ ++ auto sp = stack_pointer_rtx; ++ auto sp_adjust = (seq == PROLOGUE ? -count : count) * 16; ++ ++ /* Pick a common mode that supports LDR & STR with pre/post-modification ++ and LDP & STP with pre/post-modification. */ ++ auto mode = TFmode; ++ ++ /* An instruction pattern that should be emitted at the end. */ ++ rtx last_pat = NULL_RTX; ++ ++ /* A previous MEM_128 location that hasn't been handled yet. */ ++ save_location *prev_loc = nullptr; ++ ++ /* Look for LDP/STPs and record any leftover LDR/STR in PREV_LOC. */ ++ for (auto &loc : m_save_locations) ++ if (loc.group == MEM_128) ++ { ++ if (!prev_loc) ++ { ++ prev_loc = &loc; ++ continue; ++ } ++ gcc_assert (loc.index == prev_loc->index + 1); ++ ++ /* The offset of the base of the save area from the current ++ stack pointer. */ ++ HOST_WIDE_INT bias = 0; ++ if (prev_loc->index == 0 && seq == PROLOGUE) ++ bias = sp_adjust; ++ ++ /* Get the two sets in the LDP/STP. */ ++ rtx ops = { ++ gen_rtx_REG (mode, REGNO (prev_loc->reg)), ++ get_slot_mem (mode, prev_loc->index * 16 + bias), ++ gen_rtx_REG (mode, REGNO (loc.reg)), ++ get_slot_mem (mode, loc.index * 16 + bias) ++ }; ++ unsigned int lhs = (seq == PROLOGUE); ++ rtx set1 = gen_rtx_SET (opslhs, ops1 - lhs); ++ rtx set2 = gen_rtx_SET (opslhs + 2, ops3 - lhs); ++ ++ /* Combine the sets with any stack allocation/deallocation. */ ++ rtvec vec; ++ if (prev_loc->index == 0) ++ { ++ rtx plus_sp = plus_constant (Pmode, sp, sp_adjust); ++ vec = gen_rtvec (3, gen_rtx_SET (sp, plus_sp), set1, set2); ++ } ++ else ++ vec = gen_rtvec (2, set1, set2); ++ rtx pat = gen_rtx_PARALLEL (VOIDmode, vec); ++ ++ /* Queue a deallocation to the end, otherwise emit the ++ instruction now. */ ++ if (seq == EPILOGUE && prev_loc->index == 0) ++ last_pat = pat; ++ else ++ emit_insn (pat); ++ prev_loc = nullptr; ++ } ++ ++ /* Handle any leftover LDR/STR. 
*/ ++ if (prev_loc) ++ { ++ rtx reg = gen_rtx_REG (mode, REGNO (prev_loc->reg)); ++ rtx addr; ++ if (prev_loc->index != 0) ++ addr = plus_constant (Pmode, sp, prev_loc->index * 16); ++ else if (seq == PROLOGUE) ++ { ++ rtx allocate = plus_constant (Pmode, sp, -count * 16); ++ addr = gen_rtx_PRE_MODIFY (Pmode, sp, allocate); ++ } ++ else ++ { ++ rtx deallocate = plus_constant (Pmode, sp, count * 16); ++ addr = gen_rtx_POST_MODIFY (Pmode, sp, deallocate); ++ } ++ rtx mem = gen_rtx_MEM (mode, addr); ++ if (seq == PROLOGUE) ++ emit_move_insn (mem, reg); ++ else ++ emit_move_insn (reg, mem); ++ } ++ ++ if (last_pat) ++ emit_insn (last_pat); ++} ++ ++/* Allocate or deallocate the stack space needed by the SVE groups. ++ SEQ chooses between allocating and deallocating. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_sve_sp_adjust (sequence seq) ++{ ++ if (unsigned int count = m_group_countMEM_SVE_DATA + sve_data_headroom ()) ++ emit_stack_adjust (seq, count * BYTES_PER_SVE_VECTOR); ++} ++ ++/* Save or restore the MEM_SVE_DATA group. SEQ chooses between saving ++ and restoring. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_sve_data_moves (sequence seq) ++{ ++ for (auto &loc : m_save_locations) ++ if (loc.group == MEM_SVE_DATA) ++ { ++ auto index = loc.index + sve_data_headroom (); ++ emit_mem_move (seq, loc, index * BYTES_PER_SVE_VECTOR); ++ } ++} ++ ++/* Save or restore the MEM_SVE_PRED group. SEQ chooses between saving ++ and restoring. */ ++ ++void ++aarch64_sme_mode_switch_regs::emit_sve_pred_moves (sequence seq) ++{ ++ for (auto &loc : m_save_locations) ++ if (loc.group == MEM_SVE_PRED) ++ emit_mem_move (seq, loc, loc.index * BYTES_PER_SVE_PRED); ++} ++ + /* Set DEST to (vec_series BASE STEP). */ + + static void +@@ -8211,6 +8683,40 @@ on_stack: + return; + } + ++/* Add the current argument register to the set of those that need ++ to be saved and restored around a change to PSTATE.SM. */ ++ ++static void ++aarch64_record_sme_mode_switch_args (CUMULATIVE_ARGS *pcum) ++{ ++ subrtx_var_iterator::array_type array; ++ FOR_EACH_SUBRTX_VAR (iter, array, pcum->aapcs_reg, NONCONST) ++ { ++ rtx x = *iter; ++ if (REG_P (x) && (FP_REGNUM_P (REGNO (x)) || PR_REGNUM_P (REGNO (x)))) ++ { ++ unsigned int i = pcum->num_sme_mode_switch_args++; ++ gcc_assert (i < ARRAY_SIZE (pcum->sme_mode_switch_args)); ++ pcum->sme_mode_switch_argsi = x; ++ } ++ } ++} ++ ++/* Return a parallel that contains all the registers that need to be ++ saved around a change to PSTATE.SM. Return const0_rtx if there is ++ no such mode switch, or if no registers need to be saved. */ ++ ++static rtx ++aarch64_finish_sme_mode_switch_args (CUMULATIVE_ARGS *pcum) ++{ ++ if (!pcum->num_sme_mode_switch_args) ++ return const0_rtx; ++ ++ auto argvec = gen_rtvec_v (pcum->num_sme_mode_switch_args, ++ pcum->sme_mode_switch_args); ++ return gen_rtx_PARALLEL (VOIDmode, argvec); ++} ++ + /* Implement TARGET_FUNCTION_ARG. 
*/ + + static rtx +@@ -8222,7 +8728,13 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) + || pcum->pcs_variant == ARM_PCS_SVE); + + if (arg.end_marker_p ()) +- return aarch64_gen_callee_cookie (pcum->isa_mode, pcum->pcs_variant); ++ { ++ rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode, ++ pcum->pcs_variant); ++ rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum); ++ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, abi_cookie, ++ sme_mode_switch_args)); ++ } + + aarch64_layout_arg (pcum_v, arg); + return pcum->aapcs_reg; +@@ -8257,6 +8769,7 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; + pcum->silent_p = silent_p; ++ pcum->num_sme_mode_switch_args = 0; + + if (!silent_p + && !TARGET_FLOAT +@@ -8297,6 +8810,10 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v, + aarch64_layout_arg (pcum_v, arg); + gcc_assert ((pcum->aapcs_reg != NULL_RTX) + != (pcum->aapcs_stack_words != 0)); ++ if (pcum->aapcs_reg ++ && aarch64_call_switches_pstate_sm (pcum->isa_mode)) ++ aarch64_record_sme_mode_switch_args (pcum); ++ + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_nextncrn; + pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; +@@ -8747,6 +9264,30 @@ aarch64_save_regs_above_locals_p () + return crtl->stack_protect_guard; + } + ++/* Return true if the current function needs to record the incoming ++ value of PSTATE.SM. */ ++static bool ++aarch64_need_old_pstate_sm () ++{ ++ /* Exit early if the incoming value of PSTATE.SM is known at ++ compile time. */ ++ if (aarch64_cfun_incoming_pstate_sm () != 0) ++ return false; ++ ++ if (cfun->machine->call_switches_pstate_sm) ++ for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) ++ if (auto *call = dyn_cast<rtx_call_insn *> (insn)) ++ if (!SIBLING_CALL_P (call)) ++ { ++ /* Return true if there is a call to a non-streaming-compatible ++ function. */ ++ auto callee_isa_mode = aarch64_insn_callee_isa_mode (call); ++ if (aarch64_call_switches_pstate_sm (callee_isa_mode)) ++ return true; ++ } ++ return false; ++} ++ + /* Mark the registers that need to be saved by the callee and calculate + the size of the callee-saved registers area and frame record (both FP + and LR may be omitted). */ +@@ -8780,6 +9321,7 @@ aarch64_layout_frame (void) + /* First mark all the registers that really need to be saved... */ + for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++) + frame.reg_offsetregno = SLOT_NOT_REQUIRED; ++ frame.old_svcr_offset = SLOT_NOT_REQUIRED; + + /* ... that includes the eh data registers (if needed)... */ + if (crtl->calls_eh_return) +@@ -8932,6 +9474,21 @@ aarch64_layout_frame (void) + if (known_eq (frame.reg_offsetregno, SLOT_REQUIRED)) + allocate_gpr_slot (regno); + ++ if (aarch64_need_old_pstate_sm ()) ++ { ++ frame.old_svcr_offset = offset; ++ offset += UNITS_PER_WORD; ++ } ++ ++ /* If the current function changes the SVE vector length, ensure that the ++ old value of the DWARF VG register is saved and available in the CFI, ++ so that outer frames with VL-sized offsets can be processed correctly. 
*/ ++ if (cfun->machine->call_switches_pstate_sm) ++ { ++ frame.reg_offsetVG_REGNUM = offset; ++ offset += UNITS_PER_WORD; ++ } ++ + poly_int64 max_int_offset = offset; + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + bool has_align_gap = maybe_ne (offset, max_int_offset); +@@ -8969,8 +9526,6 @@ aarch64_layout_frame (void) + if (push_regs.size () > 1) + frame.wb_push_candidate2 = push_regs1; + } +- else +- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)); + + /* With stack-clash, a register must be saved in non-leaf functions. + The saving of the bottommost register counts as an implicit probe, +@@ -9078,7 +9633,8 @@ aarch64_layout_frame (void) + frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; + frame.final_adjust = frame.bytes_below_saved_regs; + } +- else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) ++ else if (frame.wb_push_candidate1 != INVALID_REGNUM ++ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) + && const_above_fp < max_push_offset) + { + /* Frame with large area below the saved registers, or with SVE saves, +@@ -9459,7 +10015,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + + machine_mode mode = aarch64_reg_save_mode (regno); + rtx reg = gen_rtx_REG (mode, regno); ++ rtx move_src = reg; + offset = frame.reg_offsetregno - bytes_below_sp; ++ if (regno == VG_REGNUM) ++ { ++ move_src = gen_rtx_REG (DImode, IP0_REGNUM); ++ emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode)); ++ } + rtx base_rtx = stack_pointer_rtx; + poly_int64 sp_offset = offset; + +@@ -9467,7 +10029,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, + offset, ptrue); +- else if (GP_REGNUM_P (regno) ++ else if (GP_REGNUM_P (REGNO (reg)) + && (!offset.is_constant (&const_offset) || const_offset >= 512)) + { + poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; +@@ -9490,6 +10052,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + + unsigned int regno2; + if (!aarch64_sve_mode_p (mode) ++ && reg == move_src + && i + 1 < regs.size () + && (regno2 = regsi + 1, !skip_save_p (regno2)) + && known_eq (GET_MODE_SIZE (mode), +@@ -9521,17 +10084,24 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, + } + else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + { +- insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg)); ++ insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, move_src)); + need_cfa_note_p = true; + } + else if (aarch64_sve_mode_p (mode)) +- insn = emit_insn (gen_rtx_SET (mem, reg)); ++ insn = emit_insn (gen_rtx_SET (mem, move_src)); + else +- insn = emit_move_insn (mem, reg); ++ insn = emit_move_insn (mem, move_src); + + RTX_FRAME_RELATED_P (insn) = frame_related_p; + if (frame_related_p && need_cfa_note_p) + aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset); ++ else if (frame_related_p && move_src != reg) ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_SET (mem, reg)); ++ ++ /* Emit a fake instruction to indicate that the VG save slot has ++ been initialized. */ ++ if (regno == VG_REGNUM) ++ emit_insn (gen_aarch64_old_vg_saved (move_src, mem)); + } + } + +@@ -9754,6 +10324,10 @@ aarch64_get_separate_components (void) + bitmap_clear_bit (components, frame.hard_fp_save_and_probe); + } + ++ /* The VG save sequence needs a temporary GPR. Punt for now on trying ++ to find one. 
*/ ++ bitmap_clear_bit (components, VG_REGNUM); ++ + return components; + } + +@@ -10249,6 +10823,47 @@ aarch64_epilogue_uses (int regno) + return 0; + } + ++/* The current function's frame has a save slot for the incoming state ++ of SVCR. Return a legitimate memory for the slot, based on the hard ++ frame pointer. */ ++ ++static rtx ++aarch64_old_svcr_mem () ++{ ++ gcc_assert (frame_pointer_needed ++ && known_ge (cfun->machine->frame.old_svcr_offset, 0)); ++ rtx base = hard_frame_pointer_rtx; ++ poly_int64 offset = (0 ++ /* hard fp -> bottom of frame. */ ++ - cfun->machine->frame.bytes_below_hard_fp ++ /* bottom of frame -> save slot. */ ++ + cfun->machine->frame.old_svcr_offset); ++ return gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); ++} ++ ++/* The current function's frame has a save slot for the incoming state ++ of SVCR. Load the slot into register REGNO and return the register. */ ++ ++static rtx ++aarch64_read_old_svcr (unsigned int regno) ++{ ++ rtx svcr = gen_rtx_REG (DImode, regno); ++ emit_move_insn (svcr, aarch64_old_svcr_mem ()); ++ return svcr; ++} ++ ++/* Like the rtx version of aarch64_guard_switch_pstate_sm, but first ++ load the incoming value of SVCR from its save slot into temporary ++ register REGNO. */ ++ ++static rtx_insn * ++aarch64_guard_switch_pstate_sm (unsigned int regno, ++ aarch64_feature_flags local_mode) ++{ ++ rtx old_svcr = aarch64_read_old_svcr (regno); ++ return aarch64_guard_switch_pstate_sm (old_svcr, local_mode); ++} ++ + /* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ +@@ -10463,6 +11078,12 @@ aarch64_expand_prologue (void) + + aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true, + emit_frame_chain); ++ if (maybe_ge (frame.reg_offsetVG_REGNUM, 0)) ++ { ++ unsigned int saved_regs = { VG_REGNUM }; ++ aarch64_save_callee_saves (bytes_below_sp, saved_regs, true, ++ emit_frame_chain); ++ } + if (maybe_ne (sve_callee_adjust, 0)) + { + gcc_assert (!flag_stack_clash_protection +@@ -10484,6 +11105,40 @@ aarch64_expand_prologue (void) + !frame_pointer_needed, true); + if (emit_frame_chain && maybe_ne (final_adjust, 0)) + aarch64_emit_stack_tie (hard_frame_pointer_rtx); ++ ++ /* Save the incoming value of PSTATE.SM, if required. */ ++ if (known_ge (frame.old_svcr_offset, 0)) ++ { ++ rtx mem = aarch64_old_svcr_mem (); ++ MEM_VOLATILE_P (mem) = 1; ++ if (TARGET_SME) ++ { ++ rtx reg = gen_rtx_REG (DImode, IP0_REGNUM); ++ emit_insn (gen_aarch64_read_svcr (reg)); ++ emit_move_insn (mem, reg); ++ } ++ else ++ { ++ rtx old_r0 = NULL_RTX, old_r1 = NULL_RTX; ++ auto &args = crtl->args.info; ++ if (args.aapcs_ncrn > 0) ++ { ++ old_r0 = gen_rtx_REG (DImode, PROBE_STACK_FIRST_REGNUM); ++ emit_move_insn (old_r0, gen_rtx_REG (DImode, R0_REGNUM)); ++ } ++ if (args.aapcs_ncrn > 1) ++ { ++ old_r1 = gen_rtx_REG (DImode, PROBE_STACK_SECOND_REGNUM); ++ emit_move_insn (old_r1, gen_rtx_REG (DImode, R1_REGNUM)); ++ } ++ emit_insn (gen_aarch64_get_sme_state ()); ++ emit_move_insn (mem, gen_rtx_REG (DImode, R0_REGNUM)); ++ if (old_r0) ++ emit_move_insn (gen_rtx_REG (DImode, R0_REGNUM), old_r0); ++ if (old_r1) ++ emit_move_insn (gen_rtx_REG (DImode, R1_REGNUM), old_r1); ++ } ++ } + } + + /* Return TRUE if we can use a simple_return insn. +@@ -11730,17 +12385,33 @@ aarch64_start_call_args (cumulative_args_t ca_v) + RESULT is the register in which the result is returned. It's NULL for + "call" and "sibcall". + MEM is the location of the function call. 
+- CALLEE_ABI is a const_int that gives the arm_pcs of the callee. ++ COOKIE is either: ++ - a const_int that gives the argument to the call's UNSPEC_CALLEE_ABI. ++ - a PARALLEL that contains such a const_int as its first element. ++ The second element is a PARALLEL that lists all the argument ++ registers that need to be saved and restored around a change ++ in PSTATE.SM, or const0_rtx if no such switch is needed. + SIBCALL indicates whether this function call is normal call or sibling call. + It will generate different pattern accordingly. */ + + void +-aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) ++aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) + { + rtx call, callee, tmp; + rtvec vec; + machine_mode mode; + ++ rtx callee_abi = cookie; ++ rtx sme_mode_switch_args = const0_rtx; ++ if (GET_CODE (cookie) == PARALLEL) ++ { ++ callee_abi = XVECEXP (cookie, 0, 0); ++ sme_mode_switch_args = XVECEXP (cookie, 0, 1); ++ } ++ ++ gcc_assert (CONST_INT_P (callee_abi)); ++ auto callee_isa_mode = aarch64_callee_isa_mode (callee_abi); ++ + gcc_assert (MEM_P (mem)); + callee = XEXP (mem, 0); + mode = GET_MODE (callee); +@@ -11765,26 +12436,75 @@ aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) + else + tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM)); + +- gcc_assert (CONST_INT_P (callee_abi)); + callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi), + UNSPEC_CALLEE_ABI); + + vec = gen_rtvec (3, call, callee_abi, tmp); + call = gen_rtx_PARALLEL (VOIDmode, vec); + +- aarch64_emit_call_insn (call); ++ auto call_insn = aarch64_emit_call_insn (call); ++ ++ /* Check whether the call requires a change to PSTATE.SM. We can't ++ emit the instructions to change PSTATE.SM yet, since they involve ++ a change in vector length and a change in instruction set, which ++ cannot be represented in RTL. ++ ++ For now, just record which registers will be clobbered and used ++ by the changes to PSTATE.SM. */ ++ if (!sibcall && aarch64_call_switches_pstate_sm (callee_isa_mode)) ++ { ++ aarch64_sme_mode_switch_regs args_switch; ++ if (sme_mode_switch_args != const0_rtx) ++ { ++ unsigned int num_args = XVECLEN (sme_mode_switch_args, 0); ++ for (unsigned int i = 0; i < num_args; ++i) ++ { ++ rtx x = XVECEXP (sme_mode_switch_args, 0, i); ++ args_switch.add_reg (GET_MODE (x), REGNO (x)); ++ } ++ } ++ ++ aarch64_sme_mode_switch_regs result_switch; ++ if (result) ++ result_switch.add_call_result (call_insn); ++ ++ unsigned int num_gprs = MAX (args_switch.num_gprs (), ++ result_switch.num_gprs ()); ++ for (unsigned int i = 0; i < num_gprs; ++i) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (DImode, args_switch.FIRST_GPR + i)); ++ ++ for (int regno = V0_REGNUM; regno < V0_REGNUM + 32; regno += 4) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (V4x16QImode, regno)); ++ ++ for (int regno = P0_REGNUM; regno < P0_REGNUM + 16; regno += 1) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), ++ gen_rtx_REG (VNx16BImode, regno)); ++ ++ /* Ensure that the VG save slot has been initialized. Also emit ++ an instruction to model the effect of the temporary clobber ++ of VG, so that the prologue/epilogue pass sees the need to ++ save the old value. 
*/
++ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
++ gen_rtx_REG (DImode, VG_REGNUM));
++ emit_insn_before (gen_aarch64_update_vg (), call_insn);
++
++ cfun->machine->call_switches_pstate_sm = true;
++ }
+ }
+
+ /* Emit call insn with PAT and do aarch64-specific handling. */
+
+-void
++rtx_call_insn *
+ aarch64_emit_call_insn (rtx pat)
+ {
+- rtx insn = emit_call_insn (pat);
++ auto insn = emit_call_insn (pat);
+
+ rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
+ clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
+ clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
++ return as_a<rtx_call_insn *> (insn);
+ }
+
+ machine_mode
+@@ -13069,6 +13789,16 @@ aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1,
+ return false;
+ }
+
++/* Implement TARGET_FRAME_POINTER_REQUIRED. */
++
++static bool
++aarch64_frame_pointer_required ()
++{
++ /* If the function needs to record the incoming value of PSTATE.SM,
++ make sure that the slot is accessible from the frame pointer. */
++ return aarch64_need_old_pstate_sm ();
++}
++
+ static bool
+ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+ {
+@@ -20607,7 +21337,8 @@ aarch64_conditional_register_usage (void)
+ call_used_regs[i] = 1;
+ }
+
+- /* Only allow the FFR and FFRT to be accessed via special patterns. */
++ /* Only allow these registers to be accessed via special patterns. */
++ CLEAR_HARD_REG_BIT (operand_reg_set, VG_REGNUM);
+ CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
+ CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
+
+@@ -27903,6 +28634,123 @@ aarch64_indirect_call_asm (rtx addr)
+ return "";
+ }
+
++/* If CALL involves a change in PSTATE.SM, emit the instructions needed
++ to switch to the new mode and the instructions needed to restore the
++ original mode. Return true if something changed. */
++static bool
++aarch64_switch_pstate_sm_for_call (rtx_call_insn *call)
++{
++ /* Mode switches for sibling calls are handled via the epilogue. */
++ if (SIBLING_CALL_P (call))
++ return false;
++
++ auto callee_isa_mode = aarch64_insn_callee_isa_mode (call);
++ if (!aarch64_call_switches_pstate_sm (callee_isa_mode))
++ return false;
++
++ /* Switch mode before the call, preserving any argument registers
++ across the switch. */
++ start_sequence ();
++ rtx_insn *args_guard_label = nullptr;
++ if (TARGET_STREAMING_COMPATIBLE)
++ args_guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
++ callee_isa_mode);
++ aarch64_sme_mode_switch_regs args_switch;
++ args_switch.add_call_args (call);
++ args_switch.emit_prologue ();
++ aarch64_switch_pstate_sm (AARCH64_ISA_MODE, callee_isa_mode);
++ args_switch.emit_epilogue ();
++ if (args_guard_label)
++ emit_label (args_guard_label);
++ auto args_seq = get_insns ();
++ end_sequence ();
++ emit_insn_before (args_seq, call);
++
++ if (find_reg_note (call, REG_NORETURN, NULL_RTX))
++ return true;
++
++ /* Switch mode after the call, preserving any return registers across
++ the switch.
*/ ++ start_sequence (); ++ rtx_insn *return_guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ return_guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ callee_isa_mode); ++ aarch64_sme_mode_switch_regs return_switch; ++ return_switch.add_call_result (call); ++ return_switch.emit_prologue (); ++ aarch64_switch_pstate_sm (callee_isa_mode, AARCH64_ISA_MODE); ++ return_switch.emit_epilogue (); ++ if (return_guard_label) ++ emit_label (return_guard_label); ++ auto result_seq = get_insns (); ++ end_sequence (); ++ emit_insn_after (result_seq, call); ++ return true; ++} ++ ++namespace { ++ ++const pass_data pass_data_switch_pstate_sm = ++{ ++ RTL_PASS, // type ++ "smstarts", // name ++ OPTGROUP_NONE, // optinfo_flags ++ TV_NONE, // tv_id ++ 0, // properties_required ++ 0, // properties_provided ++ 0, // properties_destroyed ++ 0, // todo_flags_start ++ TODO_df_finish, // todo_flags_finish ++}; ++ ++class pass_switch_pstate_sm : public rtl_opt_pass ++{ ++public: ++ pass_switch_pstate_sm (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_switch_pstate_sm, ctxt) ++ {} ++ ++ // opt_pass methods: ++ bool gate (function *) override final; ++ unsigned int execute (function *) override final; ++}; ++ ++bool ++pass_switch_pstate_sm::gate (function *) ++{ ++ return cfun->machine->call_switches_pstate_sm; ++} ++ ++/* Emit any instructions needed to switch PSTATE.SM. */ ++unsigned int ++pass_switch_pstate_sm::execute (function *fn) ++{ ++ basic_block bb; ++ ++ auto_sbitmap blocks (last_basic_block_for_fn (cfun)); ++ bitmap_clear (blocks); ++ FOR_EACH_BB_FN (bb, fn) ++ { ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ if (auto *call = dyn_cast<rtx_call_insn *> (insn)) ++ if (aarch64_switch_pstate_sm_for_call (call)) ++ bitmap_set_bit (blocks, bb->index); ++ } ++ find_many_sub_basic_blocks (blocks); ++ clear_aux_for_blocks (); ++ return 0; ++} ++ ++} ++ ++rtl_opt_pass * ++make_pass_switch_pstate_sm (gcc::context *ctxt) ++{ ++ return new pass_switch_pstate_sm (ctxt); ++} ++ + /* Target-specific selftests. */ + + #if CHECKING_P +@@ -28176,6 +29024,9 @@ aarch64_get_v16qi_mode () + #undef TARGET_CALLEE_COPIES + #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false + ++#undef TARGET_FRAME_POINTER_REQUIRED ++#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required ++ + #undef TARGET_CAN_ELIMINATE + #define TARGET_CAN_ELIMINATE aarch64_can_eliminate + +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 1591cde8b..6bfe55968 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -255,6 +255,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; + /* The current function is a normal non-streaming function. */ + #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) + ++/* The current function has a streaming-compatible body. */ ++#define TARGET_STREAMING_COMPATIBLE \ ++ ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0) ++ + /* Crypto is an optional extension to AdvSIMD. 
*/
+ #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
+
+@@ -461,7 +465,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ 0, 0, 0, 0, 0, 0, 0, 0, /* V8 - V15 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* V16 - V23 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* V24 - V31 */ \
+- 1, 1, 1, 1, /* SFP, AP, CC, VG */ \
++ 1, 1, 1, 0, /* SFP, AP, CC, VG */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \
+ 1, 1 /* FFR and FFRT */ \
+@@ -802,6 +806,13 @@ struct GTY (()) aarch64_frame
+ vec<unsigned, va_gc_atomic> *saved_fprs;
+ vec<unsigned, va_gc_atomic> *saved_prs;
+
++ /* The offset from the base of the frame of a 64-bit slot whose low
++ bit contains the incoming value of PSTATE.SM. This slot must be
++ within reach of the hard frame pointer.
++
++ The offset is -1 if such a slot isn't needed. */
++ poly_int64 old_svcr_offset;
++
+ /* The number of extra stack bytes taken up by register varargs.
+ This area is allocated by the callee at the very top of the
+ frame. This value is rounded up to a multiple of
+@@ -910,6 +921,12 @@ typedef struct GTY (()) machine_function
+ /* One entry for each general purpose register. */
+ rtx call_via[SP_REGNUM];
+ bool label_is_assembled;
++
++ /* True if we've expanded at least one call to a function that changes
++ PSTATE.SM. This should only be used for saving compile time: false
++ guarantees that no such mode switch exists. */
++ bool call_switches_pstate_sm;
++
+ /* A set of all decls that have been passed to a vld1 intrinsic in the
+ current function. This is used to help guide the vector cost model. */
+ hash_set<tree> *vector_load_decls;
+@@ -978,6 +995,12 @@ typedef struct
+ stack arg area so far. */
+ bool silent_p; /* True if we should act silently, rather than
+ raise an error for invalid calls. */
++
++ /* A list of registers that need to be saved and restored around a
++ change to PSTATE.SM. An auto_vec would be more convenient, but those
++ can't be copied.
*/
++ unsigned int num_sme_mode_switch_args;
++ rtx sme_mode_switch_args[12];
+ } CUMULATIVE_ARGS;
+ #endif
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 2ce123255..bb867de74 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -970,7 +970,7 @@
+ operands[1]);
+ })
+
+-(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
++(define_insn "@aarch64_tb<optab><ALLI:mode><GPI:mode>"
+ [(set (pc) (if_then_else
+ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+ (const_int 1)
+@@ -1057,7 +1057,7 @@
+ [(parallel
+ [(call (match_operand 0 "memory_operand")
+ (match_operand 1 "general_operand"))
+- (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
++ (unspec:DI [(match_operand 2)] UNSPEC_CALLEE_ABI)
+ (clobber (reg:DI LR_REGNUM))])]
+ ""
+ "
+@@ -1083,7 +1083,7 @@
+ [(set (match_operand 0 "")
+ (call (match_operand 1 "memory_operand")
+ (match_operand 2 "general_operand")))
+- (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
++ (unspec:DI [(match_operand 3)] UNSPEC_CALLEE_ABI)
+ (clobber (reg:DI LR_REGNUM))])]
+ ""
+ "
+@@ -1110,7 +1110,7 @@
+ [(parallel
+ [(call (match_operand 0 "memory_operand")
+ (match_operand 1 "general_operand"))
+- (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
++ (unspec:DI [(match_operand 2)] UNSPEC_CALLEE_ABI)
+ (return)])]
+ ""
+ {
+@@ -1124,7 +1124,7 @@
+ [(set (match_operand 0 "")
+ (call (match_operand 1 "memory_operand")
+ (match_operand 2 "general_operand")))
+- (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
++ (unspec:DI [(match_operand 3)] UNSPEC_CALLEE_ABI)
+ (return)])]
+ ""
+ {
+@@ -7747,3 +7747,6 @@
+
+ ;; SVE2.
+ (include "aarch64-sve2.md")
++
++;; SME and extensions
++(include "aarch64-sme.md")
+diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
+index 10cd8f093..49731ba92 100644
+--- a/gcc/config/aarch64/t-aarch64
++++ b/gcc/config/aarch64/t-aarch64
+@@ -186,9 +186,12 @@ MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG))
+ insn-conditions.md: s-check-sve-md
+ s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \
+ $(srcdir)/config/aarch64/aarch64-sve.md \
+- $(srcdir)/config/aarch64/aarch64-sve2.md
++ $(srcdir)/config/aarch64/aarch64-sve2.md \
++ $(srcdir)/config/aarch64/aarch64-sme.md
+ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \
+ $(srcdir)/config/aarch64/aarch64-sve.md
+ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \
+ $(srcdir)/config/aarch64/aarch64-sve2.md
++ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \
++ $(srcdir)/config/aarch64/aarch64-sme.md
+ $(STAMP) s-check-sve-md
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
+new file mode 100644
+index 000000000..a2de55773
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
+@@ -0,0 +1,233 @@
++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
++// { dg-final { check-function-bodies "**" "" } }
++
++void ns_callee ();
++void s_callee () [[arm::streaming]];
++void sc_callee () [[arm::streaming_compatible]];
++
++void ns_callee_stack (int, int, int, int, int, int, int, int, int);
++
++struct callbacks {
++ void (*ns_ptr) ();
++ void (*s_ptr) () [[arm::streaming]];
++ void (*sc_ptr) () [[arm::streaming_compatible]];
++};
++
++/*
++** n_caller: { target lp64 }
++** stp x30, (x19|x2[0-8]), \[sp, #?-96\]!
++** cntd x16 ++** str x16, \sp, #?16\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** mov \1, x0 ++** bl ns_callee ++** smstart sm ++** bl s_callee ++** smstop sm ++** bl sc_callee ++** ldr (x0-9+), \\1\ ++** blr \2 ++** ldr (x0-9+), \\1, #?8\ ++** smstart sm ++** blr \3 ++** smstop sm ++** ldr (x0-9+), \\1, #?16\ ++** blr \4 ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x30, \1, \sp\, #?96 ++** ret ++*/ ++void ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); ++ c->sc_ptr (); ++} ++ ++/* ++** s_caller: { target lp64 } ++** stp x30, (x19|x20-8), \sp, #?-96\! ++** cntd x16 ++** str x16, \sp, #?16\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** mov \1, x0 ++** smstop sm ++** bl ns_callee ++** smstart sm ++** bl s_callee ++** bl sc_callee ++** ldr (x0-9+), \\1\ ++** smstop sm ++** blr \2 ++** smstart sm ++** ldr (x0-9+), \\1, #?8\ ++** blr \3 ++** ldr (x0-9+), \\1, #?16\ ++** blr \4 ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x30, \1, \sp\, #?96 ++** ret ++*/ ++void ++s_caller (struct callbacks *c) arm::streaming ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); ++ c->sc_ptr (); ++} ++ ++/* ++** sc_caller_sme: ++** stp x29, x30, \sp, #?-96\! ++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** mrs x16, svcr ++** str x16, \x29, #?16\ ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** smstop sm ++** bl ns_callee ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** smstart sm ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, .* ++** smstart sm ++** bl s_callee ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, .* ++** smstop sm ++** bl sc_callee ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x29, x30, \sp\, #?96 ++** ret ++*/ ++void ++sc_caller_sme () arm::streaming_compatible ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++} ++ ++#pragma GCC target "+nosme" ++ ++/* ++** sc_caller: ++** stp x29, x30, \sp, #?-96\! ++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** bl __arm_sme_state ++** str x0, \x29, #?16\ ++** ... ++** bl sc_callee ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x29, x30, \sp\, #?96 ++** ret ++*/ ++void ++sc_caller () arm::streaming_compatible ++{ ++ ns_callee (); ++ sc_callee (); ++} ++ ++/* ++** sc_caller_x0: ++** ... ++** mov x10, x0 ++** bl __arm_sme_state ++** ... ++** str wzr, \x10\ ++** ... ++*/ ++void ++sc_caller_x0 (int *ptr) arm::streaming_compatible ++{ ++ *ptr = 0; ++ ns_callee (); ++ sc_callee (); ++} ++ ++/* ++** sc_caller_x1: ++** ... ++** mov x10, x0 ++** mov x11, x1 ++** bl __arm_sme_state ++** ... ++** str w11, \x10\ ++** ... 
++*/ ++void ++sc_caller_x1 (int *ptr, int a) arm::streaming_compatible ++{ ++ *ptr = a; ++ ns_callee (); ++ sc_callee (); ++} ++ ++/* ++** sc_caller_stack: ++** sub sp, sp, #112 ++** stp x29, x30, \sp, #?16\ ++** add x29, sp, #?16 ++** ... ++** stp d8, d9, \sp, #?48\ ++** ... ++** bl __arm_sme_state ++** str x0, \x29, #?16\ ++** ... ++** bl ns_callee_stack ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** smstart sm ++** ... ++*/ ++void ++sc_caller_stack () arm::streaming_compatible ++{ ++ ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0); ++} ++ ++/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */ ++/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */ ++/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ ++/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c +new file mode 100644 +index 000000000..49c5e4a6a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c +@@ -0,0 +1,37 @@ ++// { dg-options "" } ++ ++#pragma GCC target "+nosme" ++ ++void ns_callee (); ++ void s_callee () arm::streaming; ++ void sc_callee () arm::streaming_compatible; ++ ++struct callbacks { ++ void (*ns_ptr) (); ++ void (*s_ptr) () arm::streaming; ++ void (*sc_ptr) () arm::streaming_compatible; ++}; ++ ++void ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ s_callee (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } ++ c->sc_ptr (); ++} ++ ++void ++sc_caller_sme (struct callbacks *c) arm::streaming_compatible ++{ ++ ns_callee (); ++ s_callee (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } ++ c->sc_ptr (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c +new file mode 100644 +index 000000000..890fcbc5b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c +@@ -0,0 +1,43 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++ ++void ns_callee (); ++ void s_callee () arm::streaming; ++ void sc_callee () arm::streaming_compatible; ++ ++struct callbacks { ++ void (*ns_ptr) (); ++ void (*s_ptr) () arm::streaming; ++ void (*sc_ptr) () arm::streaming_compatible; ++}; ++ ++void ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->sc_ptr (); ++} ++ ++void ++s_caller (struct callbacks *c) arm::streaming ++{ ++ s_callee (); ++ sc_callee (); ++ ++ c->s_ptr (); ++ c->sc_ptr (); ++} ++ ++void ++sc_caller (struct callbacks *c) arm::streaming_compatible ++{ ++ sc_callee (); ++ ++ c->sc_ptr (); ++} ++ ++// { dg-final { scan-assembler-not {dpqz0-9+,} } } ++// { dg-final { scan-assembler-not {smstart\tsm} } } ++// { dg-final { scan-assembler-not {smstop\tsm} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +new file mode 100644 +index 000000000..ed999d085 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +@@ -0,0 +1,166 @@ ++// { 
dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++__attribute__((aarch64_vector_pcs)) void ns_callee (); ++__attribute__((aarch64_vector_pcs)) void s_callee () arm::streaming; ++__attribute__((aarch64_vector_pcs)) void sc_callee () arm::streaming_compatible; ++ ++struct callbacks { ++ __attribute__((aarch64_vector_pcs)) void (*ns_ptr) (); ++ __attribute__((aarch64_vector_pcs)) void (*s_ptr) () arm::streaming; ++ __attribute__((aarch64_vector_pcs)) void (*sc_ptr) () arm::streaming_compatible; ++}; ++ ++/* ++** n_caller: { target lp64 } ++** stp x30, (x19|x20-8), \sp, #?-288\! ++** cntd x16 ++** str x16, \sp, #?16\ ++** stp q8, q9, \sp, #?32\ ++** stp q10, q11, \sp, #?64\ ++** stp q12, q13, \sp, #?96\ ++** stp q14, q15, \sp, #?128\ ++** stp q16, q17, \sp, #?160\ ++** stp q18, q19, \sp, #?192\ ++** stp q20, q21, \sp, #?224\ ++** stp q22, q23, \sp, #?256\ ++** mov \1, x0 ++** bl ns_callee ++** smstart sm ++** bl s_callee ++** smstop sm ++** bl sc_callee ++** ldr (x0-9+), \\1\ ++** blr \2 ++** ldr (x0-9+), \\1, #?8\ ++** smstart sm ++** blr \3 ++** smstop sm ++** ldr (x0-9+), \\1, #?16\ ++** blr \4 ++** ldp q8, q9, \sp, #?32\ ++** ldp q10, q11, \sp, #?64\ ++** ldp q12, q13, \sp, #?96\ ++** ldp q14, q15, \sp, #?128\ ++** ldp q16, q17, \sp, #?160\ ++** ldp q18, q19, \sp, #?192\ ++** ldp q20, q21, \sp, #?224\ ++** ldp q22, q23, \sp, #?256\ ++** ldp x30, \1, \sp\, #?288 ++** ret ++*/ ++void __attribute__((aarch64_vector_pcs)) ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); ++ c->sc_ptr (); ++} ++ ++/* ++** s_caller: { target lp64 } ++** stp x30, (x19|x20-8), \sp, #?-288\! ++** cntd x16 ++** str x16, \sp, #?16\ ++** stp q8, q9, \sp, #?32\ ++** stp q10, q11, \sp, #?64\ ++** stp q12, q13, \sp, #?96\ ++** stp q14, q15, \sp, #?128\ ++** stp q16, q17, \sp, #?160\ ++** stp q18, q19, \sp, #?192\ ++** stp q20, q21, \sp, #?224\ ++** stp q22, q23, \sp, #?256\ ++** mov \1, x0 ++** smstop sm ++** bl ns_callee ++** smstart sm ++** bl s_callee ++** bl sc_callee ++** ldr (x0-9+), \\1\ ++** smstop sm ++** blr \2 ++** smstart sm ++** ldr (x0-9+), \\1, #?8\ ++** blr \3 ++** ldr (x0-9+), \\1, #?16\ ++** blr \4 ++** ldp q8, q9, \sp, #?32\ ++** ldp q10, q11, \sp, #?64\ ++** ldp q12, q13, \sp, #?96\ ++** ldp q14, q15, \sp, #?128\ ++** ldp q16, q17, \sp, #?160\ ++** ldp q18, q19, \sp, #?192\ ++** ldp q20, q21, \sp, #?224\ ++** ldp q22, q23, \sp, #?256\ ++** ldp x30, \1, \sp\, #?288 ++** ret ++*/ ++void __attribute__((aarch64_vector_pcs)) ++s_caller (struct callbacks *c) arm::streaming ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); ++ c->sc_ptr (); ++} ++ ++/* ++** sc_caller: ++** stp x29, x30, \sp, #?-288\! 
++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** stp q8, q9, \sp, #?32\ ++** stp q10, q11, \sp, #?64\ ++** stp q12, q13, \sp, #?96\ ++** stp q14, q15, \sp, #?128\ ++** stp q16, q17, \sp, #?160\ ++** stp q18, q19, \sp, #?192\ ++** stp q20, q21, \sp, #?224\ ++** stp q22, q23, \sp, #?256\ ++** mrs x16, svcr ++** str x16, \x29, #?16\ ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** smstop sm ++** bl ns_callee ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** smstart sm ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, .* ++** smstart sm ++** bl s_callee ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, .* ++** smstop sm ++** bl sc_callee ++** ldp q8, q9, \sp, #?32\ ++** ldp q10, q11, \sp, #?64\ ++** ldp q12, q13, \sp, #?96\ ++** ldp q14, q15, \sp, #?128\ ++** ldp q16, q17, \sp, #?160\ ++** ldp q18, q19, \sp, #?192\ ++** ldp q20, q21, \sp, #?224\ ++** ldp q22, q23, \sp, #?256\ ++** ldp x29, x30, \sp\, #?288 ++** ret ++*/ ++void __attribute__((aarch64_vector_pcs)) ++sc_caller () arm::streaming_compatible ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++} ++ ++/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */ ++/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */ ++/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -264\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c +new file mode 100644 +index 000000000..f93a67f97 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c +@@ -0,0 +1,43 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++ ++__attribute__((aarch64_vector_pcs)) void ns_callee (); ++__attribute__((aarch64_vector_pcs)) void s_callee () arm::streaming; ++__attribute__((aarch64_vector_pcs)) void sc_callee () arm::streaming_compatible; ++ ++struct callbacks { ++ __attribute__((aarch64_vector_pcs)) void (*ns_ptr) (); ++ __attribute__((aarch64_vector_pcs)) void (*s_ptr) () arm::streaming; ++ __attribute__((aarch64_vector_pcs)) void (*sc_ptr) () arm::streaming_compatible; ++}; ++ ++void __attribute__((aarch64_vector_pcs)) ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->sc_ptr (); ++} ++ ++void __attribute__((aarch64_vector_pcs)) ++s_caller (struct callbacks *c) arm::streaming ++{ ++ s_callee (); ++ sc_callee (); ++ ++ c->s_ptr (); ++ c->sc_ptr (); ++} ++ ++void __attribute__((aarch64_vector_pcs)) ++sc_caller (struct callbacks *c) arm::streaming_compatible ++{ ++ sc_callee (); ++ ++ c->sc_ptr (); ++} ++ ++// { dg-final { scan-assembler-not {dpqz0-9+,} } } ++// { dg-final { scan-assembler-not {smstart\tsm} } } ++// { dg-final { scan-assembler-not {smstop\tsm} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +new file mode 100644 +index 000000000..be9b5cc04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +@@ -0,0 +1,318 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++#include <arm_sve.h> ++ ++svbool_t ns_callee (); ++ svbool_t s_callee () arm::streaming; ++ svbool_t sc_callee () arm::streaming_compatible; ++ ++struct callbacks { ++ svbool_t (*ns_ptr) (); ++ svbool_t (*s_ptr) () arm::streaming; ++ svbool_t (*sc_ptr) () arm::streaming_compatible; ++}; ++ ++/* ++** n_caller: { target lp64 } ++** stp x30, (x19|x20-8), 
\sp, #?-32\! ++** cntd x16 ++** str x16, \sp, #?16\ ++** addvl sp, sp, #-18 ++** str p4, \sp\ ++** str p5, \sp, #1, mul vl\ ++** str p6, \sp, #2, mul vl\ ++** str p7, \sp, #3, mul vl\ ++** str p8, \sp, #4, mul vl\ ++** str p9, \sp, #5, mul vl\ ++** str p10, \sp, #6, mul vl\ ++** str p11, \sp, #7, mul vl\ ++** str p12, \sp, #8, mul vl\ ++** str p13, \sp, #9, mul vl\ ++** str p14, \sp, #10, mul vl\ ++** str p15, \sp, #11, mul vl\ ++** str z8, \sp, #2, mul vl\ ++** str z9, \sp, #3, mul vl\ ++** str z10, \sp, #4, mul vl\ ++** str z11, \sp, #5, mul vl\ ++** str z12, \sp, #6, mul vl\ ++** str z13, \sp, #7, mul vl\ ++** str z14, \sp, #8, mul vl\ ++** str z15, \sp, #9, mul vl\ ++** str z16, \sp, #10, mul vl\ ++** str z17, \sp, #11, mul vl\ ++** str z18, \sp, #12, mul vl\ ++** str z19, \sp, #13, mul vl\ ++** str z20, \sp, #14, mul vl\ ++** str z21, \sp, #15, mul vl\ ++** str z22, \sp, #16, mul vl\ ++** str z23, \sp, #17, mul vl\ ++** mov \1, x0 ++** bl ns_callee ++** smstart sm ++** bl s_callee ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** bl sc_callee ++** ldr (x0-9+), \\1\ ++** blr \2 ++** ldr (x0-9+), \\1, #?8\ ++** smstart sm ++** blr \3 ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** ldr (x0-9+), \\1, #?16\ ++** blr \4 ++** ldr z8, \sp, #2, mul vl\ ++** ldr z9, \sp, #3, mul vl\ ++** ldr z10, \sp, #4, mul vl\ ++** ldr z11, \sp, #5, mul vl\ ++** ldr z12, \sp, #6, mul vl\ ++** ldr z13, \sp, #7, mul vl\ ++** ldr z14, \sp, #8, mul vl\ ++** ldr z15, \sp, #9, mul vl\ ++** ldr z16, \sp, #10, mul vl\ ++** ldr z17, \sp, #11, mul vl\ ++** ldr z18, \sp, #12, mul vl\ ++** ldr z19, \sp, #13, mul vl\ ++** ldr z20, \sp, #14, mul vl\ ++** ldr z21, \sp, #15, mul vl\ ++** ldr z22, \sp, #16, mul vl\ ++** ldr z23, \sp, #17, mul vl\ ++** ldr p4, \sp\ ++** ldr p5, \sp, #1, mul vl\ ++** ldr p6, \sp, #2, mul vl\ ++** ldr p7, \sp, #3, mul vl\ ++** ldr p8, \sp, #4, mul vl\ ++** ldr p9, \sp, #5, mul vl\ ++** ldr p10, \sp, #6, mul vl\ ++** ldr p11, \sp, #7, mul vl\ ++** ldr p12, \sp, #8, mul vl\ ++** ldr p13, \sp, #9, mul vl\ ++** ldr p14, \sp, #10, mul vl\ ++** ldr p15, \sp, #11, mul vl\ ++** addvl sp, sp, #18 ++** ldp x30, \1, \sp\, #?32 ++** ret ++*/ ++svbool_t ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); ++ return c->sc_ptr (); ++} ++ ++/* ++** s_caller: { target lp64 } ++** stp x30, (x19|x20-8), \sp, #?-32\! 
++** cntd x16 ++** str x16, \sp, #?16\ ++** addvl sp, sp, #-18 ++** str p4, \sp\ ++** str p5, \sp, #1, mul vl\ ++** str p6, \sp, #2, mul vl\ ++** str p7, \sp, #3, mul vl\ ++** str p8, \sp, #4, mul vl\ ++** str p9, \sp, #5, mul vl\ ++** str p10, \sp, #6, mul vl\ ++** str p11, \sp, #7, mul vl\ ++** str p12, \sp, #8, mul vl\ ++** str p13, \sp, #9, mul vl\ ++** str p14, \sp, #10, mul vl\ ++** str p15, \sp, #11, mul vl\ ++** str z8, \sp, #2, mul vl\ ++** str z9, \sp, #3, mul vl\ ++** str z10, \sp, #4, mul vl\ ++** str z11, \sp, #5, mul vl\ ++** str z12, \sp, #6, mul vl\ ++** str z13, \sp, #7, mul vl\ ++** str z14, \sp, #8, mul vl\ ++** str z15, \sp, #9, mul vl\ ++** str z16, \sp, #10, mul vl\ ++** str z17, \sp, #11, mul vl\ ++** str z18, \sp, #12, mul vl\ ++** str z19, \sp, #13, mul vl\ ++** str z20, \sp, #14, mul vl\ ++** str z21, \sp, #15, mul vl\ ++** str z22, \sp, #16, mul vl\ ++** str z23, \sp, #17, mul vl\ ++** mov \1, x0 ++** smstop sm ++** bl ns_callee ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** bl s_callee ++** bl sc_callee ++** ldr (x0-9+), \\1\ ++** smstop sm ++** blr \2 ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** ldr (x0-9+), \\1, #?8\ ++** blr \3 ++** ldr (x0-9+), \\1, #?16\ ++** blr \4 ++** ldr z8, \sp, #2, mul vl\ ++** ldr z9, \sp, #3, mul vl\ ++** ldr z10, \sp, #4, mul vl\ ++** ldr z11, \sp, #5, mul vl\ ++** ldr z12, \sp, #6, mul vl\ ++** ldr z13, \sp, #7, mul vl\ ++** ldr z14, \sp, #8, mul vl\ ++** ldr z15, \sp, #9, mul vl\ ++** ldr z16, \sp, #10, mul vl\ ++** ldr z17, \sp, #11, mul vl\ ++** ldr z18, \sp, #12, mul vl\ ++** ldr z19, \sp, #13, mul vl\ ++** ldr z20, \sp, #14, mul vl\ ++** ldr z21, \sp, #15, mul vl\ ++** ldr z22, \sp, #16, mul vl\ ++** ldr z23, \sp, #17, mul vl\ ++** ldr p4, \sp\ ++** ldr p5, \sp, #1, mul vl\ ++** ldr p6, \sp, #2, mul vl\ ++** ldr p7, \sp, #3, mul vl\ ++** ldr p8, \sp, #4, mul vl\ ++** ldr p9, \sp, #5, mul vl\ ++** ldr p10, \sp, #6, mul vl\ ++** ldr p11, \sp, #7, mul vl\ ++** ldr p12, \sp, #8, mul vl\ ++** ldr p13, \sp, #9, mul vl\ ++** ldr p14, \sp, #10, mul vl\ ++** ldr p15, \sp, #11, mul vl\ ++** addvl sp, sp, #18 ++** ldp x30, \1, \sp\, #?32 ++** ret ++*/ ++svbool_t ++s_caller (struct callbacks *c) arm::streaming ++{ ++ ns_callee (); ++ s_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ c->s_ptr (); ++ return c->sc_ptr (); ++} ++ ++/* ++** sc_caller: ++** stp x29, x30, \sp, #?-32\! 
++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** addvl sp, sp, #-18 ++** str p4, \sp\ ++** str p5, \sp, #1, mul vl\ ++** str p6, \sp, #2, mul vl\ ++** str p7, \sp, #3, mul vl\ ++** str p8, \sp, #4, mul vl\ ++** str p9, \sp, #5, mul vl\ ++** str p10, \sp, #6, mul vl\ ++** str p11, \sp, #7, mul vl\ ++** str p12, \sp, #8, mul vl\ ++** str p13, \sp, #9, mul vl\ ++** str p14, \sp, #10, mul vl\ ++** str p15, \sp, #11, mul vl\ ++** str z8, \sp, #2, mul vl\ ++** str z9, \sp, #3, mul vl\ ++** str z10, \sp, #4, mul vl\ ++** str z11, \sp, #5, mul vl\ ++** str z12, \sp, #6, mul vl\ ++** str z13, \sp, #7, mul vl\ ++** str z14, \sp, #8, mul vl\ ++** str z15, \sp, #9, mul vl\ ++** str z16, \sp, #10, mul vl\ ++** str z17, \sp, #11, mul vl\ ++** str z18, \sp, #12, mul vl\ ++** str z19, \sp, #13, mul vl\ ++** str z20, \sp, #14, mul vl\ ++** str z21, \sp, #15, mul vl\ ++** str z22, \sp, #16, mul vl\ ++** str z23, \sp, #17, mul vl\ ++** mrs x16, svcr ++** str x16, \x29, #?16\ ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** smstop sm ++** bl ns_callee ++** ldr x16, \x29, #?16\ ++** tbz x16, 0, .* ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, .* ++** smstart sm ++** bl s_callee ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, .* ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** bl sc_callee ++** ldr z8, \sp, #2, mul vl\ ++** ldr z9, \sp, #3, mul vl\ ++** ldr z10, \sp, #4, mul vl\ ++** ldr z11, \sp, #5, mul vl\ ++** ldr z12, \sp, #6, mul vl\ ++** ldr z13, \sp, #7, mul vl\ ++** ldr z14, \sp, #8, mul vl\ ++** ldr z15, \sp, #9, mul vl\ ++** ldr z16, \sp, #10, mul vl\ ++** ldr z17, \sp, #11, mul vl\ ++** ldr z18, \sp, #12, mul vl\ ++** ldr z19, \sp, #13, mul vl\ ++** ldr z20, \sp, #14, mul vl\ ++** ldr z21, \sp, #15, mul vl\ ++** ldr z22, \sp, #16, mul vl\ ++** ldr z23, \sp, #17, mul vl\ ++** ldr p4, \sp\ ++** ldr p5, \sp, #1, mul vl\ ++** ldr p6, \sp, #2, mul vl\ ++** ldr p7, \sp, #3, mul vl\ ++** ldr p8, \sp, #4, mul vl\ ++** ldr p9, \sp, #5, mul vl\ ++** ldr p10, \sp, #6, mul vl\ ++** ldr p11, \sp, #7, mul vl\ ++** ldr p12, \sp, #8, mul vl\ ++** ldr p13, \sp, #9, mul vl\ ++** ldr p14, \sp, #10, mul vl\ ++** ldr p15, \sp, #11, mul vl\ ++** addvl sp, sp, #18 ++** ldp x29, x30, \sp\, #?32 ++** ret ++*/ ++svbool_t ++sc_caller () arm::streaming_compatible ++{ ++ ns_callee (); ++ s_callee (); ++ return sc_callee (); ++} ++ ++/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -16\n} } } */ ++/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -16\n} } } */ ++/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -8\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c +new file mode 100644 +index 000000000..0f6bc4f6c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c +@@ -0,0 +1,45 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++ ++#include <arm_sve.h> ++ ++svbool_t ns_callee (); ++ svbool_t s_callee () arm::streaming; ++ svbool_t sc_callee () arm::streaming_compatible; ++ ++struct callbacks { ++ svbool_t (*ns_ptr) (); ++ svbool_t (*s_ptr) () arm::streaming; ++ svbool_t (*sc_ptr) () arm::streaming_compatible; ++}; ++ ++svbool_t ++n_caller (struct callbacks *c) ++{ ++ ns_callee (); ++ sc_callee (); ++ ++ c->ns_ptr (); ++ return c->sc_ptr (); ++} ++ ++svbool_t ++s_caller 
(struct callbacks *c) arm::streaming ++{ ++ s_callee (); ++ sc_callee (); ++ ++ c->s_ptr (); ++ return c->sc_ptr (); ++} ++ ++svbool_t ++sc_caller (struct callbacks *c) arm::streaming_compatible ++{ ++ sc_callee (); ++ ++ return c->sc_ptr (); ++} ++ ++// { dg-final { scan-assembler-not {dpqz0-9+,} } } ++// { dg-final { scan-assembler-not {smstart\tsm} } } ++// { dg-final { scan-assembler-not {smstop\tsm} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c +new file mode 100644 +index 000000000..6482a489f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c +@@ -0,0 +1,516 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++#include <arm_neon.h> ++#include <arm_sve.h> ++ ++double produce_d0 (); ++void consume_d0 (double); ++ ++/* ++** test_d0: ++** ... ++** smstop sm ++** bl produce_d0 ++** fmov x10, d0 ++** smstart sm ++** fmov d0, x10 ++** fmov x10, d0 ++** smstop sm ++** fmov d0, x10 ++** bl consume_d0 ++** ... ++*/ ++void ++test_d0 () arm::streaming ++{ ++ double res = produce_d0 (); ++ asm volatile (""); ++ consume_d0 (res); ++} ++ ++int8x8_t produce_d0_vec (); ++void consume_d0_vec (int8x8_t); ++ ++/* ++** test_d0_vec: ++** ... ++** smstop sm ++** bl produce_d0_vec ++** ( ++** fmov x10, d0 ++** | ++** umov x10, v0.d\0\ ++** ) ++** smstart sm ++** fmov d0, x10 ++** ( ++** fmov x10, d0 ++** | ++** umov x10, v0.d\0\ ++** ) ++** smstop sm ++** fmov d0, x10 ++** bl consume_d0_vec ++** ... ++*/ ++void ++test_d0_vec () arm::streaming ++{ ++ int8x8_t res = produce_d0_vec (); ++ asm volatile (""); ++ consume_d0_vec (res); ++} ++ ++int8x16_t produce_q0 (); ++void consume_q0 (int8x16_t); ++ ++/* ++** test_q0: ++** ... ++** smstop sm ++** bl produce_q0 ++** str q0, \sp, #?-16\! ++** smstart sm ++** ldr q0, \sp\, #?16 ++** str q0, \sp, #?-16\! ++** smstop sm ++** ldr q0, \sp\, #?16 ++** bl consume_q0 ++** ... ++*/ ++void ++test_q0 () arm::streaming ++{ ++ int8x16_t res = produce_q0 (); ++ asm volatile (""); ++ consume_q0 (res); ++} ++ ++int8x16x2_t produce_q1 (); ++void consume_q1 (int8x16x2_t); ++ ++/* ++** test_q1: ++** ... ++** smstop sm ++** bl produce_q1 ++** stp q0, q1, \sp, #?-32\! ++** smstart sm ++** ldp q0, q1, \sp\, #?32 ++** stp q0, q1, \sp, #?-32\! ++** smstop sm ++** ldp q0, q1, \sp\, #?32 ++** bl consume_q1 ++** ... ++*/ ++void ++test_q1 () arm::streaming ++{ ++ int8x16x2_t res = produce_q1 (); ++ asm volatile (""); ++ consume_q1 (res); ++} ++ ++int8x16x3_t produce_q2 (); ++void consume_q2 (int8x16x3_t); ++ ++/* ++** test_q2: ++** ... ++** smstop sm ++** bl produce_q2 ++** stp q0, q1, \sp, #?-48\! ++** str q2, \sp, #?32\ ++** smstart sm ++** ldr q2, \sp, #?32\ ++** ldp q0, q1, \sp\, #?48 ++** stp q0, q1, \sp, #?-48\! ++** str q2, \sp, #?32\ ++** smstop sm ++** ldr q2, \sp, #?32\ ++** ldp q0, q1, \sp\, #?48 ++** bl consume_q2 ++** ... ++*/ ++void ++test_q2 () arm::streaming ++{ ++ int8x16x3_t res = produce_q2 (); ++ asm volatile (""); ++ consume_q2 (res); ++} ++ ++int8x16x4_t produce_q3 (); ++void consume_q3 (int8x16x4_t); ++ ++/* ++** test_q3: ++** ... ++** smstop sm ++** bl produce_q3 ++** stp q0, q1, \sp, #?-64\! ++** stp q2, q3, \sp, #?32\ ++** smstart sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q0, q1, \sp\, #?64 ++** stp q0, q1, \sp, #?-64\! ++** stp q2, q3, \sp, #?32\ ++** smstop sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q0, q1, \sp\, #?64 ++** bl consume_q3 ++** ... 
++*/ ++void ++test_q3 () arm::streaming ++{ ++ int8x16x4_t res = produce_q3 (); ++ asm volatile (""); ++ consume_q3 (res); ++} ++ ++svint8_t produce_z0 (); ++void consume_z0 (svint8_t); ++ ++/* ++** test_z0: ++** ... ++** smstop sm ++** bl produce_z0 ++** addvl sp, sp, #-1 ++** str z0, \sp\ ++** smstart sm ++** ldr z0, \sp\ ++** addvl sp, sp, #1 ++** addvl sp, sp, #-1 ++** str z0, \sp\ ++** smstop sm ++** ldr z0, \sp\ ++** addvl sp, sp, #1 ++** bl consume_z0 ++** ... ++*/ ++void ++test_z0 () arm::streaming ++{ ++ svint8_t res = produce_z0 (); ++ asm volatile (""); ++ consume_z0 (res); ++} ++ ++svint8x4_t produce_z3 (); ++void consume_z3 (svint8x4_t); ++ ++/* ++** test_z3: ++** ... ++** smstop sm ++** bl produce_z3 ++** addvl sp, sp, #-4 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** smstart sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** addvl sp, sp, #4 ++** addvl sp, sp, #-4 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** smstop sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** addvl sp, sp, #4 ++** bl consume_z3 ++** ... ++*/ ++void ++test_z3 () arm::streaming ++{ ++ svint8x4_t res = produce_z3 (); ++ asm volatile (""); ++ consume_z3 (res); ++} ++ ++svbool_t produce_p0 (); ++void consume_p0 (svbool_t); ++ ++/* ++** test_p0: ++** ... ++** smstop sm ++** bl produce_p0 ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** bl consume_p0 ++** ... ++*/ ++void ++test_p0 () arm::streaming ++{ ++ svbool_t res = produce_p0 (); ++ asm volatile (""); ++ consume_p0 (res); ++} ++ ++void consume_d7 (double, double, double, double, double, double, double, ++ double); ++ ++/* ++** test_d7: ++** ... ++** fmov x10, d0 ++** fmov x11, d1 ++** fmov x12, d2 ++** fmov x13, d3 ++** fmov x14, d4 ++** fmov x15, d5 ++** fmov x16, d6 ++** fmov x17, d7 ++** smstop sm ++** fmov d0, x10 ++** fmov d1, x11 ++** fmov d2, x12 ++** fmov d3, x13 ++** fmov d4, x14 ++** fmov d5, x15 ++** fmov d6, x16 ++** fmov d7, x17 ++** bl consume_d7 ++** ... ++*/ ++void ++test_d7 () arm::streaming ++{ ++ consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); ++} ++ ++void consume_d7_vec (int8x8_t, int8x8_t, int8x8_t, int8x8_t, int8x8_t, ++ int8x8_t, int8x8_t, int8x8_t); ++ ++/* ++** test_d7_vec: ++** ... ++** ( ++** fmov x10, d0 ++** fmov x11, d1 ++** fmov x12, d2 ++** fmov x13, d3 ++** fmov x14, d4 ++** fmov x15, d5 ++** fmov x16, d6 ++** fmov x17, d7 ++** | ++** umov x10, v0.d\0\ ++** umov x11, v1.d\0\ ++** umov x12, v2.d\0\ ++** umov x13, v3.d\0\ ++** umov x14, v4.d\0\ ++** umov x15, v5.d\0\ ++** umov x16, v6.d\0\ ++** umov x17, v7.d\0\ ++** ) ++** smstop sm ++** fmov d0, x10 ++** fmov d1, x11 ++** fmov d2, x12 ++** fmov d3, x13 ++** fmov d4, x14 ++** fmov d5, x15 ++** fmov d6, x16 ++** fmov d7, x17 ++** bl consume_d7_vec ++** ... ++*/ ++void ++test_d7_vec (int8x8_t *ptr) arm::streaming ++{ ++ consume_d7_vec (*ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr); ++} ++ ++void consume_q7 (int8x16_t, int8x16_t, int8x16_t, int8x16_t, int8x16_t, ++ int8x16_t, int8x16_t, int8x16_t); ++ ++/* ++** test_q7: ++** ... ++** stp q0, q1, \sp, #?-128\! 
++** stp q2, q3, \sp, #?32\ ++** stp q4, q5, \sp, #?64\ ++** stp q6, q7, \sp, #?96\ ++** smstop sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q4, q5, \sp, #?64\ ++** ldp q6, q7, \sp, #?96\ ++** ldp q0, q1, \sp\, #?128 ++** bl consume_q7 ++** ... ++*/ ++void ++test_q7 (int8x16_t *ptr) arm::streaming ++{ ++ consume_q7 (*ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr); ++} ++ ++void consume_z7 (svint8_t, svint8_t, svint8_t, svint8_t, svint8_t, ++ svint8_t, svint8_t, svint8_t); ++ ++/* ++** test_z7: ++** ... ++** addvl sp, sp, #-8 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** str z4, \sp, #4, mul vl\ ++** str z5, \sp, #5, mul vl\ ++** str z6, \sp, #6, mul vl\ ++** str z7, \sp, #7, mul vl\ ++** smstop sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** ldr z4, \sp, #4, mul vl\ ++** ldr z5, \sp, #5, mul vl\ ++** ldr z6, \sp, #6, mul vl\ ++** ldr z7, \sp, #7, mul vl\ ++** addvl sp, sp, #8 ++** bl consume_z7 ++** ... ++*/ ++void ++test_z7 (svint8_t *ptr) arm::streaming ++{ ++ consume_z7 (*ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr); ++} ++ ++void consume_p3 (svbool_t, svbool_t, svbool_t, svbool_t); ++ ++/* ++** test_p3: ++** ... ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** str p1, \sp, #1, mul vl\ ++** str p2, \sp, #2, mul vl\ ++** str p3, \sp, #3, mul vl\ ++** smstop sm ++** ldr p0, \sp\ ++** ldr p1, \sp, #1, mul vl\ ++** ldr p2, \sp, #2, mul vl\ ++** ldr p3, \sp, #3, mul vl\ ++** addvl sp, sp, #1 ++** bl consume_p3 ++** ... ++*/ ++void ++test_p3 (svbool_t *ptr) arm::streaming ++{ ++ consume_p3 (*ptr, *ptr, *ptr, *ptr); ++} ++ ++void consume_mixed (float, double, float32x4_t, svfloat32_t, ++ float, double, float64x2_t, svfloat64_t, ++ svbool_t, svbool_t, svbool_t, svbool_t); ++ ++/* ++** test_mixed: ++** ... ++** addvl sp, sp, #-3 ++** str p0, \sp\ ++** str p1, \sp, #1, mul vl\ ++** str p2, \sp, #2, mul vl\ ++** str p3, \sp, #3, mul vl\ ++** str z3, \sp, #1, mul vl\ ++** str z7, \sp, #2, mul vl\ ++** stp q2, q6, \sp, #?-32\! ++** fmov w10, s0 ++** fmov x11, d1 ++** fmov w12, s4 ++** fmov x13, d5 ++** smstop sm ++** fmov s0, w10 ++** fmov d1, x11 ++** fmov s4, w12 ++** fmov d5, x13 ++** ldp q2, q6, \sp\, #?32 ++** ldr p0, \sp\ ++** ldr p1, \sp, #1, mul vl\ ++** ldr p2, \sp, #2, mul vl\ ++** ldr p3, \sp, #3, mul vl\ ++** ldr z3, \sp, #1, mul vl\ ++** ldr z7, \sp, #2, mul vl\ ++** addvl sp, sp, #3 ++** bl consume_mixed ++** ... ++*/ ++void ++test_mixed (float32x4_t *float32x4_ptr, ++ svfloat32_t *svfloat32_ptr, ++ float64x2_t *float64x2_ptr, ++ svfloat64_t *svfloat64_ptr, ++ svbool_t *svbool_ptr) arm::streaming ++{ ++ consume_mixed (1.0f, 2.0, *float32x4_ptr, *svfloat32_ptr, ++ 3.0f, 4.0, *float64x2_ptr, *svfloat64_ptr, ++ *svbool_ptr, *svbool_ptr, *svbool_ptr, *svbool_ptr); ++} ++ ++void consume_varargs (float, ...); ++ ++/* ++** test_varargs: ++** ... ++** stp q3, q7, \sp, #?-32\! ++** fmov w10, s0 ++** fmov x11, d1 ++** ( ++** fmov x12, d2 ++** | ++** umov x12, v2.d\0\ ++** ) ++** fmov x13, d4 ++** fmov x14, d5 ++** ( ++** fmov x15, d6 ++** | ++** umov x15, v6.d\0\ ++** ) ++** smstop sm ++** fmov s0, w10 ++** fmov d1, x11 ++** fmov d2, x12 ++** fmov d4, x13 ++** fmov d5, x14 ++** fmov d6, x15 ++** ldp q3, q7, \sp\, #?32 ++** bl consume_varargs ++** ... 
++*/ ++void ++test_varargs (float32x2_t *float32x2_ptr, ++ float32x4_t *float32x4_ptr, ++ float64x1_t *float64x1_ptr, ++ float64x2_t *float64x2_ptr) arm::streaming ++{ ++ consume_varargs (1.0f, 2.0, *float32x2_ptr, *float32x4_ptr, ++ 3.0f, 4.0, *float64x1_ptr, *float64x2_ptr); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +new file mode 100644 +index 000000000..f44724df3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +@@ -0,0 +1,87 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -msve-vector-bits=128" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++#include <arm_sve.h> ++ ++svint8_t produce_z0 (); ++void consume_z0 (svint8_t); ++ ++/* ++** test_z0: ++** ... ++** smstop sm ++** bl produce_z0 ++** str q0, \sp, #?-16\! ++** smstart sm ++** ldr q0, \sp\, #?16 ++** str q0, \sp, #?-16\! ++** smstop sm ++** ldr q0, \sp\, #?16 ++** bl consume_z0 ++** ... ++*/ ++void ++test_z0 () arm::streaming ++{ ++ svint8_t res = produce_z0 (); ++ asm volatile (""); ++ consume_z0 (res); ++} ++ ++svint8x4_t produce_z3 (); ++void consume_z3 (svint8x4_t); ++ ++/* ++** test_z3: ++** ... ++** smstop sm ++** bl produce_z3 ++** stp q0, q1, \sp, #?-64\! ++** stp q2, q3, \sp, #?32\ ++** smstart sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q0, q1, \sp\, #?64 ++** stp q0, q1, \sp, #?-64\! ++** stp q2, q3, \sp, #?32\ ++** smstop sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q0, q1, \sp\, #?64 ++** bl consume_z3 ++** ... ++*/ ++void ++test_z3 () arm::streaming ++{ ++ svint8x4_t res = produce_z3 (); ++ asm volatile (""); ++ consume_z3 (res); ++} ++ ++svbool_t produce_p0 (); ++void consume_p0 (svbool_t); ++ ++/* ++** test_p0: ++** ... ++** smstop sm ++** bl produce_p0 ++** sub sp, sp, #?16 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** add sp, sp, #?16 ++** sub sp, sp, #?16 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** add sp, sp, #?16 ++** bl consume_p0 ++** ... ++*/ ++void ++test_p0 () arm::streaming ++{ ++ svbool_t res = produce_p0 (); ++ asm volatile (""); ++ consume_p0 (res); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c +new file mode 100644 +index 000000000..83b4073ee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c +@@ -0,0 +1,103 @@ ++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -msve-vector-bits=256" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++#include <arm_sve.h> ++ ++svint8_t produce_z0 (); ++void consume_z0 (svint8_t); ++ ++/* ++** test_z0: ++** ... ++** smstop sm ++** bl produce_z0 ++** sub sp, sp, #?32 ++** str z0, \sp\ ++** smstart sm ++** ldr z0, \sp\ ++** add sp, sp, #?32 ++** sub sp, sp, #?32 ++** str z0, \sp\ ++** smstop sm ++** ldr z0, \sp\ ++** add sp, sp, #?32 ++** bl consume_z0 ++** ... ++*/ ++void ++test_z0 () arm::streaming ++{ ++ svint8_t res = produce_z0 (); ++ asm volatile (""); ++ consume_z0 (res); ++} ++ ++svint8x4_t produce_z3 (); ++void consume_z3 (svint8x4_t); ++ ++/* ++** test_z3: ++** ... 
++** smstop sm ++** bl produce_z3 ++** sub sp, sp, #?128 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** smstart sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** add sp, sp, #?128 ++** sub sp, sp, #?128 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** smstop sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** add sp, sp, #?128 ++** bl consume_z3 ++** ... ++*/ ++void ++test_z3 () arm::streaming ++{ ++ svint8x4_t res = produce_z3 (); ++ asm volatile (""); ++ consume_z3 (res); ++} ++ ++svbool_t produce_p0 (); ++void consume_p0 (svbool_t); ++ ++/* ++** test_p0: ++** ... ++** smstop sm ++** bl produce_p0 ++** sub sp, sp, #?32 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** add sp, sp, #?32 ++** sub sp, sp, #?32 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** add sp, sp, #?32 ++** bl consume_p0 ++** ... ++*/ ++void ++test_p0 () arm::streaming ++{ ++ svbool_t res = produce_p0 (); ++ asm volatile (""); ++ consume_p0 (res); ++} +-- +2.33.0 +
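The tests above pin down the exact instruction sequences that this patch generates around calls that change PSTATE.SM. For orientation, here is a minimal C sketch of the source-level contract being tested (the function names are hypothetical and an SME-capable compiler is assumed; this is not code from the patch):

void bar (void) [[arm::streaming]];

void
foo (void)
{
  /* foo is non-streaming and bar is streaming, so the compiler
     brackets the call with "smstart sm" before the branch and
     "smstop sm" after the return, saving and restoring any live
     argument and result registers around each mode switch, as
     implemented by aarch64_switch_pstate_sm_for_call above.  */
  bar ();
}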
View file
_service:tar_scm:0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch
Added
@@ -0,0 +1,4324 @@
+From 1efd433c779f66440facc8ba5cd23bdbdd6672ba Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 107/157] [Backport][SME] aarch64: Add support for SME ZA
+ attributes
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3af9ceb631b741095d8eabd055ff7c23d4a69e6f
+
+SME has an array called ZA that can be enabled and disabled separately
+from streaming mode. A status bit called PSTATE.ZA indicates whether
+ZA is currently enabled or not.
+
+In C and C++, the state of PSTATE.ZA is controlled using function
+attributes. There are four attributes that can be attached to
+function types to indicate that the function shares ZA with its
+caller. These are:
+
+- arm::in("za")
+- arm::out("za")
+- arm::inout("za")
+- arm::preserves("za")
+
+If a function's type has one of these shared-ZA attributes,
+PSTATE.ZA is specified to be 1 on entry to the function and on return
+from the function. Otherwise, the caller and callee have separate
+ZA contexts; they do not use ZA to share data.
+
+Although normal non-shared-ZA functions have a separate ZA context
+from their callers, nested uses of ZA are expected to be rare.
+The ABI therefore defines a cooperative lazy saving scheme that
+allows saves and restore of ZA to be kept to a minimum.
+(Callers still have the option of doing a full save and restore
+if they prefer.)
+
+Functions that want to use ZA internally have an arm::new("za")
+attribute, which tells the compiler to enable PSTATE.ZA for
+the duration of the function body. It also tells the compiler
+to commit any lazy save initiated by a caller.
+
+The patch uses various abstract hard registers to track dataflow
+relating to ZA. See the comments in the patch for details.
+
+The lazy save scheme is intended to be transparent to most normal
+functions, so that they don't need to be recompiled for SME.
+This is reflected in the way that most normal functions ignore
+the new hard registers added in the patch.
+
+As with arm::streaming and arm::streaming_compatible, the attributes are
+also available as __arm_<attr>. This has two advantages: it triggers an
+error on compilers that don't understand the attributes, and it eases
+use on C, where [[...]] attributes were only added in C23.
+
+gcc/
+ * config/aarch64/aarch64-isa-modes.def (ZA_ON): New ISA mode.
+ * config/aarch64/aarch64-protos.h (aarch64_rdsvl_immediate_p)
+ (aarch64_output_rdsvl, aarch64_optimize_mode_switching)
+ (aarch64_restore_za): Declare.
+ * config/aarch64/constraints.md (UsR): New constraint.
+ * config/aarch64/aarch64.md (LOWERING_REGNUM, TPIDR_BLOCK_REGNUM)
+ (SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ZA_FREE_REGNUM)
+ (ZA_SAVED_REGNUM, ZA_REGNUM, FIRST_FAKE_REGNUM): New constants.
+ (LAST_FAKE_REGNUM): Likewise.
+ (UNSPEC_SAVE_NZCV, UNSPEC_RESTORE_NZCV, UNSPEC_SME_VQ): New unspecs.
+ (arches): Add sme.
+ (arch_enabled): Handle it.
+ (*cb<optab><mode>1): Rename to...
+ (aarch64_cb<optab><mode>1): ...this.
+ (*movsi_aarch64): Add an alternative for RDSVL.
+ (*movdi_aarch64): Likewise.
+ (aarch64_save_nzcv, aarch64_restore_nzcv): New insns.
+ * config/aarch64/aarch64-sme.md (UNSPEC_SMSTOP_ZA)
+ (UNSPEC_INITIAL_ZERO_ZA, UNSPEC_TPIDR2_SAVE, UNSPEC_TPIDR2_RESTORE)
+ (UNSPEC_READ_TPIDR2, UNSPEC_WRITE_TPIDR2, UNSPEC_SETUP_LOCAL_TPIDR2)
+ (UNSPEC_RESTORE_ZA, UNSPEC_START_PRIVATE_ZA_CALL): New unspecs.
+ (UNSPEC_END_PRIVATE_ZA_CALL, UNSPEC_COMMIT_LAZY_SAVE): Likewise.
+ (UNSPECV_ASM_UPDATE_ZA): New unspecv.
+ (aarch64_tpidr2_save, aarch64_smstart_za, aarch64_smstop_za) + (aarch64_initial_zero_za, aarch64_setup_local_tpidr2) + (aarch64_clear_tpidr2, aarch64_write_tpidr2, aarch64_read_tpidr2) + (aarch64_tpidr2_restore, aarch64_restore_za, aarch64_asm_update_za) + (aarch64_start_private_za_call, aarch64_end_private_za_call) + (aarch64_commit_lazy_save): New patterns. + * config/aarch64/aarch64.h (AARCH64_ISA_ZA_ON, TARGET_ZA): New macros. + (FIXED_REGISTERS, REGISTER_NAMES): Add the new fake ZA registers. + (CALL_USED_REGISTERS): Replace with... + (CALL_REALLY_USED_REGISTERS): ...this and add the fake ZA registers. + (FIRST_PSEUDO_REGISTER): Bump to include the fake ZA registers. + (FAKE_REGS): New register class. + (REG_CLASS_NAMES): Update accordingly. + (REG_CLASS_CONTENTS): Likewise. + (machine_function::tpidr2_block): New member variable. + (machine_function::tpidr2_block_ptr): Likewise. + (machine_function::za_save_buffer): Likewise. + (machine_function::next_asm_update_za_id): Likewise. + (CUMULATIVE_ARGS::shared_za_flags): Likewise. + (aarch64_mode_entity, aarch64_local_sme_state): New enums. + (aarch64_tristate_mode): Likewise. + (OPTIMIZE_MODE_SWITCHING, NUM_MODES_FOR_MODE_SWITCHING): Define. + * config/aarch64/aarch64.cc (AARCH64_STATE_SHARED, AARCH64_STATE_IN) + (AARCH64_STATE_OUT): New constants. + (aarch64_attribute_shared_state_flags): New function. + (aarch64_lookup_shared_state_flags, aarch64_fndecl_has_new_state) + (aarch64_check_state_string, cmp_string_csts): Likewise. + (aarch64_merge_string_arguments, aarch64_check_arm_new_against_type) + (handle_arm_new, handle_arm_shared): Likewise. + (handle_arm_new_za_attribute): New + (aarch64_arm_attribute_table): Add new, preserves, in, out, and inout. + (aarch64_hard_regno_nregs): Handle FAKE_REGS. + (aarch64_hard_regno_mode_ok): Likewise. + (aarch64_fntype_shared_flags, aarch64_fntype_pstate_za): New functions. + (aarch64_fntype_isa_mode): Include aarch64_fntype_pstate_za. + (aarch64_fndecl_has_state, aarch64_fndecl_pstate_za): New functions. + (aarch64_fndecl_isa_mode): Include aarch64_fndecl_pstate_za. + (aarch64_cfun_incoming_pstate_za, aarch64_cfun_shared_flags) + (aarch64_cfun_has_new_state, aarch64_cfun_has_state): New functions. + (aarch64_sme_vq_immediate, aarch64_sme_vq_unspec_p): Likewise. + (aarch64_rdsvl_immediate_p, aarch64_output_rdsvl): Likewise. + (aarch64_expand_mov_immediate): Handle RDSVL immediates. + (aarch64_function_arg): Add the ZA sharing flags as a third limb + of the PARALLEL. + (aarch64_init_cumulative_args): Record the ZA sharing flags. + (aarch64_extra_live_on_entry): New function. Handle the new + ZA-related fake registers. + (aarch64_epilogue_uses): Handle the new ZA-related fake registers. + (aarch64_cannot_force_const_mem): Handle UNSPEC_SME_VQ constants. + (aarch64_get_tpidr2_block, aarch64_get_tpidr2_ptr): New functions. + (aarch64_init_tpidr2_block, aarch64_restore_za): Likewise. + (aarch64_layout_frame): Check whether the current function creates + new ZA state. Record that it clobbers LR if so. + (aarch64_expand_prologue): Handle functions that create new ZA state. + (aarch64_expand_epilogue): Likewise. + (aarch64_create_tpidr2_block): New function. + (aarch64_restore_za): Likewise. + (aarch64_start_call_args): Disallow calls to shared-ZA functions + from functions that have no ZA state. Emit a marker instruction + before calls to private-ZA functions from functions that have + SME state. + (aarch64_expand_call): Add return registers for state that is + managed via attributes. 
Record the use and clobber information + for the ZA registers. + (aarch64_end_call_args): New function. + (aarch64_regno_regclass): Handle FAKE_REGS. + (aarch64_class_max_nregs): Likewise. + (aarch64_override_options_internal): Require TARGET_SME for + functions that have ZA state. + (aarch64_conditional_register_usage): Handle FAKE_REGS. + (aarch64_mov_operand_p): Handle RDSVL immediates. + (aarch64_comp_type_attributes): Check that the ZA sharing flags + are equal. + (aarch64_merge_decl_attributes): New function. + (aarch64_optimize_mode_switching, aarch64_mode_emit_za_save_buffer) + (aarch64_mode_emit_local_sme_state, aarch64_mode_emit): Likewise. + (aarch64_insn_references_sme_state_p): Likewise. + (aarch64_mode_needed_local_sme_state): Likewise. + (aarch64_mode_needed_za_save_buffer, aarch64_mode_needed): Likewise. + (aarch64_mode_after_local_sme_state, aarch64_mode_after): Likewise. + (aarch64_local_sme_confluence, aarch64_mode_confluence): Likewise. + (aarch64_one_shot_backprop, aarch64_local_sme_backprop): Likewise. + (aarch64_mode_backprop, aarch64_mode_entry): Likewise. + (aarch64_mode_exit, aarch64_mode_eh_handler): Likewise. + (aarch64_mode_priority, aarch64_md_asm_adjust): Likewise. + (TARGET_END_CALL_ARGS, TARGET_MERGE_DECL_ATTRIBUTES): Define. + (TARGET_MODE_EMIT, TARGET_MODE_NEEDED, TARGET_MODE_AFTER): Likewise. + (TARGET_MODE_CONFLUENCE, TARGET_MODE_BACKPROP): Likewise. + (TARGET_MODE_ENTRY, TARGET_MODE_EXIT): Likewise. + (TARGET_MODE_EH_HANDLER, TARGET_MODE_PRIORITY): Likewise. + (TARGET_EXTRA_LIVE_ON_ENTRY): Likewise. + (TARGET_MD_ASM_ADJUST): Use aarch64_md_asm_adjust. + * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): + Define __arm_new, __arm_preserves,__arm_in, __arm_out, and __arm_inout. + +gcc/testsuite/ + * gcc.target/aarch64/sme/za_state_1.c: New test. + * gcc.target/aarch64/sme/za_state_2.c: Likewise. + * gcc.target/aarch64/sme/za_state_3.c: Likewise. + * gcc.target/aarch64/sme/za_state_4.c: Likewise. + * gcc.target/aarch64/sme/za_state_5.c: Likewise. + * gcc.target/aarch64/sme/za_state_6.c: Likewise. + * g++.target/aarch64/sme/exceptions_1.C: Likewise. + * gcc.target/aarch64/sme/keyword_macros_1.c: Add ZA macros. + * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. 
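For readers unfamiliar with the attribute scheme described in the commit message above, a short illustrative sketch of how the ZA attributes appear in user code (the function names are hypothetical; only the attributes themselves come from the patch):

/* Shares ZA with its caller, both reading and updating it.  */
void accumulate (void) [[arm::inout("za")]];

/* Creates new ZA state for the duration of its body; the compiler
   enables PSTATE.ZA and commits any lazy save initiated by a caller.  */
[[arm::new("za")]] void
kernel (void)
{
  accumulate ();	/* ZA is shared with the callee here.  */
}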
+---
+ gcc/config/aarch64/aarch64-c.cc | 32 +
+ gcc/config/aarch64/aarch64-isa-modes.def | 5 +
+ gcc/config/aarch64/aarch64-protos.h | 5 +
+ gcc/config/aarch64/aarch64-sme.md | 287 ++++
+ gcc/config/aarch64/aarch64.cc | 1371 ++++++++++++++++-
+ gcc/config/aarch64/aarch64.h | 98 +-
+ gcc/config/aarch64/aarch64.md | 81 +-
+ gcc/config/aarch64/constraints.md | 6 +
+ .../g++.target/aarch64/sme/exceptions_1.C | 189 +++
+ .../g++.target/aarch64/sme/keyword_macros_1.C | 5 +
+ .../gcc.target/aarch64/sme/keyword_macros_1.c | 5 +
+ .../gcc.target/aarch64/sme/za_state_1.c | 154 ++
+ .../gcc.target/aarch64/sme/za_state_2.c | 73 +
+ .../gcc.target/aarch64/sme/za_state_3.c | 31 +
+ .../gcc.target/aarch64/sme/za_state_4.c | 585 +++++++
+ .../gcc.target/aarch64/sme/za_state_5.c | 595 +++++++
+ .../gcc.target/aarch64/sme/za_state_6.c | 23 +
+ 17 files changed, 3523 insertions(+), 22 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index 397745fbd..76c20848f 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -73,6 +73,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
+
+   builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
+
++  builtin_define ("__ARM_STATE_ZA");
++
+   /* Define keyword attributes like __arm_streaming as macros that expand
+      to the associated [[...]] attribute.  Use __extension__ in the attribute
+      for C, since the [[...]] syntax was only added in C23.  */
+@@ -86,6 +88,36 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
+   DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
+
+ #undef DEFINE_ARM_KEYWORD_MACRO
++
++  /* Same for the keyword attributes that take arguments.  The snag here
++     is that some old modes warn about or reject variadic arguments.  */
++  auto *cpp_opts = cpp_get_options (parse_in);
++  if (!cpp_opts->traditional)
++    {
++      auto old_warn_variadic_macros = cpp_opts->warn_variadic_macros;
++      auto old_cpp_warn_c90_c99_compat = cpp_opts->cpp_warn_c90_c99_compat;
++
++      cpp_opts->warn_variadic_macros = false;
++      cpp_opts->cpp_warn_c90_c99_compat = 0;
++
++#define DEFINE_ARM_KEYWORD_MACRO_ARGS(NAME) \
++  builtin_define_with_value ("__arm_" NAME "(...)", \
++			     lang_GNU_CXX () \
"arm::" NAME "(__VA_ARGS__)" \ ++ : "__extension__ arm::" NAME \ ++ "(__VA_ARGS__)", 0); ++ ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("new"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("preserves"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("in"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("out"); ++ DEFINE_ARM_KEYWORD_MACRO_ARGS ("inout"); ++ ++#undef DEFINE_ARM_KEYWORD_MACRO_ARGS ++ ++ cpp_opts->warn_variadic_macros = old_warn_variadic_macros; ++ cpp_opts->cpp_warn_c90_c99_compat = old_cpp_warn_c90_c99_compat; ++ } + } + + /* Undefine/redefine macros that depend on the current backend state and may +diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def +index 5915c98a8..c0ada35bd 100644 +--- a/gcc/config/aarch64/aarch64-isa-modes.def ++++ b/gcc/config/aarch64/aarch64-isa-modes.def +@@ -32,4 +32,9 @@ + DEF_AARCH64_ISA_MODE(SM_ON) + DEF_AARCH64_ISA_MODE(SM_OFF) + ++/* Indicates that PSTATE.ZA is known to be 1. The converse is that ++ PSTATE.ZA might be 0 or 1, depending on whether there is an uncommitted ++ lazy save. */ ++DEF_AARCH64_ISA_MODE(ZA_ON) ++ + #undef DEF_AARCH64_ISA_MODE +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 737f47026..0883ddd1a 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -808,6 +808,8 @@ bool aarch64_sve_addvl_addpl_immediate_p (rtx); + bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); + void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); ++bool aarch64_rdsvl_immediate_p (const_rtx); ++char *aarch64_output_rdsvl (const_rtx); + bool aarch64_mov_operand_p (rtx, machine_mode); + rtx aarch64_reverse_mask (machine_mode, unsigned int); + bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); +@@ -1083,4 +1085,7 @@ extern bool aarch64_harden_sls_blr_p (void); + + extern void aarch64_output_patchable_area (unsigned int, bool); + ++bool aarch64_optimize_mode_switching (aarch64_mode_entity); ++void aarch64_restore_za (rtx); ++ + #endif /* GCC_AARCH64_PROTOS_H */ +diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md +index 52427b4f1..d4973098e 100644 +--- a/gcc/config/aarch64/aarch64-sme.md ++++ b/gcc/config/aarch64/aarch64-sme.md +@@ -23,6 +23,7 @@ + ;; == State management + ;; ---- Test current state + ;; ---- PSTATE.SM management ++;; ---- PSTATE.ZA management + + ;; ========================================================================= + ;; == State management +@@ -169,3 +170,289 @@ + "" + "smstop\tsm" + ) ++ ++;; ------------------------------------------------------------------------- ++;; ---- PSTATE.ZA management ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SMSTART ZA ++;; - SMSTOP ZA ++;; plus calls to support routines. ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" ++ UNSPEC_SMSTOP_ZA ++ UNSPEC_INITIAL_ZERO_ZA ++ UNSPEC_TPIDR2_SAVE ++ UNSPEC_TPIDR2_RESTORE ++ UNSPEC_READ_TPIDR2 ++ UNSPEC_WRITE_TPIDR2 ++ UNSPEC_SETUP_LOCAL_TPIDR2 ++ UNSPEC_RESTORE_ZA ++ UNSPEC_START_PRIVATE_ZA_CALL ++ UNSPEC_END_PRIVATE_ZA_CALL ++ UNSPEC_COMMIT_LAZY_SAVE ++) ++ ++(define_c_enum "unspecv" ++ UNSPECV_ASM_UPDATE_ZA ++) ++ ++;; Use the ABI-defined routine to commit an uncommitted lazy save. ++;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM. 
++;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming
++;; value of the architected TPIDR2_EL0.
++(define_insn "aarch64_tpidr2_save"
++  [(set (reg:DI ZA_FREE_REGNUM)
++	(unspec:DI [(reg:DI SME_STATE_REGNUM)
++		    (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE))
++   (clobber (reg:DI R14_REGNUM))
++   (clobber (reg:DI R15_REGNUM))
++   (clobber (reg:DI R16_REGNUM))
++   (clobber (reg:DI R17_REGNUM))
++   (clobber (reg:DI R18_REGNUM))
++   (clobber (reg:DI R30_REGNUM))
++   (clobber (reg:CC CC_REGNUM))]
++  ""
++  "bl\t__arm_tpidr2_save"
++)
++
++;; Set PSTATE.ZA to 1.  If ZA was previously dormant or active,
++;; it remains in the same state afterwards, with the same contents.
++;; Otherwise, it goes from off to on with zeroed contents.
++;;
++;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved
++;; up past this instruction, since that could create an invalid
++;; combination of having an active lazy save while ZA is off.
++;; Create an anti-dependence by reading the current contents
++;; of TPIDR2_SETUP_REGNUM.
++;;
++;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging
++;; to the caller have already been saved.  That isn't necessary for this
++;; instruction itself, since PSTATE.ZA is already 1 if it contains data.
++;; But doing this here means that other uses of ZA can just depend on
++;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM.
++(define_insn "aarch64_smstart_za"
++  [(set (reg:DI SME_STATE_REGNUM)
++	(const_int 1))
++   (use (reg:DI TPIDR2_SETUP_REGNUM))
++   (use (reg:DI ZA_FREE_REGNUM))]
++  ""
++  "smstart\tza"
++)
++
++;; Disable ZA and discard its current contents.
++;;
++;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA
++;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past
++;; this instruction.  Depend on TPIDR2_SETUP_REGNUM to ensure this.
++;;
++;; We can only turn ZA off once we know that it is free (i.e. doesn't
++;; contain data belonging to the caller).  Depend on ZA_FREE_REGNUM
++;; to ensure this.
++;;
++;; We only turn ZA off when the current function's ZA state is dead,
++;; or perhaps if we're sure that the contents are saved.  Either way,
++;; we know whether ZA is saved or not.
++(define_insn "aarch64_smstop_za"
++  [(set (reg:DI SME_STATE_REGNUM)
++	(const_int 0))
++   (set (reg:DI ZA_SAVED_REGNUM)
++	(unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
++		    (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))]
++  ""
++  "smstop\tza"
++)
++
++;; Zero ZA after committing a lazy save.  The sequencing is enforced
++;; by reading ZA_FREE_REGNUM.
++(define_insn "aarch64_initial_zero_za"
++  [(set (reg:DI ZA_REGNUM)
++	(unspec:DI [(reg:DI SME_STATE_REGNUM)
++		    (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))]
++  ""
++  "zero\t{ za }"
++)
++
++;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of
++;; the current function's TPIDR2 block.  Other instructions can then
++;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block.
++(define_insn "aarch64_setup_local_tpidr2"
++  [(set (reg:DI TPIDR2_BLOCK_REGNUM)
++	(unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")]
++		   UNSPEC_SETUP_LOCAL_TPIDR2))]
++  ""
++  ""
++  [(set_attr "type" "no_insn")]
++)
++
++;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save.
++(define_insn "aarch64_clear_tpidr2"
++  [(set (reg:DI TPIDR2_SETUP_REGNUM)
++	(const_int 0))]
++  ""
++  "msr\ttpidr2_el0, xzr"
++)
++
++;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address
++;; is given by operand 0.  TPIDR2_BLOCK_REGNUM represents the contents of the
++;; pointed-to block.
++(define_insn "aarch64_write_tpidr2"
++  [(set (reg:DI TPIDR2_SETUP_REGNUM)
++	(unspec:DI [(match_operand 0 "pmode_register_operand" "r")
++		    (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))]
++  ""
++  "msr\ttpidr2_el0, %0"
++)
++
++;; Check whether ZA has been saved.  The system depends on the value that
++;; we wrote to TPIDR2_EL0 previously, so it depends on TPIDR2_SETUP_REGNUM.
++(define_insn "aarch64_read_tpidr2"
++  [(set (match_operand:DI 0 "register_operand" "=r")
++	(unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
++		    (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))]
++  ""
++  "mrs\t%0, tpidr2_el0"
++)
++
++;; Use the ABI-defined routine to restore lazy-saved ZA contents
++;; from the TPIDR2 block pointed to by X0.  ZA must already be active.
++(define_insn "aarch64_tpidr2_restore"
++  [(set (reg:DI ZA_SAVED_REGNUM)
++	(unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE))
++   (set (reg:DI SME_STATE_REGNUM)
++	(unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
++   (clobber (reg:DI R14_REGNUM))
++   (clobber (reg:DI R15_REGNUM))
++   (clobber (reg:DI R16_REGNUM))
++   (clobber (reg:DI R17_REGNUM))
++   (clobber (reg:DI R18_REGNUM))
++   (clobber (reg:DI R30_REGNUM))
++   (clobber (reg:CC CC_REGNUM))]
++  ""
++  "bl\t__arm_tpidr2_restore"
++)
++
++;; Check whether a lazy save set up by aarch64_save_za was committed
++;; and restore the saved contents if so.
++;;
++;; Operand 0 is the address of the current function's TPIDR2 block.
++(define_insn_and_split "aarch64_restore_za"
++  [(set (reg:DI ZA_SAVED_REGNUM)
++	(unspec:DI [(match_operand 0 "pmode_register_operand" "r")
++		    (reg:DI SME_STATE_REGNUM)
++		    (reg:DI TPIDR2_SETUP_REGNUM)
++		    (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
++   (clobber (reg:DI R0_REGNUM))
++   (clobber (reg:DI R14_REGNUM))
++   (clobber (reg:DI R15_REGNUM))
++   (clobber (reg:DI R16_REGNUM))
++   (clobber (reg:DI R17_REGNUM))
++   (clobber (reg:DI R18_REGNUM))
++   (clobber (reg:DI R30_REGNUM))
++   (clobber (reg:CC CC_REGNUM))]
++  ""
++  "#"
++  "&& epilogue_completed"
++  [(const_int 0)]
++  {
++    auto label = gen_label_rtx ();
++    auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
++    emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
++    auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
++    JUMP_LABEL (jump) = label;
++
++    aarch64_restore_za (operands[0]);
++    emit_label (label);
++    DONE;
++  }
++)
++
++;; This instruction is emitted after asms that alter ZA, in order to model
++;; the effect on dataflow.  The asm itself can't have ZA as an input or
++;; an output, since there is no associated data type.  Instead it retains
++;; the original "za" clobber, which on its own would indicate that ZA
++;; is dead.
++;;
++;; The operand is a unique identifier.
++(define_insn "aarch64_asm_update_za"
++  [(set (reg:VNx16QI ZA_REGNUM)
++	(unspec_volatile:VNx16QI
++	  [(reg:VNx16QI ZA_REGNUM)
++	   (reg:DI SME_STATE_REGNUM)
++	   (match_operand 0 "const_int_operand")]
++	  UNSPECV_ASM_UPDATE_ZA))]
++  ""
++  ""
++  [(set_attr "type" "no_insn")]
++)
++
++;; This pseudo-instruction is emitted as part of a call to a private-ZA
++;; function from a function with ZA state.  It marks a natural place to set
++;; up a lazy save, if that turns out to be necessary.  The save itself
++;; is managed by the mode-switching pass.
++(define_insn "aarch64_start_private_za_call" ++ (set (reg:DI LOWERING_REGNUM) ++ (unspec:DI (reg:DI LOWERING_REGNUM) UNSPEC_START_PRIVATE_ZA_CALL)) ++ "" ++ "" ++ (set_attr "type" "no_insn") ++) ++ ++;; This pseudo-instruction is emitted as part of a call to a private-ZA ++;; function from a function with ZA state. It marks a natural place to restore ++;; the current function's ZA contents from the lazy save buffer, if that ++;; turns out to be necessary. The save itself is managed by the ++;; mode-switching pass. ++(define_insn "aarch64_end_private_za_call" ++ (set (reg:DI LOWERING_REGNUM) ++ (unspec:DI (reg:DI LOWERING_REGNUM) UNSPEC_END_PRIVATE_ZA_CALL)) ++ "" ++ "" ++ (set_attr "type" "no_insn") ++) ++ ++;; This pseudo-instruction is emitted before a private-ZA function uses ++;; PSTATE.ZA state for the first time. The instruction checks whether ++;; ZA currently contains data belonging to a caller and commits the ++;; lazy save if so. ++;; ++;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero ++;; if ZA is live, and should therefore be zeroed after committing a save. ++;; ++;; The instruction is generated by the mode-switching pass. It is a ++;; define_insn_and_split rather than a define_expand because of the ++;; internal control flow. ++(define_insn_and_split "aarch64_commit_lazy_save" ++ (set (reg:DI ZA_FREE_REGNUM) ++ (unspec:DI (match_operand 0 "pmode_register_operand" "r") ++ (match_operand 1 "const_int_operand") ++ (reg:DI SME_STATE_REGNUM) ++ (reg:DI TPIDR2_SETUP_REGNUM) ++ (reg:VNx16QI ZA_REGNUM) UNSPEC_COMMIT_LAZY_SAVE)) ++ (set (reg:DI ZA_REGNUM) ++ (unspec:DI (reg:DI SME_STATE_REGNUM) ++ (reg:DI ZA_FREE_REGNUM) UNSPEC_INITIAL_ZERO_ZA)) ++ (clobber (reg:DI R14_REGNUM)) ++ (clobber (reg:DI R15_REGNUM)) ++ (clobber (reg:DI R16_REGNUM)) ++ (clobber (reg:DI R17_REGNUM)) ++ (clobber (reg:DI R18_REGNUM)) ++ (clobber (reg:DI R30_REGNUM)) ++ (clobber (reg:CC CC_REGNUM)) ++ "" ++ "#" ++ "true" ++ (const_int 0) ++ { ++ auto label = gen_label_rtx (); ++ auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands0, label)); ++ JUMP_LABEL (jump) = label; ++ emit_insn (gen_aarch64_tpidr2_save ()); ++ emit_insn (gen_aarch64_clear_tpidr2 ()); ++ if (INTVAL (operands1) != 0) ++ emit_insn (gen_aarch64_initial_zero_za ()); ++ emit_label (label); ++ DONE; ++ } ++) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 82f8e574e..a6e996c5b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -91,6 +91,26 @@ + /* Defined for convenience. */ + #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) + ++/* Flags that describe how a function shares certain architectural state ++ with its callers. ++ ++ - AARCH64_STATE_SHARED indicates that the function does share the state ++ with callers. ++ ++ - AARCH64_STATE_IN indicates that the function reads (or might read) the ++ incoming state. The converse is that the function ignores the incoming ++ state. ++ ++ - AARCH64_STATE_OUT indicates that the function returns new state. ++ The converse is that the state on return is the same as it was on entry. ++ ++ A function that partially modifies the state treats it as both IN ++ and OUT (because the value on return depends to some extent on the ++ value on input). */ ++constexpr auto AARCH64_STATE_SHARED = 1U << 0; ++constexpr auto AARCH64_STATE_IN = 1U << 1; ++constexpr auto AARCH64_STATE_OUT = 1U << 2; ++ + /* Information about a legitimate vector immediate operand. 
*/ + struct simd_immediate_info + { +@@ -2959,6 +2979,151 @@ static const struct processor all_cores = + /* The current tuning set. */ + struct tune_params aarch64_tune_params = generic_tunings; + ++/* If NAME is the name of an arm:: attribute that describes shared state, ++ return its associated AARCH64_STATE_* flags, otherwise return 0. */ ++static unsigned int ++aarch64_attribute_shared_state_flags (const char *name) ++{ ++ if (strcmp (name, "in") == 0) ++ return AARCH64_STATE_SHARED | AARCH64_STATE_IN; ++ if (strcmp (name, "inout") == 0) ++ return AARCH64_STATE_SHARED | AARCH64_STATE_IN | AARCH64_STATE_OUT; ++ if (strcmp (name, "out") == 0) ++ return AARCH64_STATE_SHARED | AARCH64_STATE_OUT; ++ if (strcmp (name, "preserves") == 0) ++ return AARCH64_STATE_SHARED; ++ return 0; ++} ++ ++/* See whether attribute list ATTRS has any sharing information ++ for state STATE_NAME. Return the associated state flags if so, ++ otherwise return 0. */ ++static unsigned int ++aarch64_lookup_shared_state_flags (tree attrs, const char *state_name) ++{ ++ for (tree attr = attrs; attr; attr = TREE_CHAIN (attr)) ++ { ++ if (!cxx11_attribute_p (attr)) ++ continue; ++ ++ auto ns = IDENTIFIER_POINTER (TREE_PURPOSE (TREE_PURPOSE (attr))); ++ if (strcmp (ns, "arm") != 0) ++ continue; ++ ++ auto attr_name = IDENTIFIER_POINTER (TREE_VALUE (TREE_PURPOSE (attr))); ++ auto flags = aarch64_attribute_shared_state_flags (attr_name); ++ if (!flags) ++ continue; ++ ++ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (TREE_CODE (value) == STRING_CST ++ && strcmp (TREE_STRING_POINTER (value), state_name) == 0) ++ return flags; ++ } ++ } ++ return 0; ++} ++ ++/* Return true if DECL creates a new scope for state STATE_STRING. */ ++static bool ++aarch64_fndecl_has_new_state (const_tree decl, const char *state_name) ++{ ++ if (tree attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl))) ++ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (TREE_CODE (value) == STRING_CST ++ && strcmp (TREE_STRING_POINTER (value), state_name) == 0) ++ return true; ++ } ++ return false; ++} ++ ++/* Return true if attribute argument VALUE is a recognized state string, ++ otherwise report an error. NAME is the name of the attribute to which ++ VALUE is being passed. */ ++static bool ++aarch64_check_state_string (tree name, tree value) ++{ ++ if (TREE_CODE (value) != STRING_CST) ++ { ++ error ("the arguments to %qE must be constant strings", name); ++ return false; ++ } ++ ++ const char *state_name = TREE_STRING_POINTER (value); ++ if (strcmp (state_name, "za") != 0) ++ { ++ error ("unrecognized state string %qs", state_name); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* qsort callback to compare two STRING_CSTs. */ ++static int ++cmp_string_csts (const void *a, const void *b) ++{ ++ return strcmp (TREE_STRING_POINTER (*(const_tree const *) a), ++ TREE_STRING_POINTER (*(const_tree const *) b)); ++} ++ ++/* Canonicalize a list of state strings. ARGS contains the arguments to ++ a new attribute while OLD_ATTR, if nonnull, contains a previous attribute ++ of the same type. If CAN_MERGE_IN_PLACE, it is safe to adjust OLD_ATTR's ++ arguments and drop the new attribute. Otherwise, the new attribute must ++ be kept and ARGS must include the information in OLD_ATTR. ++ ++ In both cases, the new arguments must be a sorted list of state strings ++ with duplicates removed. 
++
++   Return true if the new attribute should be kept, false if it should
++   be dropped.  */
++static bool
++aarch64_merge_string_arguments (tree args, tree old_attr,
++				bool can_merge_in_place)
++{
++  /* Get a sorted list of all state strings (including duplicates).  */
++  auto add_args = [](vec<tree> &strings, const_tree args)
++    {
++      for (const_tree arg = args; arg; arg = TREE_CHAIN (arg))
++	if (TREE_CODE (TREE_VALUE (arg)) == STRING_CST)
++	  strings.safe_push (TREE_VALUE (arg));
++    };
++  auto_vec<tree, 16> strings;
++  add_args (strings, args);
++  if (old_attr)
++    add_args (strings, TREE_VALUE (old_attr));
++  strings.qsort (cmp_string_csts);
++
++  /* The list can be empty if there was no previous attribute and if all
++     the new arguments are erroneous.  Drop the attribute in that case.  */
++  if (strings.is_empty ())
++    return false;
++
++  /* Destructively modify one of the argument lists, removing duplicates
++     on the fly.  */
++  bool use_old_attr = old_attr && can_merge_in_place;
++  tree *end = use_old_attr ? &TREE_VALUE (old_attr) : &args;
++  tree prev = NULL_TREE;
++  for (tree arg : strings)
++    {
++      if (prev && simple_cst_equal (arg, prev))
++	continue;
++      prev = arg;
++      if (!*end)
++	*end = tree_cons (NULL_TREE, arg, NULL_TREE);
++      else
++	TREE_VALUE (*end) = arg;
++      end = &TREE_CHAIN (*end);
++    }
++  *end = NULL_TREE;
++  return !use_old_attr;
++}
++
+ /* Check whether an 'aarch64_vector_pcs' attribute is valid.  */
+
+ static tree
+@@ -2987,6 +3152,101 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
+   gcc_unreachable ();
+ }
+
++/* Return true if arm::new(ARGS) is compatible with the type of decl DECL,
++   otherwise report an error.  */
++static bool
++aarch64_check_arm_new_against_type (tree args, tree decl)
++{
++  tree type_attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
++  for (tree arg = args; arg; arg = TREE_CHAIN (arg))
++    {
++      tree value = TREE_VALUE (arg);
++      if (TREE_CODE (value) == STRING_CST)
++	{
++	  const char *state_name = TREE_STRING_POINTER (value);
++	  if (aarch64_lookup_shared_state_flags (type_attrs, state_name))
++	    {
++	      error_at (DECL_SOURCE_LOCATION (decl),
++			"cannot create a new %qs scope since %qs is shared"
++			" with callers", state_name, state_name);
++	      return false;
++	    }
++	}
++    }
++  return true;
++}
++
++/* Callback for arm::new attributes.  */
++static tree
++handle_arm_new (tree *node, tree name, tree args, int, bool *no_add_attrs)
++{
++  tree decl = *node;
++  if (TREE_CODE (decl) != FUNCTION_DECL)
++    {
++      error ("%qE attribute applies only to function definitions", name);
++      *no_add_attrs = true;
++      return NULL_TREE;
++    }
++  if (TREE_TYPE (decl) == error_mark_node)
++    {
++      *no_add_attrs = true;
++      return NULL_TREE;
++    }
++
++  for (tree arg = args; arg; arg = TREE_CHAIN (arg))
++    aarch64_check_state_string (name, TREE_VALUE (arg));
++
++  if (!aarch64_check_arm_new_against_type (args, decl))
++    {
++      *no_add_attrs = true;
++      return NULL_TREE;
++    }
++
++  /* If there is an old attribute, we should try to update it in-place,
++     so that there is only one (definitive) arm::new attribute on the decl.  */
++  tree old_attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl));
++  if (!aarch64_merge_string_arguments (args, old_attr, true))
++    *no_add_attrs = true;
++
++  return NULL_TREE;
++}
++
++/* Callback for arm::{in,out,inout,preserves} attributes.
*/ ++static tree ++handle_arm_shared (tree *node, tree name, tree args, ++ int, bool *no_add_attrs) ++{ ++ tree type = *node; ++ tree old_attrs = TYPE_ATTRIBUTES (type); ++ auto flags = aarch64_attribute_shared_state_flags (IDENTIFIER_POINTER (name)); ++ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) ++ { ++ tree value = TREE_VALUE (arg); ++ if (aarch64_check_state_string (name, value)) ++ { ++ const char *state_name = TREE_STRING_POINTER (value); ++ auto old_flags = aarch64_lookup_shared_state_flags (old_attrs, ++ state_name); ++ if (old_flags && old_flags != flags) ++ { ++ error ("inconsistent attributes for state %qs", state_name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ } ++ } ++ ++ /* We can't update an old attribute in-place, since types are shared. ++ Instead make sure that this new attribute contains all the ++ information, so that the old attribute becomes redundant. */ ++ tree old_attr = lookup_attribute ("arm", IDENTIFIER_POINTER (name), ++ old_attrs); ++ if (!aarch64_merge_string_arguments (args, old_attr, false)) ++ *no_add_attrs = true; ++ ++ return NULL_TREE; ++} ++ + /* Mutually-exclusive function type attributes for controlling PSTATE.SM. */ + static const struct attribute_spec::exclusions attr_streaming_exclusions = + { +@@ -3023,6 +3283,16 @@ static const attribute_spec aarch64_arm_attributes = + NULL, attr_streaming_exclusions }, + { "streaming_compatible", 0, 0, false, true, true, true, + NULL, attr_streaming_exclusions }, ++ { "new", 1, -1, true, false, false, false, ++ handle_arm_new, NULL }, ++ { "preserves", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL }, ++ { "in", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL }, ++ { "out", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL }, ++ { "inout", 1, -1, false, true, true, true, ++ handle_arm_shared, NULL } + }; + + static const scoped_attribute_specs aarch64_arm_attribute_table = +@@ -4202,6 +4472,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) + case PR_HI_REGS: + case FFR_REGS: + case PR_AND_FFR_REGS: ++ case FAKE_REGS: + return 1; + default: + return CEIL (lowest_size, UNITS_PER_WORD); +@@ -4232,6 +4503,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) + if (pr_or_ffr_regnum_p (regno)) + return false; + ++ /* These registers are abstract; their modes don't matter. */ ++ if (FAKE_REGNUM_P (regno)) ++ return true; ++ + if (regno == SP_REGNUM) + /* The purpose of comparing with ptr_mode is to support the + global register variable associated with the stack pointer +@@ -4352,12 +4627,34 @@ aarch64_fntype_pstate_sm (const_tree fntype) + return AARCH64_FL_SM_OFF; + } + ++/* Return state flags that describe whether and how functions of type ++ FNTYPE share state STATE_NAME with their callers. */ ++ ++static unsigned int ++aarch64_fntype_shared_flags (const_tree fntype, const char *state_name) ++{ ++ return aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (fntype), ++ state_name); ++} ++ ++/* Return the state of PSTATE.ZA on entry to functions of type FNTYPE. */ ++ ++static aarch64_feature_flags ++aarch64_fntype_pstate_za (const_tree fntype) ++{ ++ if (aarch64_fntype_shared_flags (fntype, "za")) ++ return AARCH64_FL_ZA_ON; ++ ++ return 0; ++} ++ + /* Return the ISA mode on entry to functions of type FNTYPE. 
*/ + + static aarch64_feature_flags + aarch64_fntype_isa_mode (const_tree fntype) + { +- return aarch64_fntype_pstate_sm (fntype); ++ return (aarch64_fntype_pstate_sm (fntype) ++ | aarch64_fntype_pstate_za (fntype)); + } + + /* Return the state of PSTATE.SM when compiling the body of +@@ -4370,13 +4667,37 @@ aarch64_fndecl_pstate_sm (const_tree fndecl) + return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); + } + ++/* Return true if function FNDECL has state STATE_NAME, either by creating ++ new state itself or by sharing state with callers. */ ++ ++static bool ++aarch64_fndecl_has_state (tree fndecl, const char *state_name) ++{ ++ return (aarch64_fndecl_has_new_state (fndecl, state_name) ++ || aarch64_fntype_shared_flags (TREE_TYPE (fndecl), ++ state_name) != 0); ++} ++ ++/* Return the state of PSTATE.ZA when compiling the body of function FNDECL. ++ This might be different from the state of PSTATE.ZA on entry. */ ++ ++static aarch64_feature_flags ++aarch64_fndecl_pstate_za (const_tree fndecl) ++{ ++ if (aarch64_fndecl_has_new_state (fndecl, "za")) ++ return AARCH64_FL_ZA_ON; ++ ++ return aarch64_fntype_pstate_za (TREE_TYPE (fndecl)); ++} ++ + /* Return the ISA mode that should be used to compile the body of + function FNDECL. */ + + static aarch64_feature_flags + aarch64_fndecl_isa_mode (const_tree fndecl) + { +- return aarch64_fndecl_pstate_sm (fndecl); ++ return (aarch64_fndecl_pstate_sm (fndecl) ++ | aarch64_fndecl_pstate_za (fndecl)); + } + + /* Return the state of PSTATE.SM on entry to the current function. +@@ -4389,6 +4710,44 @@ aarch64_cfun_incoming_pstate_sm () + return aarch64_fntype_pstate_sm (TREE_TYPE (cfun->decl)); + } + ++/* Return the state of PSTATE.ZA on entry to the current function. ++ This might be different from the state of PSTATE.ZA in the function ++ body. */ ++ ++static aarch64_feature_flags ++aarch64_cfun_incoming_pstate_za () ++{ ++ return aarch64_fntype_pstate_za (TREE_TYPE (cfun->decl)); ++} ++ ++/* Return state flags that describe whether and how the current function shares ++ state STATE_NAME with callers. */ ++ ++static unsigned int ++aarch64_cfun_shared_flags (const char *state_name) ++{ ++ return aarch64_fntype_shared_flags (TREE_TYPE (cfun->decl), state_name); ++} ++ ++/* Return true if the current function creates new state of type STATE_NAME ++ (as opposed to sharing the state with its callers or ignoring the state ++ altogether). */ ++ ++static bool ++aarch64_cfun_has_new_state (const char *state_name) ++{ ++ return aarch64_fndecl_has_new_state (cfun->decl, state_name); ++} ++ ++/* Return true if the current function has state STATE_NAME, either by ++ creating new state itself or by sharing state with callers. */ ++ ++static bool ++aarch64_cfun_has_state (const char *state_name) ++{ ++ return aarch64_fndecl_has_state (cfun->decl, state_name); ++} ++ + /* Return true if a call from the current function to a function with + ISA mode CALLEE_MODE would involve a change to PSTATE.SM around + the BL instruction. */ +@@ -5952,6 +6311,74 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + factor, nelts_per_vq); + } + ++/* Return a constant that represents FACTOR multiplied by the ++ number of 128-bit quadwords in an SME vector. ISA_MODE is the ++ ISA mode in which the calculation is being performed. 
*/
++
++static rtx
++aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor,
++			  aarch64_feature_flags isa_mode)
++{
++  gcc_assert (aarch64_sve_rdvl_factor_p (factor));
++  if (isa_mode & AARCH64_FL_SM_ON)
++    /* We're in streaming mode, so we can use normal poly-int values.  */
++    return gen_int_mode ({ factor, factor }, mode);
++
++  rtvec vec = gen_rtvec (1, gen_int_mode (factor, SImode));
++  rtx unspec = gen_rtx_UNSPEC (mode, vec, UNSPEC_SME_VQ);
++  return gen_rtx_CONST (mode, unspec);
++}
++
++/* Return true if X is a constant that represents some number X
++   multiplied by the number of quadwords in an SME vector.  Store this X
++   in *FACTOR if so.  */
++
++static bool
++aarch64_sme_vq_unspec_p (const_rtx x, HOST_WIDE_INT *factor)
++{
++  if (!TARGET_SME || GET_CODE (x) != CONST)
++    return false;
++
++  x = XEXP (x, 0);
++  if (GET_CODE (x) != UNSPEC
++      || XINT (x, 1) != UNSPEC_SME_VQ
++      || XVECLEN (x, 0) != 1)
++    return false;
++
++  x = XVECEXP (x, 0, 0);
++  if (!CONST_INT_P (x))
++    return false;
++
++  *factor = INTVAL (x);
++  return true;
++}
++
++/* Return true if X is a constant that represents some number Y
++   multiplied by the number of quadwords in an SME vector, and if
++   that Y is in the range of RDSVL.  */
++
++bool
++aarch64_rdsvl_immediate_p (const_rtx x)
++{
++  HOST_WIDE_INT factor;
++  return (aarch64_sme_vq_unspec_p (x, &factor)
++	  && aarch64_sve_rdvl_factor_p (factor));
++}
++
++/* Return the asm string for an RDSVL instruction that calculates X,
++   which is a constant that satisfies aarch64_rdsvl_immediate_p.  */
++
++char *
++aarch64_output_rdsvl (const_rtx x)
++{
++  gcc_assert (aarch64_rdsvl_immediate_p (x));
++  static char buffer[sizeof ("rdsvl\t%x0, #-") + 3 * sizeof (int)];
++  x = XVECEXP (XEXP (x, 0), 0, 0);
++  snprintf (buffer, sizeof (buffer), "rdsvl\t%%x0, #%d",
++	    (int) INTVAL (x) / 16);
++  return buffer;
++}
++
+ /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2.  */
+
+ static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
+@@ -7717,6 +8144,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
+       return;
+     }
+
++  if (aarch64_rdsvl_immediate_p (base))
++    {
++      /* We could handle non-constant offsets if they are ever
++	 generated.  */
++      gcc_assert (const_offset == 0);
++      emit_insn (gen_rtx_SET (dest, imm));
++      return;
++    }
++
+   sty = aarch64_classify_symbol (base, const_offset);
+   switch (sty)
+     {
+@@ -8732,8 +9168,10 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
+       rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode,
+						   pcum->pcs_variant);
+       rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum);
+-      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, abi_cookie,
+-						    sme_mode_switch_args));
++      rtx shared_za_flags = gen_int_mode (pcum->shared_za_flags, SImode);
++      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, abi_cookie,
++						    sme_mode_switch_args,
++						    shared_za_flags));
+     }
+
+   aarch64_layout_arg (pcum_v, arg);
+@@ -8744,7 +9182,7 @@ void
+ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
+			       const_tree fntype,
+			       rtx libname ATTRIBUTE_UNUSED,
+-			       const_tree fndecl ATTRIBUTE_UNUSED,
++			       const_tree fndecl,
+			       unsigned n_named ATTRIBUTE_UNUSED,
+			       bool silent_p)
+ {
+@@ -8769,6 +9207,8 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
+   pcum->aapcs_stack_words = 0;
+   pcum->aapcs_stack_size = 0;
+   pcum->silent_p = silent_p;
++  pcum->shared_za_flags
++    = (fntype ?
aarch64_fntype_shared_flags (fntype, "za") : 0U); + pcum->num_sme_mode_switch_args = 0; + + if (!silent_p +@@ -10803,14 +11243,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + } + } + ++/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */ ++ ++void ++aarch64_extra_live_on_entry (bitmap regs) ++{ ++ if (TARGET_ZA) ++ { ++ bitmap_set_bit (regs, LOWERING_REGNUM); ++ bitmap_set_bit (regs, SME_STATE_REGNUM); ++ bitmap_set_bit (regs, TPIDR2_SETUP_REGNUM); ++ bitmap_set_bit (regs, ZA_FREE_REGNUM); ++ bitmap_set_bit (regs, ZA_SAVED_REGNUM); ++ ++ /* The only time ZA can't have live contents on entry is when ++ the function explicitly treats it as a pure output. */ ++ auto za_flags = aarch64_cfun_shared_flags ("za"); ++ if (za_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT)) ++ bitmap_set_bit (regs, ZA_REGNUM); ++ } ++} ++ + /* Return 1 if the register is used by the epilogue. We need to say the + return register is used, but only after epilogue generation is complete. + Note that in the case of sibcalls, the values "used by the epilogue" are +- considered live at the start of the called function. +- +- For SIMD functions we need to return 1 for FP registers that are saved and +- restored by a function but are not zero in call_used_regs. If we do not do +- this optimizations may remove the restore of the register. */ ++ considered live at the start of the called function. */ + + int + aarch64_epilogue_uses (int regno) +@@ -10820,6 +11277,18 @@ aarch64_epilogue_uses (int regno) + if (regno == LR_REGNUM) + return 1; + } ++ if (regno == LOWERING_REGNUM && TARGET_ZA) ++ return 1; ++ if (regno == SME_STATE_REGNUM && TARGET_ZA) ++ return 1; ++ if (regno == TPIDR2_SETUP_REGNUM && TARGET_ZA) ++ return 1; ++ /* If the function shares SME state with its caller, ensure that that ++ data is not in the lazy save buffer on exit. */ ++ if (regno == ZA_SAVED_REGNUM && aarch64_cfun_incoming_pstate_za () != 0) ++ return 1; ++ if (regno == ZA_REGNUM && aarch64_cfun_shared_flags ("za") != 0) ++ return 1; + return 0; + } + +@@ -11501,8 +11970,10 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) + + /* There's no way to calculate VL-based values using relocations. */ + subrtx_iterator::array_type array; ++ HOST_WIDE_INT factor; + FOR_EACH_SUBRTX (iter, array, x, ALL) +- if (GET_CODE (*iter) == CONST_POLY_INT) ++ if (GET_CODE (*iter) == CONST_POLY_INT ++ || aarch64_sme_vq_unspec_p (x, &factor)) + return true; + + poly_int64 offset; +@@ -12364,6 +12835,72 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + return true; + } + ++/* Return a fresh memory reference to the current function's TPIDR2 block, ++ creating a block if necessary. */ ++ ++static rtx ++aarch64_get_tpidr2_block () ++{ ++ if (!cfun->machine->tpidr2_block) ++ /* The TPIDR2 block is 16 bytes in size and must be aligned to a 128-bit ++ boundary. */ ++ cfun->machine->tpidr2_block = assign_stack_local (V16QImode, 16, 128); ++ return copy_rtx (cfun->machine->tpidr2_block); ++} ++ ++/* Return a fresh register that points to the current function's ++ TPIDR2 block, creating a block if necessary. */ ++ ++static rtx ++aarch64_get_tpidr2_ptr () ++{ ++ rtx block = aarch64_get_tpidr2_block (); ++ return force_reg (Pmode, XEXP (block, 0)); ++} ++ ++/* Emit instructions to allocate a ZA lazy save buffer and initialize the ++ current function's TPIDR2 block. 
*/ ++ ++static void ++aarch64_init_tpidr2_block () ++{ ++ rtx block = aarch64_get_tpidr2_block (); ++ ++ /* The ZA save buffer is SVL.B*SVL.B bytes in size. */ ++ rtx svl_bytes = aarch64_sme_vq_immediate (Pmode, 16, AARCH64_ISA_MODE); ++ rtx svl_bytes_reg = force_reg (DImode, svl_bytes); ++ rtx za_size = expand_simple_binop (Pmode, MULT, svl_bytes_reg, ++ svl_bytes_reg, NULL, 0, OPTAB_LIB_WIDEN); ++ rtx za_save_buffer = allocate_dynamic_stack_space (za_size, 128, ++ BITS_PER_UNIT, -1, true); ++ za_save_buffer = force_reg (Pmode, za_save_buffer); ++ cfun->machine->za_save_buffer = za_save_buffer; ++ ++ /* The first word of the block points to the save buffer and the second ++ word is the number of ZA slices to save. */ ++ rtx block_0 = adjust_address (block, DImode, 0); ++ rtx block_8 = adjust_address (block, DImode, 8); ++ emit_insn (gen_store_pair_dw_didi (block_0, za_save_buffer, ++ block_8, svl_bytes_reg)); ++ ++ if (!memory_operand (block, V16QImode)) ++ block = replace_equiv_address (block, force_reg (Pmode, XEXP (block, 0))); ++ emit_insn (gen_aarch64_setup_local_tpidr2 (block)); ++} ++ ++/* Restore the contents of ZA from the lazy save buffer, given that ++ register TPIDR2_BLOCK points to the current function's TPIDR2 block. ++ PSTATE.ZA is known to be 0 and TPIDR2_EL0 is known to be null. */ ++ ++void ++aarch64_restore_za (rtx tpidr2_block) ++{ ++ emit_insn (gen_aarch64_smstart_za ()); ++ if (REGNO (tpidr2_block) != R0_REGNUM) ++ emit_move_insn (gen_rtx_REG (Pmode, R0_REGNUM), tpidr2_block); ++ emit_insn (gen_aarch64_tpidr2_restore ()); ++} ++ + /* Implement TARGET_START_CALL_ARGS. */ + + static void +@@ -12379,6 +12916,20 @@ aarch64_start_call_args (cumulative_args_t ca_v) + " option %<-march%>, or by using the %<target%>" + " attribute or pragma", "sme"); + } ++ ++ if ((ca->shared_za_flags & (AARCH64_STATE_IN | AARCH64_STATE_OUT)) ++ && !aarch64_cfun_has_state ("za")) ++ error ("call to a function that shares %qs state from a function" ++ " that has no %qs state", "za", "za"); ++ else if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON)) ++ error ("call to a function that shares SME state from a function" ++ " that has no SME state"); ++ ++ /* If this is a call to a private ZA function, emit a marker to ++ indicate where any necessary set-up code could be inserted. ++ The code itself is inserted by the mode-switching pass. */ ++ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON)) ++ emit_insn (gen_aarch64_start_private_za_call ()); + } + + /* This function is used by the call expanders of the machine description. +@@ -12391,6 +12942,8 @@ aarch64_start_call_args (cumulative_args_t ca_v) + The second element is a PARALLEL that lists all the argument + registers that need to be saved and restored around a change + in PSTATE.SM, or const0_rtx if no such switch is needed. ++ The third element is a const_int that contains the sharing flags ++ for ZA. + SIBCALL indicates whether this function call is normal call or sibling call. + It will generate different pattern accordingly. 
*/
+
+@@ -12403,10 +12956,12 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
+
+   rtx callee_abi = cookie;
+   rtx sme_mode_switch_args = const0_rtx;
++  unsigned int shared_za_flags = 0;
+   if (GET_CODE (cookie) == PARALLEL)
+     {
+       callee_abi = XVECEXP (cookie, 0, 0);
+       sme_mode_switch_args = XVECEXP (cookie, 0, 1);
++      shared_za_flags = INTVAL (XVECEXP (cookie, 0, 2));
+     }
+
+   gcc_assert (CONST_INT_P (callee_abi));
+@@ -12426,6 +12981,41 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
+     : !REG_P (callee))
+     XEXP (mem, 0) = force_reg (mode, callee);
+
++  /* Accumulate the return values, including state that is shared via
++     attributes.  */
++  auto_vec<rtx, 8> return_values;
++  if (result)
++    {
++      if (GET_CODE (result) == PARALLEL)
++	for (int i = 0; i < XVECLEN (result, 0); ++i)
++	  return_values.safe_push (XVECEXP (result, 0, i));
++      else
++	return_values.safe_push (result);
++    }
++  unsigned int orig_num_return_values = return_values.length ();
++  if (shared_za_flags & AARCH64_STATE_OUT)
++    return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_REGNUM));
++  /* When calling private-ZA functions from functions with ZA state,
++     we want to know whether the call committed a lazy save.  */
++  if (TARGET_ZA && !shared_za_flags)
++    return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM));
++
++  /* Create the new return value, if necessary.  */
++  if (orig_num_return_values != return_values.length ())
++    {
++      if (return_values.length () == 1)
++	result = return_values[0];
++      else
++	{
++	  for (rtx &x : return_values)
++	    if (GET_CODE (x) != EXPR_LIST)
++	      x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
++	  rtvec v = gen_rtvec_v (return_values.length (),
++				 return_values.address ());
++	  result = gen_rtx_PARALLEL (VOIDmode, v);
++	}
++    }
++
+   call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
+
+   if (result != NULL_RTX)
+@@ -12492,6 +13082,50 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
+
+       cfun->machine->call_switches_pstate_sm = true;
+     }
++
++  /* Add any ZA-related information.
++     ZA_REGNUM represents the current function's ZA state, rather than
++     the contents of the ZA register itself.  We ensure that the function's
++     ZA state is preserved by private-ZA call sequences, so the call itself
++     does not use or clobber ZA_REGNUM.  */
++  if (TARGET_ZA)
++    {
++      /* The callee requires ZA to be active if the callee is shared-ZA,
++	 otherwise it requires ZA to be dormant or off.  The state of ZA is
++	 captured by a combination of SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM,
++	 and ZA_SAVED_REGNUM.  */
++      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
++	       gen_rtx_REG (DImode, SME_STATE_REGNUM));
++      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
++	       gen_rtx_REG (DImode, TPIDR2_SETUP_REGNUM));
++      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
++	       gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM));
++
++      /* Keep the aarch64_start/end_private_za_call markers live.  */
++      if (!(callee_isa_mode & AARCH64_FL_ZA_ON))
++	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
++		 gen_rtx_REG (VNx16BImode, LOWERING_REGNUM));
++
++      /* If the callee is a shared-ZA function, record whether it uses the
++	 current value of ZA.  */
++      if (shared_za_flags & AARCH64_STATE_IN)
++	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
++		 gen_rtx_REG (VNx16BImode, ZA_REGNUM));
++    }
++}
++
++/* Implement TARGET_END_CALL_ARGS.  */
++
++static void
++aarch64_end_call_args (cumulative_args_t ca_v)
++{
++  CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
++
++  /* If this is a call to a private ZA function, emit a marker to
++     indicate where any necessary restoration code could be inserted.
++     The code itself is inserted by the mode-switching pass.  */
++  if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
++    emit_insn (gen_aarch64_end_private_za_call ());
+ }
+
+ /* Emit call insn with PAT and do aarch64-specific handling.  */
+@@ -13602,6 +14236,9 @@ aarch64_regno_regclass (unsigned regno)
+   if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
+     return FFR_REGS;
+
++  if (FAKE_REGNUM_P (regno))
++    return FAKE_REGS;
++
+   return NO_REGS;
+ }
+
+@@ -13957,12 +14594,14 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
+       return (vec_flags & VEC_ADVSIMD
+	       ? CEIL (lowest_size, UNITS_PER_VREG)
+	       : CEIL (lowest_size, UNITS_PER_WORD));
++
+     case STACK_REG:
+     case PR_REGS:
+     case PR_LO_REGS:
+     case PR_HI_REGS:
+     case FFR_REGS:
+     case PR_AND_FFR_REGS:
++    case FAKE_REGS:
+       return 1;
+
+     case NO_REGS:
+@@ -19002,10 +19641,14 @@ aarch64_override_options_internal (struct gcc_options *opts)
+       && !fixed_regs[R18_REGNUM])
+     error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
+
+-  if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
++  if ((opts->x_aarch64_isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
+       && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME))
+     {
+-      error ("streaming functions require the ISA extension %qs", "sme");
++      if (opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
++	error ("streaming functions require the ISA extension %qs", "sme");
++      else
++	error ("functions with SME state require the ISA extension %qs",
++	       "sme");
+       inform (input_location, "you can enable %qs using the command-line"
+	       " option %<-march%>, or by using the %<target%>"
+	       " attribute or pragma", "sme");
+@@ -21341,6 +21984,8 @@ aarch64_conditional_register_usage (void)
+   CLEAR_HARD_REG_BIT (operand_reg_set, VG_REGNUM);
+   CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
+   CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
++  for (int i = FIRST_FAKE_REGNUM; i <= LAST_FAKE_REGNUM; ++i)
++    CLEAR_HARD_REG_BIT (operand_reg_set, i);
+
+   /* When tracking speculation, we need a couple of call-clobbered registers
+      to track the speculation state.  It would be nice to just use
+@@ -22795,6 +23440,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
+	   || aarch64_sve_rdvl_immediate_p (x)))
+     return true;
+
++  if (aarch64_rdsvl_immediate_p (x))
++    return true;
++
+   return aarch64_classify_symbolic_expression (x)
+     == SYMBOL_TINY_ABSOLUTE;
+ }
+@@ -28266,9 +28914,45 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2)
+     return 0;
+   if (!check_attr ("arm", "streaming_compatible"))
+     return 0;
++  if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "za")
++      != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "za"))
++    return 0;
+   return 1;
+ }
+
++/* Implement TARGET_MERGE_DECL_ATTRIBUTES.
*/ ++ ++static tree ++aarch64_merge_decl_attributes (tree olddecl, tree newdecl) ++{ ++ tree old_attrs = DECL_ATTRIBUTES (olddecl); ++ tree old_new = lookup_attribute ("arm", "new", old_attrs); ++ ++ tree new_attrs = DECL_ATTRIBUTES (newdecl); ++ tree new_new = lookup_attribute ("arm", "new", new_attrs); ++ ++ if (DECL_INITIAL (olddecl) && new_new) ++ { ++ error ("cannot apply attribute %qs to %q+D after the function" ++ " has been defined", "new", newdecl); ++ inform (DECL_SOURCE_LOCATION (olddecl), "%q+D defined here", ++ newdecl); ++ } ++ else ++ { ++ if (old_new && new_new) ++ { ++ old_attrs = remove_attribute ("arm", "new", old_attrs); ++ TREE_VALUE (new_new) = chainon (TREE_VALUE (new_new), ++ TREE_VALUE (old_new)); ++ } ++ if (new_new) ++ aarch64_check_arm_new_against_type (TREE_VALUE (new_new), newdecl); ++ } ++ ++ return merge_attributes (old_attrs, new_attrs); ++} ++ + /* Implement TARGET_GET_MULTILIB_ABI_NAME */ + + static const char * +@@ -28634,6 +29318,629 @@ aarch64_indirect_call_asm (rtx addr) + return ""; + } + ++/* Implement OPTIMIZE_MODE_SWITCHING. */ ++ ++bool ++aarch64_optimize_mode_switching (aarch64_mode_entity entity) ++{ ++ bool have_sme_state = (aarch64_cfun_incoming_pstate_za () != 0 ++ || (aarch64_cfun_has_new_state ("za") ++ && df_regs_ever_live_p (ZA_REGNUM))); ++ ++ if (have_sme_state && nonlocal_goto_handler_labels) ++ { ++ static bool reported; ++ if (!reported) ++ { ++ sorry ("non-local gotos in functions with SME state"); ++ reported = true; ++ } ++ } ++ ++ switch (entity) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return have_sme_state && !nonlocal_goto_handler_labels; ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_EMIT for ZA_SAVE_BUFFER. */ ++ ++static void ++aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode mode, ++ aarch64_tristate_mode prev_mode) ++{ ++ if (mode == aarch64_tristate_mode::YES) ++ { ++ gcc_assert (prev_mode == aarch64_tristate_mode::NO); ++ aarch64_init_tpidr2_block (); ++ } ++ else ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_EMIT for LOCAL_SME_STATE. */ ++ ++static void ++aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode, ++ aarch64_local_sme_state prev_mode) ++{ ++ /* Back-propagation should ensure that we're always starting from ++ a known mode. */ ++ gcc_assert (prev_mode != aarch64_local_sme_state::ANY); ++ ++ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) ++ { ++ /* Commit any uncommitted lazy save. This leaves ZA either active ++ and zero (lazy save case) or off (normal case). ++ ++ The sequence is: ++ ++ mrs <temp>, tpidr2_el0 ++ cbz <temp>, no_save ++ bl __arm_tpidr2_save ++ msr tpidr2_el0, xzr ++ zero { za } // Only if ZA is live ++ no_save: */ ++ bool is_active = (mode == aarch64_local_sme_state::ACTIVE_LIVE ++ || mode == aarch64_local_sme_state::ACTIVE_DEAD); ++ auto tmp_reg = gen_reg_rtx (DImode); ++ auto active_flag = gen_int_mode (is_active, DImode); ++ emit_insn (gen_aarch64_read_tpidr2 (tmp_reg)); ++ emit_insn (gen_aarch64_commit_lazy_save (tmp_reg, active_flag)); ++ } ++ ++ if (mode == aarch64_local_sme_state::ACTIVE_LIVE ++ || mode == aarch64_local_sme_state::ACTIVE_DEAD) ++ { ++ if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL) ++ { ++ /* Make ZA active after being inactive. ++ ++ First handle the case in which the lazy save we set up was ++ committed by a callee. If the function's source-level ZA state ++ is live then we must conditionally restore it from the lazy ++ save buffer. 
Otherwise we can just force PSTATE.ZA to 1. */ ++ if (mode == aarch64_local_sme_state::ACTIVE_LIVE) ++ emit_insn (gen_aarch64_restore_za (aarch64_get_tpidr2_ptr ())); ++ else ++ emit_insn (gen_aarch64_smstart_za ()); ++ ++ /* Now handle the case in which the lazy save was not committed. ++ In that case, ZA still contains the current function's ZA state, ++ and we just need to cancel the lazy save. */ ++ emit_insn (gen_aarch64_clear_tpidr2 ()); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL) ++ { ++ /* Retrieve the current function's ZA state from the lazy save ++ buffer. */ ++ aarch64_restore_za (aarch64_get_tpidr2_ptr ()); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER ++ || prev_mode == aarch64_local_sme_state::OFF) ++ { ++ /* INACTIVE_CALLER means that we are enabling ZA for the first ++ time in this function. The code above means that ZA is either ++ active and zero (if we committed a lazy save) or off. Handle ++ the latter case by forcing ZA on. ++ ++ OFF means that PSTATE.ZA is guaranteed to be 0. We just need ++ to force it to 1. ++ ++ Both cases leave ZA zeroed. */ ++ emit_insn (gen_aarch64_smstart_za ()); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::ACTIVE_DEAD ++ || prev_mode == aarch64_local_sme_state::ACTIVE_LIVE) ++ /* A simple change in liveness, such as in a CFG structure where ++ ZA is only conditionally defined. No code is needed. */ ++ return; ++ ++ gcc_unreachable (); ++ } ++ ++ if (mode == aarch64_local_sme_state::INACTIVE_LOCAL) ++ { ++ if (prev_mode == aarch64_local_sme_state::ACTIVE_LIVE ++ || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD ++ || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) ++ { ++ /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual ++ case of setting up a lazy save buffer before a call. ++ A transition from INACTIVE_CALLER is similar, except that ++ the contents of ZA are known to be zero. ++ ++ A transition from ACTIVE_DEAD means that ZA is live at the ++ point of the transition, but is dead on at least one incoming ++ edge. (That is, ZA is only conditionally initialized.) ++ For efficiency, we want to set up a lazy save even for ++ dead contents, since forcing ZA off would make later code ++ restore ZA from the lazy save buffer. */ ++ emit_insn (gen_aarch64_write_tpidr2 (aarch64_get_tpidr2_ptr ())); ++ return; ++ } ++ ++ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL ++ || prev_mode == aarch64_local_sme_state::OFF) ++ /* We're simply discarding the information about which inactive ++ state applies. */ ++ return; ++ ++ gcc_unreachable (); ++ } ++ ++ if (mode == aarch64_local_sme_state::INACTIVE_CALLER ++ || mode == aarch64_local_sme_state::OFF) ++ { ++ /* The transition to INACTIVE_CALLER is used before returning from ++ new("za") functions. Any state in ZA belongs to the current ++ function rather than a caller, but that state is no longer ++ needed. Clear any pending lazy save and turn ZA off. ++ ++ The transition to OFF is used before calling a private-ZA function. ++ We committed any incoming lazy save above, so at this point any ++ contents in ZA belong to the current function. 
*/
++      if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL)
++	emit_insn (gen_aarch64_clear_tpidr2 ());
++
++      if (prev_mode != aarch64_local_sme_state::OFF
++	  && prev_mode != aarch64_local_sme_state::SAVED_LOCAL)
++	emit_insn (gen_aarch64_smstop_za ());
++
++      return;
++    }
++
++  if (mode == aarch64_local_sme_state::SAVED_LOCAL)
++    {
++      /* This is a transition to an exception handler.  */
++      gcc_assert (prev_mode == aarch64_local_sme_state::OFF
++		  || prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL);
++      return;
++    }
++
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_EMIT.  */
++
++static void
++aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
++{
++  if (mode == prev_mode)
++    return;
++
++  start_sequence ();
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode (mode),
++					aarch64_tristate_mode (prev_mode));
++      break;
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      aarch64_mode_emit_local_sme_state (aarch64_local_sme_state (mode),
++					 aarch64_local_sme_state (prev_mode));
++      break;
++    }
++  rtx_insn *seq = get_insns ();
++  end_sequence ();
++
++  /* Get the set of clobbered registers that are currently live.  */
++  HARD_REG_SET clobbers = {};
++  for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
++    {
++      vec_rtx_properties properties;
++      properties.add_insn (insn, false);
++      for (rtx_obj_reference ref : properties.refs ())
++	if (ref.is_write () && HARD_REGISTER_NUM_P (ref.regno))
++	  SET_HARD_REG_BIT (clobbers, ref.regno);
++    }
++  clobbers &= live;
++
++  /* Emit instructions to save clobbered registers to pseudos.  Queue
++     instructions to restore the registers afterwards.
++
++     This should only be needed in rare situations.  */
++  auto_vec<rtx, 33> after;
++  for (unsigned int regno = R0_REGNUM; regno < R30_REGNUM; ++regno)
++    if (TEST_HARD_REG_BIT (clobbers, regno))
++      {
++	rtx hard_reg = gen_rtx_REG (DImode, regno);
++	rtx pseudo_reg = gen_reg_rtx (DImode);
++	emit_move_insn (pseudo_reg, hard_reg);
++	after.quick_push (gen_move_insn (hard_reg, pseudo_reg));
++      }
++  if (TEST_HARD_REG_BIT (clobbers, CC_REGNUM))
++    {
++      rtx pseudo_reg = gen_reg_rtx (DImode);
++      emit_insn (gen_aarch64_save_nzcv (pseudo_reg));
++      after.quick_push (gen_aarch64_restore_nzcv (pseudo_reg));
++    }
++
++  /* Emit the transition instructions themselves.  */
++  emit_insn (seq);
++
++  /* Restore the clobbered registers.  */
++  for (auto *insn : after)
++    emit_insn (insn);
++}
++
++/* Return true if INSN references the SME state represented by hard register
++   REGNO.  */
++
++static bool
++aarch64_insn_references_sme_state_p (rtx_insn *insn, unsigned int regno)
++{
++  df_ref ref;
++  FOR_EACH_INSN_DEF (ref, insn)
++    if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
++	&& DF_REF_REGNO (ref) == regno)
++      return true;
++  FOR_EACH_INSN_USE (ref, insn)
++    if (DF_REF_REGNO (ref) == regno)
++      return true;
++  return false;
++}
++
++/* Implement TARGET_MODE_NEEDED for LOCAL_SME_STATE.  */
++
++static aarch64_local_sme_state
++aarch64_mode_needed_local_sme_state (rtx_insn *insn, HARD_REG_SET live)
++{
++  if (!CALL_P (insn)
++      && find_reg_note (insn, REG_EH_REGION, NULL_RTX))
++    {
++      static bool reported;
++      if (!reported)
++	{
++	  sorry ("catching non-call exceptions in functions with SME state");
++	  reported = true;
++	}
++      /* Aim for graceful error recovery by picking the value that is
++	 least likely to generate an ICE.
*/ ++ return aarch64_local_sme_state::INACTIVE_LOCAL; ++ } ++ ++ /* A non-local goto is equivalent to a return. We disallow non-local ++ receivers in functions with SME state, so we know that the target ++ expects ZA to be dormant or off. */ ++ if (JUMP_P (insn) ++ && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) ++ return aarch64_local_sme_state::INACTIVE_CALLER; ++ ++ /* start_private_za_call and end_private_za_call bracket a sequence ++ that calls a private-ZA function. Force ZA to be turned off if the ++ function doesn't have any live ZA state, otherwise require ZA to be ++ inactive. */ ++ auto icode = recog_memoized (insn); ++ if (icode == CODE_FOR_aarch64_start_private_za_call ++ || icode == CODE_FOR_aarch64_end_private_za_call) ++ return (TEST_HARD_REG_BIT (live, ZA_REGNUM) ++ ? aarch64_local_sme_state::INACTIVE_LOCAL ++ : aarch64_local_sme_state::OFF); ++ ++ /* Force ZA to contain the current function's ZA state if INSN wants ++ to access it. */ ++ if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM)) ++ return (TEST_HARD_REG_BIT (live, ZA_REGNUM) ++ ? aarch64_local_sme_state::ACTIVE_LIVE ++ : aarch64_local_sme_state::ACTIVE_DEAD); ++ ++ return aarch64_local_sme_state::ANY; ++} ++ ++/* Implement TARGET_MODE_NEEDED for ZA_SAVE_BUFFER. */ ++ ++static aarch64_tristate_mode ++aarch64_mode_needed_za_save_buffer (rtx_insn *insn, HARD_REG_SET live) ++{ ++ /* We need to set up a lazy save buffer no later than the first ++ transition to INACTIVE_LOCAL (which involves setting up a lazy save). */ ++ if (aarch64_mode_needed_local_sme_state (insn, live) ++ == aarch64_local_sme_state::INACTIVE_LOCAL) ++ return aarch64_tristate_mode::YES; ++ ++ /* Also make sure that the lazy save buffer is set up before the first ++ insn that throws internally. The exception handler will sometimes ++ load from it. */ ++ if (find_reg_note (insn, REG_EH_REGION, NULL_RTX)) ++ return aarch64_tristate_mode::YES; ++ ++ return aarch64_tristate_mode::MAYBE; ++} ++ ++/* Implement TARGET_MODE_NEEDED. */ ++ ++static int ++aarch64_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET live) ++{ ++ switch (aarch64_mode_entity (entity)) ++ { ++ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: ++ return int (aarch64_mode_needed_za_save_buffer (insn, live)); ++ ++ case aarch64_mode_entity::LOCAL_SME_STATE: ++ return int (aarch64_mode_needed_local_sme_state (insn, live)); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_MODE_AFTER for LOCAL_SME_STATE. */ ++ ++static aarch64_local_sme_state ++aarch64_mode_after_local_sme_state (aarch64_local_sme_state mode, ++ HARD_REG_SET live) ++{ ++ /* Note places where ZA dies, so that we can try to avoid saving and ++ restoring state that isn't needed. */ ++ if (mode == aarch64_local_sme_state::ACTIVE_LIVE ++ && !TEST_HARD_REG_BIT (live, ZA_REGNUM)) ++ return aarch64_local_sme_state::ACTIVE_DEAD; ++ ++ /* Note where ZA is born, e.g. when moving past an __arm_out("za") ++ function. */ ++ if (mode == aarch64_local_sme_state::ACTIVE_DEAD ++ && TEST_HARD_REG_BIT (live, ZA_REGNUM)) ++ return aarch64_local_sme_state::ACTIVE_LIVE; ++ ++ return mode; ++} ++ ++/* Implement TARGET_MODE_AFTER. 
++
++static int
++aarch64_mode_after (int entity, int mode, rtx_insn *, HARD_REG_SET live)
++{
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return mode;
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_mode_after_local_sme_state
++		  (aarch64_local_sme_state (mode), live));
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_CONFLUENCE for LOCAL_SME_STATE.  */
++
++static aarch64_local_sme_state
++aarch64_local_sme_confluence (aarch64_local_sme_state mode1,
++			      aarch64_local_sme_state mode2)
++{
++  /* Perform a symmetrical check for two values.  */
++  auto is_pair = [&](aarch64_local_sme_state val1,
++		     aarch64_local_sme_state val2)
++    {
++      return ((mode1 == val1 && mode2 == val2)
++	      || (mode1 == val2 && mode2 == val1));
++    };
++
++  /* INACTIVE_CALLER means ZA is off or it has dormant contents belonging
++     to a caller.  OFF is one of the options.  */
++  if (is_pair (aarch64_local_sme_state::INACTIVE_CALLER,
++	       aarch64_local_sme_state::OFF))
++    return aarch64_local_sme_state::INACTIVE_CALLER;
++
++  /* Similarly for dormant contents belonging to the current function.  */
++  if (is_pair (aarch64_local_sme_state::INACTIVE_LOCAL,
++	       aarch64_local_sme_state::OFF))
++    return aarch64_local_sme_state::INACTIVE_LOCAL;
++
++  /* Treat a conditionally-initialized value as a fully-initialized value.  */
++  if (is_pair (aarch64_local_sme_state::ACTIVE_LIVE,
++	       aarch64_local_sme_state::ACTIVE_DEAD))
++    return aarch64_local_sme_state::ACTIVE_LIVE;
++
++  return aarch64_local_sme_state::ANY;
++}
++
++/* Implement TARGET_MODE_CONFLUENCE.  */
++
++static int
++aarch64_mode_confluence (int entity, int mode1, int mode2)
++{
++  gcc_assert (mode1 != mode2);
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return int (aarch64_tristate_mode::MAYBE);
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_local_sme_confluence
++		  (aarch64_local_sme_state (mode1),
++		   aarch64_local_sme_state (mode2)));
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_BACKPROP for an entity that either stays
++   NO throughout, or makes one transition from NO to YES.  */
++
++static aarch64_tristate_mode
++aarch64_one_shot_backprop (aarch64_tristate_mode mode1,
++			   aarch64_tristate_mode mode2)
++{
++  /* Keep bringing the transition forward until it starts from NO.  */
++  if (mode1 == aarch64_tristate_mode::MAYBE
++      && mode2 == aarch64_tristate_mode::YES)
++    return mode2;
++
++  return aarch64_tristate_mode::MAYBE;
++}
++
++/* Implement TARGET_MODE_BACKPROP for LOCAL_SME_STATE.  */
++
++static aarch64_local_sme_state
++aarch64_local_sme_backprop (aarch64_local_sme_state mode1,
++			    aarch64_local_sme_state mode2)
++{
++  /* We always need to know what the current state is when transitioning
++     to a new state.  Force any location with indeterminate starting state
++     to be active.  */
++  if (mode1 == aarch64_local_sme_state::ANY)
++    switch (mode2)
++      {
++      case aarch64_local_sme_state::INACTIVE_CALLER:
++      case aarch64_local_sme_state::OFF:
++      case aarch64_local_sme_state::ACTIVE_DEAD:
++	/* The current function's ZA state is not live.  */
++	return aarch64_local_sme_state::ACTIVE_DEAD;
++
++      case aarch64_local_sme_state::INACTIVE_LOCAL:
++      case aarch64_local_sme_state::ACTIVE_LIVE:
++	/* The current function's ZA state is live.  */
++	return aarch64_local_sme_state::ACTIVE_LIVE;
++
++      case aarch64_local_sme_state::SAVED_LOCAL:
++	/* This is a transition to an exception handler.  Since we don't
++	   support non-call exceptions for SME functions, the source of
++	   the transition must be known.  We'll assert later if that's
++	   not the case.  */
++	return aarch64_local_sme_state::ANY;
++
++      case aarch64_local_sme_state::ANY:
++	return aarch64_local_sme_state::ANY;
++      }
++
++  return aarch64_local_sme_state::ANY;
++}
++
++/* Implement TARGET_MODE_BACKPROP.  */
++
++static int
++aarch64_mode_backprop (int entity, int mode1, int mode2)
++{
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return int (aarch64_one_shot_backprop (aarch64_tristate_mode (mode1),
++					     aarch64_tristate_mode (mode2)));
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_local_sme_backprop
++		  (aarch64_local_sme_state (mode1),
++		   aarch64_local_sme_state (mode2)));
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_ENTRY.  */
++
++static int
++aarch64_mode_entry (int entity)
++{
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return int (aarch64_tristate_mode::NO);
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_cfun_shared_flags ("za") != 0
++		  ? aarch64_local_sme_state::ACTIVE_LIVE
++		  : aarch64_local_sme_state::INACTIVE_CALLER);
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_EXIT.  */
++
++static int
++aarch64_mode_exit (int entity)
++{
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      return int (aarch64_tristate_mode::MAYBE);
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_cfun_shared_flags ("za") != 0
++		  ? aarch64_local_sme_state::ACTIVE_LIVE
++		  : aarch64_local_sme_state::INACTIVE_CALLER);
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_EH_HANDLER.  */
++
++static int
++aarch64_mode_eh_handler (int entity)
++{
++  switch (aarch64_mode_entity (entity))
++    {
++    case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
++      /* Require a lazy save buffer to be allocated before the first
++	 insn that can throw.  */
++      return int (aarch64_tristate_mode::YES);
++
++    case aarch64_mode_entity::LOCAL_SME_STATE:
++      return int (aarch64_local_sme_state::SAVED_LOCAL);
++    }
++  gcc_unreachable ();
++}
++
++/* Implement TARGET_MODE_PRIORITY.  */
++
++static int
++aarch64_mode_priority (int, int n)
++{
++  return n;
++}
++
++/* Implement TARGET_MD_ASM_ADJUST.  */
++
++static rtx_insn *
++aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
++		       vec<machine_mode> &input_modes,
++		       vec<const char *> &constraints,
++		       vec<rtx> &uses, vec<rtx> &clobbers,
++		       HARD_REG_SET &clobbered_regs, location_t loc)
++{
++  rtx_insn *seq = arm_md_asm_adjust (outputs, inputs, input_modes, constraints,
++				     uses, clobbers, clobbered_regs, loc);
++
++  /* "za" in the clobber list of a function with ZA state is defined to
++     mean that the asm can read from and write to ZA.  We can model the
++     read using a USE, but unfortunately, it's not possible to model the
++     write directly.  Use a separate insn to model the effect.
++
++     We must ensure that ZA is active on entry, which is enforced by using
++     SME_STATE_REGNUM.  The asm must ensure that ZA is active on return.  */
++  if (TARGET_ZA)
++    for (unsigned int i = clobbers.length (); i-- > 0; )
++      {
++	rtx x = clobbers[i];
++	if (REG_P (x) && REGNO (x) == ZA_REGNUM)
++	  {
++	    auto id = cfun->machine->next_asm_update_za_id++;
++
++	    start_sequence ();
++	    if (seq)
++	      emit_insn (seq);
++	    emit_insn (gen_aarch64_asm_update_za (gen_int_mode (id, SImode)));
++	    seq = get_insns ();
++	    end_sequence ();
++
++	    uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM));
++	    uses.safe_push (gen_rtx_REG (DImode, SME_STATE_REGNUM));
++
++	    clobbers.ordered_remove (i);
++	    CLEAR_HARD_REG_BIT (clobbered_regs, ZA_REGNUM);
++	  }
++      }
++  return seq;
++}
++
+ /* If CALL involves a change in PSTATE.SM, emit the instructions needed
+    to switch to the new mode and the instructions needed to restore the
+    original mode.  Return true if something changed.  */
+@@ -29108,6 +30415,9 @@ aarch64_get_v16qi_mode ()
+ #undef TARGET_START_CALL_ARGS
+ #define TARGET_START_CALL_ARGS aarch64_start_call_args
+ 
++#undef TARGET_END_CALL_ARGS
++#define TARGET_END_CALL_ARGS aarch64_end_call_args
++
+ #undef TARGET_GIMPLE_FOLD_BUILTIN
+ #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
+ 
+@@ -29473,6 +30783,9 @@ aarch64_libgcc_floating_mode_supported_p
+ #undef TARGET_COMP_TYPE_ATTRIBUTES
+ #define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
+ 
++#undef TARGET_MERGE_DECL_ATTRIBUTES
++#define TARGET_MERGE_DECL_ATTRIBUTES aarch64_merge_decl_attributes
++
+ #undef TARGET_GET_MULTILIB_ABI_NAME
+ #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
+ 
+@@ -29493,8 +30806,35 @@ aarch64_libgcc_floating_mode_supported_p
+ #undef TARGET_STRICT_ARGUMENT_NAMING
+ #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+ 
++#undef TARGET_MODE_EMIT
++#define TARGET_MODE_EMIT aarch64_mode_emit
++
++#undef TARGET_MODE_NEEDED
++#define TARGET_MODE_NEEDED aarch64_mode_needed
++
++#undef TARGET_MODE_AFTER
++#define TARGET_MODE_AFTER aarch64_mode_after
++
++#undef TARGET_MODE_CONFLUENCE
++#define TARGET_MODE_CONFLUENCE aarch64_mode_confluence
++
++#undef TARGET_MODE_BACKPROP
++#define TARGET_MODE_BACKPROP aarch64_mode_backprop
++
++#undef TARGET_MODE_ENTRY
++#define TARGET_MODE_ENTRY aarch64_mode_entry
++
++#undef TARGET_MODE_EXIT
++#define TARGET_MODE_EXIT aarch64_mode_exit
++
++#undef TARGET_MODE_EH_HANDLER
++#define TARGET_MODE_EH_HANDLER aarch64_mode_eh_handler
++
++#undef TARGET_MODE_PRIORITY
++#define TARGET_MODE_PRIORITY aarch64_mode_priority
++
+ #undef TARGET_MD_ASM_ADJUST
+-#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
++#define TARGET_MD_ASM_ADJUST aarch64_md_asm_adjust
+ 
+ #undef TARGET_ASM_FILE_END
+ #define TARGET_ASM_FILE_END aarch64_asm_file_end
+@@ -29505,6 +30845,9 @@ aarch64_libgcc_floating_mode_supported_p
+ #undef TARGET_HAVE_SHADOW_CALL_STACK
+ #define TARGET_HAVE_SHADOW_CALL_STACK true
+ 
++#undef TARGET_EXTRA_LIVE_ON_ENTRY
++#define TARGET_EXTRA_LIVE_ON_ENTRY aarch64_extra_live_on_entry
++
+ #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL
+ #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue
+ 
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 6bfe55968..89d30b9bf 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ /* Macros to test ISA flags.  */
+ 
+ #define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF)
++#define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON)
+ #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES)
+ #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC)
+ #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO)
+@@ -259,6 +260,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ #define TARGET_STREAMING_COMPATIBLE \
+   ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
+ 
++/* PSTATE.ZA is enabled in the current function body.  */
++#define TARGET_ZA (AARCH64_ISA_ZA_ON)
++
+ /* Crypto is an optional extension to AdvSIMD.  */
+ #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
+ 
+@@ -445,7 +449,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+     1, 1, 1, 1,			/* SFP, AP, CC, VG */	\
+     0, 0, 0, 0, 0, 0, 0, 0,	/* P0 - P7 */		\
+     0, 0, 0, 0, 0, 0, 0, 0,	/* P8 - P15 */		\
+-    1, 1			/* FFR and FFRT */	\
++    1, 1,			/* FFR and FFRT */	\
++    1, 1, 1, 1, 1, 1, 1		/* Fake registers */	\
+   }
+ 
+ /* X30 is marked as caller-saved which is in line with regular function call
+@@ -455,7 +460,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+    true but not until function epilogues have been generated.  This ensures
+    that X30 is available for use in leaf functions if needed.  */
+ 
+-#define CALL_USED_REGISTERS \
++#define CALL_REALLY_USED_REGISTERS \
+   {								\
+     1, 1, 1, 1, 1, 1, 1, 1,	/* R0 - R7 */		\
+     1, 1, 1, 1, 1, 1, 1, 1,	/* R8 - R15 */		\
+@@ -468,7 +473,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+     1, 1, 1, 0,			/* SFP, AP, CC, VG */	\
+     1, 1, 1, 1, 1, 1, 1, 1,	/* P0 - P7 */		\
+     1, 1, 1, 1, 1, 1, 1, 1,	/* P8 - P15 */		\
+-    1, 1			/* FFR and FFRT */	\
++    1, 1,			/* FFR and FFRT */	\
++    0, 0, 0, 0, 0, 0, 0		/* Fake registers */	\
+   }
+ 
+ #define REGISTER_NAMES \
+@@ -484,7 +490,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+     "sfp", "ap", "cc", "vg",					\
+     "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7",	\
+     "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15",	\
+-    "ffr", "ffrt"						\
++    "ffr", "ffrt",						\
++    "lowering", "tpidr2_block", "sme_state", "tpidr2_setup",	\
++    "za_free", "za_saved", "za"					\
+   }
+ 
+ /* Generate the register aliases for core register N */
+@@ -533,7 +541,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ #define FRAME_POINTER_REGNUM	SFP_REGNUM
+ #define STACK_POINTER_REGNUM	SP_REGNUM
+ #define ARG_POINTER_REGNUM	AP_REGNUM
+-#define FIRST_PSEUDO_REGISTER	(FFRT_REGNUM + 1)
++#define FIRST_PSEUDO_REGISTER	(LAST_FAKE_REGNUM + 1)
+ 
+ /* The number of argument registers available for each class.  */
+ #define NUM_ARG_REGS		8
+@@ -657,6 +665,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ 
+ #define FP_SIMD_SAVED_REGNUM_P(REGNO)			\
+   (((unsigned) (REGNO - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM))
++
++#define FAKE_REGNUM_P(REGNO) \
++  IN_RANGE (REGNO, FIRST_FAKE_REGNUM, LAST_FAKE_REGNUM)
+ 
+ /* Register and constant classes.  */
+ 
+@@ -677,6 +688,7 @@ enum reg_class
+   PR_REGS,
+   FFR_REGS,
+   PR_AND_FFR_REGS,
++  FAKE_REGS,
+   ALL_REGS,
+   LIM_REG_CLASSES		/* Last */
+ };
+ 
+@@ -700,6 +712,7 @@ enum reg_class
+   "PR_REGS",					\
+   "FFR_REGS",					\
+   "PR_AND_FFR_REGS",				\
++  "FAKE_REGS",					\
+   "ALL_REGS"					\
+ }
+ 
+@@ -720,6 +733,7 @@ enum reg_class
+   { 0x00000000, 0x00000000, 0x000ffff0 },	/* PR_REGS */		\
+   { 0x00000000, 0x00000000, 0x00300000 },	/* FFR_REGS */		\
+   { 0x00000000, 0x00000000, 0x003ffff0 },	/* PR_AND_FFR_REGS */	\
++  { 0x00000000, 0x00000000, 0x1fc00000 },	/* FAKE_REGS */		\
+   { 0xffffffff, 0xffffffff, 0x000fffff }	/* ALL_REGS */		\
+ }
+ 
+@@ -920,6 +934,15 @@ typedef struct GTY (()) machine_function
+   bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
+   /* One entry for each general purpose register.  */
+   rtx call_via[SP_REGNUM];
++
++  /* A pseudo register that points to the function's TPIDR2 block, or null
++     if the function doesn't have a TPIDR2 block.  */
++  rtx tpidr2_block;
++
++  /* A pseudo register that points to the function's ZA save buffer,
++     or null if none.  */
++  rtx za_save_buffer;
++
+   bool label_is_assembled;
+ 
+   /* True if we've expanded at least one call to a function that changes
+@@ -927,6 +950,10 @@ typedef struct GTY (()) machine_function
+      guarantees that no such mode switch exists.  */
+   bool call_switches_pstate_sm;
+ 
++  /* Used to generate unique identifiers for each update to ZA by an
++     asm statement.  */
++  unsigned int next_asm_update_za_id;
++
+   /* A set of all decls that have been passed to a vld1 intrinsic in the
+      current function.  This is used to help guide the vector cost model.  */
+   hash_set<tree> *vector_load_decls;
+@@ -996,6 +1023,10 @@ typedef struct
+   bool silent_p;		/* True if we should act silently, rather than
+				   raise an error for invalid calls.  */
+ 
++  /* AARCH64_STATE_* flags that describe whether the function shares ZA
++     with its callers.  */
++  unsigned int shared_za_flags;
++
+   /* A list of registers that need to be saved and restored around a
+      change to PSTATE.SM.  An auto_vec would be more convenient, but those
+      can't be copied.  */
+@@ -1344,4 +1375,61 @@ extern poly_uint16 aarch64_sve_vg;
+ 			  STACK_BOUNDARY / BITS_PER_UNIT)		\
+    : (crtl->outgoing_args_size + STACK_POINTER_OFFSET))
+ 
++#ifndef USED_FOR_TARGET
++
++/* Enumerates the mode-switching "entities" for AArch64.  */
++enum class aarch64_mode_entity : int
++{
++  /* An aarch64_tristate_mode that says whether we have created a local
++     save buffer for the current function's ZA state.  The only transition
++     is from NO to YES.  */
++  HAVE_ZA_SAVE_BUFFER,
++
++  /* An aarch64_local_sme_state that reflects the state of all data
++     controlled by PSTATE.ZA.  */
++  LOCAL_SME_STATE
++};
++
++/* Describes the state of all data controlled by PSTATE.ZA */
++enum class aarch64_local_sme_state : int
++{
++  /* ZA is in the off or dormant state.  If it is dormant, the contents
++     of ZA belong to a caller.  */
++  INACTIVE_CALLER,
++
++  /* ZA is in the off state: PSTATE.ZA is 0 and TPIDR2_EL0 is null.  */
++  OFF,
++
++  /* ZA is in the off or dormant state.  If it is dormant, the contents
++     of ZA belong to the current function.  */
++  INACTIVE_LOCAL,
++
++  /* ZA is in the off state and the current function's ZA contents are
++     stored in the lazy save buffer.  This is the state on entry to
++     exception handlers.  */
++  SAVED_LOCAL,
++
++  /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null.
++     The contents of ZA are live.  */
++  ACTIVE_LIVE,
++
++  /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null.
++     The contents of ZA are dead.  */
++  ACTIVE_DEAD,
++
++  /* ZA could be in multiple states.  */
++  ANY
++};
++
++enum class aarch64_tristate_mode : int { NO, YES, MAYBE };
++
++#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
++  aarch64_optimize_mode_switching (aarch64_mode_entity (ENTITY))
++
++#define NUM_MODES_FOR_MODE_SWITCHING \
++  { int (aarch64_tristate_mode::MAYBE), \
++    int (aarch64_local_sme_state::ANY) }
++
++#endif
++
+ #endif /* GCC_AARCH64_H */
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index bb867de74..05a7c6675 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -111,6 +111,56 @@
+     ;; "FFR token": a fake register used for representing the scheduling
+     ;; restrictions on FFR-related operations.
+     (FFRT_REGNUM	85)
++
++    ;; ----------------------------------------------------------------
++    ;; Fake registers
++    ;; ----------------------------------------------------------------
++    ;; These registers represent abstract things, rather than real
++    ;; architected registers.
++
++    ;; Sometimes we use placeholder instructions to mark where later
++    ;; ABI-related lowering is needed.  These placeholders read and
++    ;; write this register.  Instructions that depend on the lowering
++    ;; read the register.
++    (LOWERING_REGNUM 86)
++
++    ;; Represents the contents of the current function's TPIDR2 block,
++    ;; in abstract form.
++    (TPIDR2_BLOCK_REGNUM 87)
++
++    ;; Holds the value that the current function wants PSTATE.ZA to be.
++    ;; The actual value can sometimes vary, because it does not track
++    ;; changes to PSTATE.ZA that happen during a lazy save and restore.
++    ;; Those effects are instead tracked by ZA_SAVED_REGNUM.
++    (SME_STATE_REGNUM 88)
++
++    ;; Instructions write to this register if they set TPIDR2_EL0 to a
++    ;; well-defined value.  Instructions read from the register if they
++    ;; depend on the result of such writes.
++    ;;
++    ;; The register does not model the architected TPIDR2_EL0, just the
++    ;; current function's management of it.
++    (TPIDR2_SETUP_REGNUM 89)
++
++    ;; Represents the property "has an incoming lazy save been committed?".
++    (ZA_FREE_REGNUM 90)
++
++    ;; Represents the property "are the current function's ZA contents
++    ;; stored in the lazy save buffer, rather than in ZA itself?".
++    (ZA_SAVED_REGNUM 91)
++
++    ;; Represents the contents of the current function's ZA state in
++    ;; abstract form.  At various times in the function, these contents
++    ;; might be stored in ZA itself, or in the function's lazy save buffer.
++    ;;
++    ;; The contents persist even when the architected ZA is off.  Private-ZA
++    ;; functions have no effect on its contents.
++    (ZA_REGNUM 92)
++    ;; ----------------------------------------------------------------
++    (FIRST_FAKE_REGNUM	LOWERING_REGNUM)
++    (LAST_FAKE_REGNUM	ZA_REGNUM)
++    ;; ----------------------------------------------------------------
++
+     ;; The pair of scratch registers used for stack probing with -fstack-check.
+     ;; Leave R9 alone as a possible choice for the static chain.
+     ;; Note that the use of these registers is mutually exclusive with the use
+@@ -303,7 +353,12 @@
+     UNSPEC_TAG_SPACE		; Translate address to MTE tag address space.
+     UNSPEC_LD1RO
+     UNSPEC_SALT_ADDR
++    UNSPEC_SAVE_NZCV
++    UNSPEC_RESTORE_NZCV
+     UNSPECV_PATCHABLE_AREA
++    ;; Wraps a constant integer that should be multiplied by the number
++    ;; of quadwords in an SME vector.
++    UNSPEC_SME_VQ
+ )
+ 
+ (define_c_enum "unspecv"
+@@ -379,7 +434,7 @@
+ ;; Q registers and is equivalent to "simd".
+ 
+ (define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd
+-		       simd nosimd sve fp16])
++		       simd nosimd sve fp16 sme])
+ 
+ (define_enum_attr "arch" "arches" (const_string "any"))
+ 
+@@ -423,7 +478,10 @@
+ 	     (match_test "TARGET_FP_F16INST"))
+ 
+ 	(and (eq_attr "arch" "sve")
+-	     (match_test "TARGET_SVE")))
++	     (match_test "TARGET_SVE"))
++
++	(and (eq_attr "arch" "sme")
++	     (match_test "TARGET_SME")))
+     (const_string "yes")
+     (const_string "no")))
+ 
+@@ -928,7 +986,7 @@
+   (set_attr "sls_length" "retbr")
+ )
+ 
+-(define_insn "*cb<optab><mode>1"
++(define_insn "aarch64_cb<optab><mode>1"
+   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
+ 				(const_int 0))
+ 			   (label_ref (match_operand 1 "" ""))
+@@ -1291,6 +1349,7 @@
+      /* The "mov_imm" type for CNT is just a placeholder.  */
+      [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+      [r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
++     [r , UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]);
+      [r , m  ; load_4  , *   , 4] ldr\t%w0, %1
+      [w , m  ; load_4  , fp  , 4] ldr\t%s0, %1
+      [m , r Z; store_4 , *   , 4] str\t%w1, %0
+@@ -1326,6 +1385,7 @@
+      /* The "mov_imm" type for CNT is just a placeholder.  */
+      [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+      [r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
++     [r, UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]);
+      [r, m  ; load_8  , *   , 4] ldr\t%x0, %1
+      [w, m  ; load_8  , fp  , 4] ldr\t%d0, %1
+      [m, r Z; store_8 , *   , 4] str\t%x1, %0
+@@ -7733,6 +7793,21 @@
+   (set (attr "length") (symbol_ref "INTVAL (operands[0])"))
+ )
+ 
++(define_insn "aarch64_save_nzcv"
++  [(set (match_operand:DI 0 "register_operand" "=r")
++	(unspec:DI [(reg:CC CC_REGNUM)] UNSPEC_SAVE_NZCV))]
++  ""
++  "mrs\t%0, nzcv"
++)
++
++(define_insn "aarch64_restore_nzcv"
++  [(set (reg:CC CC_REGNUM)
++	(unspec:CC [(match_operand:DI 0 "register_operand" "r")]
++		   UNSPEC_RESTORE_NZCV))]
++  ""
++  "msr\tnzcv, %0"
++)
++
+ ;; AdvSIMD Stuff
+ (include "aarch64-simd.md")
+ 
+diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
+index 212a73416..88fb9a07c 100644
+--- a/gcc/config/aarch64/constraints.md
++++ b/gcc/config/aarch64/constraints.md
+@@ -220,6 +220,12 @@
+   (and (match_code "const_poly_int")
+        (match_test "aarch64_sve_rdvl_immediate_p (op)")))
+ 
++(define_constraint "UsR"
++  "@internal
++   A constraint that matches a value produced by RDSVL."
++  (and (match_code "const")
++       (match_test "aarch64_rdsvl_immediate_p (op)")))
++
+ (define_constraint "Usv"
+   "@internal
+    A constraint that matches a VG-based constant that can be loaded by
+diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
+new file mode 100644
+index 000000000..a245546d8
+--- /dev/null
++++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
+@@ -0,0 +1,189 @@
++// { dg-options "-O -fno-optimize-sibling-calls" }
++// { dg-final { check-function-bodies "**" "" } }
++
++void callee_inout() __arm_inout("za");
++void callee_in() noexcept __arm_in("za");
++void callee_out() noexcept __arm_out("za");
++void callee_normal();
++
++/*
++** _Z5test1v:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** bl __cxa_begin_catch
++** bl __cxa_end_catch
++** mov w0, #?2
++** ...
++*/
++__arm_new("za") int
++test1 ()
++{
++  try
++    {
++      callee_inout();
++      return 1;
++    }
++  catch (...)
++    {
++      return 2;
++    }
++}
++
++/*
++** _Z5test2v:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** bl __cxa_begin_catch
++** smstart za
++** bl _Z10callee_outv
++** bl _Z9callee_inv
++** smstop za
++** bl __cxa_end_catch
++** mov w0, #?2
++** ...
++*/
++__arm_new("za") int
++test2 ()
++{
++  try
++    {
++      callee_inout();
++      return 1;
++    }
++  catch (...)
++    {
++      callee_out();
++      callee_in();
++      return 2;
++    }
++}
++
++/*
++** _Z5test3v:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** smstop za
++** ...
++** bl _Z13callee_normalv
++** ...
++** bl __cxa_begin_catch
++** smstart za
++** bl _Z10callee_outv
++** bl _Z9callee_inv
++** smstop za
++** bl __cxa_end_catch
++** mov w0, #?2
++** ...
++*/
++__arm_new("za") int
++test3 ()
++{
++  try
++    {
++      callee_normal();
++      return 1;
++    }
++  catch (...)
++    {
++      callee_out();
++      callee_in();
++      return 2;
++    }
++}
++
++__arm_new("za") int
++test4 ()
++{
++  try
++    {
++      // No lazy save set up because this is a shared-ZA function.
++      callee_inout();
++      return 1;
++    }
++  catch (...)
++    {
++      callee_inout();
++      return 2;
++    }
++}
++// { dg-final { scan-assembler {_Z5test4v:(?:(?!msr\ttpidr2_el0, x[0-9]+).)*\tret} } }
++
++/*
++** _Z5test5v:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** smstart za
++** ...
++** bl _Z12callee_inoutv
++** add (x[0-9]+), [^\n]+
++** msr tpidr2_el0, \1
++** bl _Z13callee_normalv
++** msr tpidr2_el0, xzr
++** smstop za
++** ...
++** bl __cxa_begin_catch
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** msr tpidr2_el0, xzr
++** bl _Z12callee_inoutv
++** smstop za
++** bl __cxa_end_catch
++** mov w0, #?2
++** ...
++*/
++__arm_new("za") int
++test5 ()
++{
++  try
++    {
++      callee_inout();
++      callee_normal();
++      return 1;
++    }
++  catch (...)
++    {
++      callee_inout();
++      return 2;
++    }
++}
++
++/*
++** _Z5test6v:
++** ...
++** msr tpidr2_el0, x[0-9]+
++** bl _Z13callee_normalv
++** msr tpidr2_el0, xzr
++** ...
++** bl __cxa_begin_catch
++** bl __cxa_end_catch
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** msr tpidr2_el0, xzr
++** ...
++*/
++int
++test6 () __arm_inout("za")
++{
++  try
++    {
++      callee_normal();
++      callee_out();
++      return 1;
++    }
++  catch (...)
++    {
++      return 2;
++    }
++}
+diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C
+index 032485adf..8b0755014 100644
+--- a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C
++++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C
+@@ -2,3 +2,8 @@
+ 
+ void f1 () __arm_streaming;
+ void f2 () __arm_streaming_compatible;
++void f3 () __arm_in("za");
++void f4 () __arm_out("za");
++void f5 () __arm_inout("za");
++void f6 () __arm_preserves("za");
++__arm_new("za") void f7 () {}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
+index 8f1b83676..fcabe3edc 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
+@@ -2,3 +2,8 @@
+ 
+ void f1 () __arm_streaming;
+ void f2 () __arm_streaming_compatible;
++void f3 () __arm_in("za");
++void f4 () __arm_out("za");
++void f5 () __arm_inout("za");
++void f6 () __arm_preserves("za");
++__arm_new("za") void f7 () {}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
+new file mode 100644
+index 000000000..856880e21
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
+@@ -0,0 +1,154 @@
++// { dg-options "" }
++
++void shared_a () [[arm::inout("za")]];
++void shared_a (); // { dg-error "conflicting types" }
++
++void shared_b ();
++void shared_b () [[arm::inout("za")]]; // { dg-error "conflicting types" }
++
++void shared_c () [[arm::inout("za")]];
++void shared_c () {} // Inherits attribute from declaration (confusingly).
++
++void shared_d ();
++void shared_d () [[arm::inout("za")]] {} // { dg-error "conflicting types" }
++
++void shared_e () [[arm::inout("za")]] {}
++void shared_e (); // { dg-error "conflicting types" }
++
++void shared_f () {}
++void shared_f () [[arm::inout("za")]]; // { dg-error "conflicting types" }
++
++extern void (*shared_g) ();
++extern void (*shared_g) () [[arm::inout("za")]]; // { dg-error "conflicting types" }
++
++extern void (*shared_h) () [[arm::inout("za")]];
++extern void (*shared_h) (); // { dg-error "conflicting types" }
++
++//----------------------------------------------------------------------------
++
++void preserved_a () [[arm::preserves("za")]];
++void preserved_a (); // { dg-error "conflicting types" }
++
++void preserved_b ();
++void preserved_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
++
++void preserved_c () [[arm::preserves("za")]];
++void preserved_c () {} // Inherits attribute from declaration (confusingly).
++
++void preserved_d ();
++void preserved_d () [[arm::preserves("za")]] {} // { dg-error "conflicting types" }
++
++void preserved_e () [[arm::preserves("za")]] {}
++void preserved_e (); // { dg-error "conflicting types" }
++
++void preserved_f () {}
++void preserved_f () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
++
++extern void (*preserved_g) ();
++extern void (*preserved_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
++
++extern void (*preserved_h) () [[arm::preserves("za")]];
++extern void (*preserved_h) (); // { dg-error "conflicting types" }
++
++//----------------------------------------------------------------------------
++
++void replicated_1 () [[arm::in("za", "za"), arm::in("za")]];
++void replicated_2 () [[arm::out("za", "za"), arm::out("za")]];
++void replicated_3 () [[arm::inout("za", "za"), arm::inout("za")]];
++void replicated_4 () [[arm::preserves("za", "za"), arm::preserves("za")]];
++
++//----------------------------------------------------------------------------
++
++void invalid_1 () [[arm::in]]; // { dg-error "wrong number of arguments" }
++void invalid_2 () [[arm::in()]]; // { dg-error "parentheses must be omitted" }
++  // { dg-error "wrong number of arguments" "" { target *-*-* } .-1 }
++void invalid_3 () [[arm::in("")]]; // { dg-error "unrecognized state string ''" }
++void invalid_4 () [[arm::in("foo")]]; // { dg-error "unrecognized state string 'foo'" }
++void invalid_5 () [[arm::in(42)]]; // { dg-error "the arguments to 'in' must be constant strings" }
++void invalid_6 () [[arm::in(*(int *)0 ? "za" : "za")]]; // { dg-error "the arguments to 'in' must be constant strings" }
++
++//----------------------------------------------------------------------------
++
++void mixed_a () [[arm::preserves("za")]];
++void mixed_a () [[arm::inout("za")]]; // { dg-error "conflicting types" }
++
++void mixed_b () [[arm::inout("za")]];
++void mixed_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
++
++void mixed_c () [[arm::preserves("za")]];
++void mixed_c () [[arm::in("za")]] {} // { dg-error "conflicting types" }
++
++void mixed_d () [[arm::inout("za")]];
++void mixed_d () [[arm::in("za")]] {} // { dg-error "conflicting types" }
++
++void mixed_e () [[arm::out("za")]] {}
++void mixed_e () [[arm::in("za")]]; // { dg-error "conflicting types" }
++
++void mixed_f () [[arm::inout("za")]] {}
++void mixed_f () [[arm::out("za")]]; // { dg-error "conflicting types" }
++
++extern void (*mixed_g) () [[arm::in("za")]];
++extern void (*mixed_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
++
++extern void (*mixed_h) () [[arm::preserves("za")]];
++extern void (*mixed_h) () [[arm::out("za")]]; // { dg-error "conflicting types" }
++
++//----------------------------------------------------------------------------
++
++void contradiction_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
++void contradiction_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
++
++int [[arm::inout("za")]] int_attr; // { dg-warning "only applies to function types" }
++void *[[arm::preserves("za")]] ptr_attr; // { dg-warning "only applies to function types" }
++
++typedef void preserved_callback () [[arm::preserves("za")]];
++typedef void shared_callback () [[arm::inout("za")]];
++
++void (*preserved_callback_ptr) () [[arm::preserves("za")]];
++void (*shared_callback_ptr) () [[arm::inout("za")]];
++
++typedef void contradiction_callback_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
++typedef void contradiction_callback_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
() arm::inout("za"), arm::preserves("za"); // { dg-error "inconsistent attributes for state 'za'" } ++ ++void (*contradiction_callback_ptr_1) () arm::preserves("za"), arm::inout("za"); // { dg-error "inconsistent attributes for state 'za'" } ++void (*contradiction_callback_ptr_2) () arm::inout("za"), arm::preserves("za"); // { dg-error "inconsistent attributes for state 'za'" } ++ ++struct s { ++ void (*contradiction_callback_ptr_1) () arm::preserves("za"), arm::inout("za"); // { dg-error "inconsistent attributes for state 'za'" } ++ void (*contradiction_callback_ptr_2) () arm::inout("za"), arm::preserves("za"); // { dg-error "inconsistent attributes for state 'za'" } ++}; ++ ++//---------------------------------------------------------------------------- ++ ++void keyword_ok_1 () __arm_inout("za"); ++void keyword_ok_1 () __arm_inout("za"); ++ ++void keyword_ok_2 () __arm_in("za"); ++void keyword_ok_2 () arm::in("za"); ++ ++void keyword_ok_3 () arm::out("za"); ++void keyword_ok_3 () __arm_out("za"); ++ ++void keyword_ok_4 () __arm_inout("za") arm::inout("za"); ++ ++void keyword_ok_5 () __arm_preserves("za"); ++void keyword_ok_5 () arm::preserves("za"); ++ ++__arm_new("za") void keyword_ok_6 () {} ++ ++//---------------------------------------------------------------------------- ++ ++void keyword_conflict_1 () __arm_inout("za"); ++void keyword_conflict_1 (); // { dg-error "conflicting types" } ++ ++void keyword_conflict_2 (); ++void keyword_conflict_2 () __arm_inout("za"); // { dg-error "conflicting types" } ++ ++void keyword_conflict_3 () __arm_inout("za"); ++void keyword_conflict_3 () arm::preserves("za"); // { dg-error "conflicting types" } ++ ++void keyword_conflict_4 () arm::preserves("za"); ++void keyword_conflict_4 () __arm_inout("za"); // { dg-error "conflicting types" } ++ ++__arm_new("za") void keyword_conflict_5 () __arm_inout("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++__arm_new("za") void keyword_conflict_6 () __arm_preserves("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c +new file mode 100644 +index 000000000..572ff309f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c +@@ -0,0 +1,73 @@ ++// { dg-options "" } ++ ++arm::new("za") void new_za_a (); ++void new_za_a (); ++ ++void new_za_b (); ++arm::new("za") void new_za_b (); ++ ++arm::new("za") void new_za_c (); ++void new_za_c () {} ++ ++void new_za_d (); ++arm::new("za") void new_za_d () {} ++ ++arm::new("za") void new_za_e () {} ++void new_za_e (); ++ ++void new_za_f () {} ++arm::new("za") void new_za_f (); // { dg-error "cannot apply attribute 'new' to 'new_za_f' after the function has been defined" } ++ ++//---------------------------------------------------------------------------- ++ ++arm::new("za") void shared_a (); ++void shared_a () arm::inout("za"); // { dg-error "conflicting types" } ++ ++void shared_b () arm::inout("za"); ++arm::new("za") void shared_b (); // { dg-error "conflicting types" } ++ ++arm::new("za") void shared_c (); ++void shared_c () arm::in("za") {} // { dg-error "conflicting types" } ++ ++void shared_d () arm::in("za"); ++arm::new("za") void shared_d () {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++ ++arm::new("za") void shared_e () {} ++void shared_e () arm::out("za"); // { dg-error "conflicting types" } ++ ++void shared_f () 
arm::out("za") {} ++arm::new("za") void shared_f (); // { dg-error "conflicting types" } ++ ++arm::new("za") void shared_g () {} ++void shared_g () arm::preserves("za"); // { dg-error "conflicting types" } ++ ++void shared_h () arm::preserves("za") {} ++arm::new("za") void shared_h (); // { dg-error "conflicting types" } ++ ++//---------------------------------------------------------------------------- ++ ++arm::new("za") void contradiction_1 () arm::inout("za"); // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++void contradiction_2 arm::new("za") () arm::inout("za"); // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++arm::new("za") void contradiction_3 () arm::preserves("za"); // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++void contradiction_4 arm::new("za") () arm::preserves("za"); // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } ++ ++int arm::new("za") int_attr; // { dg-warning "does not apply to types" } ++arm::new("za") int int_var_attr; // { dg-error "applies only to function definitions" } ++typedef void new_za_callback () arm::new("za"); // { dg-warning "does not apply to types" } ++arm::new("za") void (*new_za_var_callback) (); // { dg-error "applies only to function definitions" } ++ ++//---------------------------------------------------------------------------- ++ ++arm::new("za") void complementary_1 () arm::streaming {} ++void complementary_2 arm::new("za") () arm::streaming {} ++arm::new("za") void complementary_3 () arm::streaming_compatible {} ++void complementary_4 arm::new("za") () arm::streaming_compatible {} ++ ++//---------------------------------------------------------------------------- ++ ++#pragma GCC target "+nosme" ++ ++arm::new("za") void bereft_1 (); ++arm::new("za") void bereft_2 () {} // { dg-error "functions with SME state require the ISA extension 'sme'" } ++void bereft_3 () arm::inout("za"); ++void bereft_4 () arm::inout("za") {} // { dg-error "functions with SME state require the ISA extension 'sme'" } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c +new file mode 100644 +index 000000000..203f6ae8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c +@@ -0,0 +1,31 @@ ++// { dg-options "" } ++ ++void normal_callee (); ++void in_callee () arm::in("za"); ++void out_callee () arm::out("za"); ++void inout_callee () arm::inout("za"); ++void preserves_callee () arm::preserves("za"); ++ ++struct callbacks { ++ void (*normal_ptr) (); ++ void (*in_ptr) () arm::in("za"); ++ void (*out_ptr) () arm::out("za"); ++ void (*inout_ptr) () arm::inout("za"); ++ void (*preserves_ptr) () arm::preserves("za"); ++}; ++ ++void ++normal_caller (struct callbacks *c) ++{ ++ normal_callee (); ++ in_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ out_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ inout_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ preserves_callee (); // { dg-error {call to a function that shares SME state from a function that has no SME state} } ++ ++ c->normal_ptr (); ++ c->in_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } ++ c->out_ptr (); // { dg-error {call to a function that shares 
++  c->inout_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
++  c->preserves_ptr (); // { dg-error {call to a function that shares SME state from a function that has no SME state} }
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+new file mode 100644
+index 000000000..cec0abf0e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+@@ -0,0 +1,585 @@
++// { dg-options "-O -fno-optimize-sibling-calls" }
++// { dg-final { check-function-bodies "**" "" } }
++
++void private_za();
++void out_za() __arm_out("za");
++void in_za() __arm_in("za");
++void inout_za() __arm_inout("za");
++void preserves_za() __arm_preserves("za");
++
++/*
++** test1:
++** ret
++*/
++__arm_new("za") void test1()
++{
++}
++
++/*
++** test2:
++** ldr w0, \[x0\]
++** ret
++*/
++__arm_new("za") int test2(int *ptr)
++{
++  return *ptr;
++}
++
++/*
++** test3:
++** stp [^\n]+
++** mov x29, sp
++** bl private_za
++** (
++** mov w0, 0
++** ldp [^\n]+
++** |
++** ldp [^\n]+
++** mov w0, 0
++** )
++** ret
++*/
++__arm_new("za") int test3()
++{
++  private_za();
++  return 0;
++}
++
++/*
++** test4:
++** ...
++** mrs x0, tpidr2_el0
++** cbz x0, [^\n]+
++** bl __arm_tpidr2_save
++** msr tpidr2_el0, xzr
++** zero { za }
++** smstart za
++** bl in_za
++** smstop za
++** ldp [^\n]+
++** ret
++*/
++__arm_new("za") void test4()
++{
++  in_za(); // Uses zeroed contents.
++}
++
++/*
++** test5:
++** ...
++** mrs x0, tpidr2_el0
++** cbz x0, [^\n]+
++** bl __arm_tpidr2_save
++** msr tpidr2_el0, xzr
++** smstop za
++** bl private_za
++** smstart za
++** bl out_za
++** bl in_za
++** smstop za
++** bl private_za
++** ldp [^\n]+
++** ret
++*/
++__arm_new("za") void test5()
++{
++  private_za();
++  out_za();
++  in_za();
++  private_za();
++}
++
++// Despite the long test, there shouldn't be too much scope for variation
++// here.  The point is both to test correctness and code quality.
++/*
++** test6:
++** stp [^\n]+
++** mov x29, sp
++** mrs x0, tpidr2_el0
++** cbz x0, [^\n]+
++** bl __arm_tpidr2_save
++** msr tpidr2_el0, xzr
++** smstart za
++** bl out_za
++** rdsvl (x[0-9]+), #1
++** mul (x[0-9]+), \1, \1
++** sub sp, sp, \2
++** mov (x[0-9]+), sp
++** stp \3, \1, \[x29, #?16\]
++** add (x[0-9]+), x29, #?16
++** msr tpidr2_el0, \4
++** bl private_za
++** (
++** add (x[0-9]+), x29, #?16
++** mrs (x[0-9]+), tpidr2_el0
++** cbnz \6, [^\n]+
++** smstart za
++** mov x0, \5
++** |
++** add x0, x29, #?16
++** mrs (x[0-9]+), tpidr2_el0
++** cbnz \6, [^\n]+
++** smstart za
++** )
++** bl __arm_tpidr2_restore
++** msr tpidr2_el0, xzr
++** bl in_za
++** smstop za
++** mov sp, x29
++** ldp [^\n]+
++** ret
++*/
++__arm_new("za") void test6()
++{
++  out_za();
++  private_za();
++  in_za();
++}
++
++// Rely on previous tests for the part leading up to the smstart.
++/*
++** test7:
++** ...
++** smstart za
++** bl out_za
++** bl in_za
++** smstop za
++** bl private_za
++** smstart za
++** bl out_za
++** bl in_za
++** smstop za
++** ldp [^\n]+
++** ret
++*/
++__arm_new("za") void test7()
++{
++  out_za();
++  in_za();
++  private_za();
++  out_za();
++  in_za();
++}
++
++/*
++** test8:
++** ...
++** smstart za
++** bl out_za
++** bl in_za
++** smstop za
++** bl private_za
++** smstart za
++** bl out_za
++** bl in_za
++** smstop za
++** bl private_za
++** ldp [^\n]+
++** ret
++*/
++__arm_new("za") void test8()
++{
++  out_za();
++  in_za();
++  private_za();
++  out_za();
++  in_za();
++  private_za();
++}
++
++/*
++** test9:
++** ...
++** msr tpidr2_el0, x[0-9]+
++** bl private_za
++** bl private_za
++** bl private_za
++** bl private_za
++** add x[0-9]+, x29, #?16
++** mrs x[0-9]+, tpidr2_el0
++** ...
++*/
++__arm_new("za") void test9()
++{
++  out_za();
++  private_za();
++  private_za();
++  private_za();
++  private_za();
++  in_za();
++}
++
++/*
++** test10:
++** ldr (w[0-9]+), \[x0\]
++** cbz \1, [^\n]+
++** ldr [^\n]+
++** add [^\n]+
++** str [^\n]+
++** ret
++** ...
++*/
++__arm_new("za") void test10(volatile int *ptr)
++{
++  if (__builtin_expect (*ptr != 0, 1))
++    *ptr = *ptr + 1;
++  else
++    inout_za();
++}
++
++/*
++** test11:
++** ...
++** ldr w[0-9]+, [^\n]+
++** add (w[0-9]+), [^\n]+
++** str \1, [^\n]+
++** ...
++** ret
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** bl inout_za
++** ldr (w[0-9]+), [^\n]+
++** cbnz \2, [^\n]+
++** smstop za
++** ...
++*/
++__arm_new("za") void test11(volatile int *ptr)
++{
++  if (__builtin_expect (*ptr == 0, 0))
++    do
++      inout_za();
++    while (*ptr);
++  else
++    *ptr += 1;
++}
++
++__arm_new("za") void test12(volatile int *ptr)
++{
++  do
++    {
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++  out_za();
++  in_za();
++}
++
++/*
++** test13:
++** stp [^\n]+
++** ...
++** stp [^\n]+
++** ...
++** bl __arm_tpidr2_save
++** ...
++** msr tpidr2_el0, x[0-9]+
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** cbnz [^\n]+
++** smstart za
++** msr tpidr2_el0, xzr
++** bl out_za
++** bl in_za
++** ...
++** smstop za
++** ...
++*/
++__arm_new("za") void test13(volatile int *ptr)
++{
++  do
++    {
++      private_za();
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++  out_za();
++  in_za();
++}
++
++/*
++** test14:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** smstart za
++** bl inout_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** bl out_za
++** bl in_za
++** smstop za
++** ...
++*/
++__arm_new("za") void test14(volatile int *ptr)
++{
++  do
++    inout_za();
++  while (*ptr);
++  out_za();
++  in_za();
++}
++
++/*
++** test15:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** smstart za
++** bl out_za
++** bl in_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** smstop za
++** bl private_za
++** ldr [^\n]+
++** ldp [^\n]+
++** ret
++*/
++__arm_new("za") void test15(volatile int *ptr)
++{
++  do
++    {
++      out_za();
++      in_za();
++    }
++  while (*ptr);
++  private_za();
++}
++
++/*
++** test16:
++** ...
++** bl __arm_tpidr2_save
++** ...
++** smstart za
++** b [^\n]+
+-- loop:
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** msr tpidr2_el0, xzr
+-- loop_entry:
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** bl private_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** msr tpidr2_el0, xzr
++** smstop za
++** bl private_za
++** ...
++*/
++__arm_new("za") void test16(volatile int *ptr)
++{
++  do
++    {
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++  private_za();
++}
++
++/*
++** test17:
++** ...
++** bl private_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** ...
++** msr tpidr2_el0, xzr
++** ...
++** smstop za
++** ...
++*/
++__arm_new("za") void test17(volatile int *ptr)
++{
++  do
++    {
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++}
++
++/*
++** test18:
++** ldr w[0-9]+, [^\n]+
++** cbnz w[0-9]+, [^\n]+
++** ret
++** ...
++** smstop za
++** bl private_za
++** ...
++*/
++__arm_new("za") void test18(volatile int *ptr)
++{
++  if (__builtin_expect (*ptr, 0))
++    {
++      out_za();
++      in_za();
++      private_za();
++    }
++}
++
++/*
++** test19:
++** ...
++** ldr w[0-9]+, [^\n]+
++** cbz w[0-9]+, [^\n]+
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstop za
++** bl private_za
++** ...
++*/
++__arm_new("za") void test19(volatile int *ptr)
++{
++  if (__builtin_expect (*ptr != 0, 1))
++    private_za();
++  else
++    do
++      {
++	inout_za();
++	private_za();
++      }
++    while (*ptr);
++}
++
++/*
++** test20:
++** ...
++** bl a20
++** (?:(?!x0).)*
++** bl b20
++** ...
++** mov ([wx][0-9]+), [wx]0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** mov [wx]0, \1
++** ...
++** bl c20
++** ...
++*/
++__arm_new("za") void test20()
++{
++  extern int a20() __arm_inout("za");
++  extern int b20(int);
++  extern void c20(int) __arm_inout("za");
++  c20(b20(a20()));
++}
++
++/*
++** test21:
++** ...
++** bl a21
++** (?:(?!x0).)*
++** bl b21
++** ...
++** mov (x[0-9]+), x0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** mov x0, \1
++** ...
++** bl c21
++** ...
++*/
++__arm_new("za") void test21()
++{
++  extern __UINT64_TYPE__ a21() __arm_inout("za");
++  extern __UINT64_TYPE__ b21(__UINT64_TYPE__);
++  extern void c21(__UINT64_TYPE__) __arm_inout("za");
++  c21(b21(a21()));
++}
++
++/*
++** test22:
++** (?:(?!rdsvl).)*
++** rdsvl x[0-9]+, #1
++** (?:(?!rdsvl).)*
++*/
++__arm_new("za") void test22(volatile int *ptr)
++{
++  inout_za();
++  if (*ptr)
++    *ptr += 1;
++  else
++    private_za();
++  private_za();
++  in_za();
++}
++
++/*
++** test23:
++** (?:(?!__arm_tpidr2_save).)*
++** bl __arm_tpidr2_save
++** (?:(?!__arm_tpidr2_save).)*
++*/
++__arm_new("za") void test23(volatile int *ptr)
++{
++  if (*ptr)
++    *ptr += 1;
++  else
++    inout_za();
++  inout_za();
++}
++
++/*
++** test24:
++** ...
++** bl in_za
++** ...
++** incb x1
++** ...
++** bl out_za
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** incb x1
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** incb x1
++** ...
++** smstop za
++** ...
++** bl private_za
++** ...
++** ret
++*/
++__arm_new("za") void test24()
++{
++  in_za();
++  asm ("incb\tx1" ::: "x1", "za");
++  out_za();
++  inout_za();
++  private_za();
++  asm ("incb\tx1" ::: "x1", "za");
++  private_za();
++  asm ("incb\tx1" ::: "x1", "za");
++  in_za();
++  private_za();
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
+new file mode 100644
+index 000000000..d54840d3d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
+@@ -0,0 +1,595 @@
++// { dg-options "-O2 -fno-optimize-sibling-calls" }
++// { dg-final { check-function-bodies "**" "" } }
++
++void private_za();
++void out_za() __arm_out("za");
++void in_za() __arm_in("za");
++void inout_za() __arm_inout("za");
++void preserves_za() __arm_preserves("za");
++
++/*
++** test1:
++** ret
++*/
++void test1() __arm_inout("za")
++{
++}
++
++/*
++** test2:
++** ldr w0, \[x0\]
++** ret
++*/
++int test2(int *ptr) __arm_inout("za")
++{
++  return *ptr;
++}
++
++/*
++** test3:
++** ...
++** sub sp, sp, x[0-9]+
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++*/
++int test3() __arm_inout("za")
++{
++  private_za();
++  return 0;
++}
++
++/*
++** test4:
++** stp [^\n]+
++** [^\n]+
++** bl in_za
++** ldp [^\n]+
++** ret
++*/
++void test4() __arm_inout("za")
++{
++  in_za();
++}
++
++/*
++** test5:
++** ...
++** smstop za
++** ...
++** bl private_za
++** smstart za
++** bl out_za
++** bl in_za
++** ...
++** sub sp, sp, x[0-9]+
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++*/
++void test5() __arm_inout("za")
++{
++  private_za();
++  out_za();
++  in_za();
++  private_za();
++}
++
++/*
++** test6:
++** ...
++** bl out_za
++** ...
++** sub sp, sp, x[0-9]+
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++** bl in_za
++** ...
++*/
++void test6() __arm_inout("za")
++{
++  out_za();
++  private_za();
++  in_za();
++}
++
++/*
++** test7:
++** stp [^\n]+
++** [^\n]+
++** bl out_za
++** bl in_za
++** smstop za
++** bl private_za
++** smstart za
++** bl out_za
++** bl in_za
++** ldp [^\n]+
++** ret
++*/
++void test7() __arm_inout("za")
++{
++  out_za();
++  in_za();
++  private_za();
++  out_za();
++  in_za();
++}
++
++/*
++** test8:
++** stp [^\n]+
++** [^\n]+
++** bl out_za
++** bl in_za
++** smstop za
++** bl private_za
++** smstart za
++** bl out_za
++** bl in_za
++** ...
++** sub sp, sp, x[0-9]+
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++** ret
++*/
++void test8() __arm_inout("za")
++{
++  out_za();
++  in_za();
++  private_za();
++  out_za();
++  in_za();
++  private_za();
++}
++
++/*
++** test9:
++** stp [^\n]+
++** [^\n]+
++** bl out_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** bl private_za
++** bl private_za
++** bl private_za
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++*/
++void test9() __arm_inout("za")
++{
++  out_za();
++  private_za();
++  private_za();
++  private_za();
++  private_za();
++  in_za();
++}
++
++/*
++** test10:
++** ldr (w[0-9]+), \[x0\]
++** cbz \1, [^\n]+
++** ldr [^\n]+
++** add [^\n]+
++** str [^\n]+
++** ret
++** ...
++*/
++void test10(volatile int *ptr) __arm_inout("za")
++{
++  if (__builtin_expect (*ptr != 0, 1))
++    *ptr = *ptr + 1;
++  else
++    inout_za();
++}
++
++/*
++** test11:
++** (?!.*(\t__arm|\tza|tpidr2_el0)).*
++*/
++void test11(volatile int *ptr) __arm_inout("za")
++{
++  if (__builtin_expect (*ptr == 0, 0))
++    do
++      inout_za();
++    while (*ptr);
++  else
++    *ptr += 1;
++}
++
++void test12(volatile int *ptr) __arm_inout("za")
++{
++  do
++    {
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++  out_za();
++  in_za();
++}
++
++/*
++** test13:
++** stp [^\n]+
++** ...
++** stp [^\n]+
++** ...
+-- loop:
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** smstart za
++** msr tpidr2_el0, xzr
++** bl out_za
++** bl in_za
++** [^\n]+
++** [^\n]+
++** ldp [^\n]+
++** ret
++*/
++void test13(volatile int *ptr) __arm_inout("za")
++{
++  do
++    {
++      private_za();
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++  out_za();
++  in_za();
++}
++
++/*
++** test14:
++** ...
++** bl inout_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** bl out_za
++** bl in_za
++** ...
++*/
++void test14(volatile int *ptr) __arm_inout("za")
++{
++  do
++    inout_za();
++  while (*ptr);
++  out_za();
++  in_za();
++}
++
++/*
++** test15:
++** ...
++** bl out_za
++** bl in_za
++** ldr [^\n]+
++** cbnz [^\n]+
++** ...
++** stp [^\n]+
++** ...
++** msr tpidr2_el0, [^\n]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++*/
++void test15(volatile int *ptr) __arm_inout("za")
++{
++  do
++    {
++      out_za();
++      in_za();
++    }
++  while (*ptr);
++  private_za();
++}
++
++/*
++** test16:
++** stp [^\n]+
++** ...
++** stp [^\n]+
++** ...
++** b [^\n]+
+-- loop:
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** msr tpidr2_el0, xzr
+-- loop_entry:
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++*/
++void test16(volatile int *ptr) __arm_inout("za")
++{
++  do
++    {
++      inout_za();
++      private_za();
++    }
++  while (*ptr);
++  private_za();
++}
++
++/*
++** test17:
++** ...
+-- loop:
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** smstart za
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++** cbnz [^\n]+
++** [^\n]+
++** [^\n]+
++** ldp [^\n]+
++** ret
++*/
++void test17(volatile int *ptr) __arm_inout("za")
++{
++  do
++    {
++      inout_za();
++      private_za();
++      while (*ptr)
++	ptr += 1;
++    }
++  while (*ptr);
++}
++
++/*
++** test18:
++** ldr w[0-9]+, [^\n]+
++** cbnz w[0-9]+, [^\n]+
++** ret
++** ...
++** bl out_za
++** bl in_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** msr tpidr2_el0, xzr
++** ...
++*/
++void test18(volatile int *ptr) __arm_inout("za")
++{
++  if (__builtin_expect (*ptr, 0))
++    {
++      out_za();
++      in_za();
++      private_za();
++    }
++}
++
++void test19(volatile int *ptr) __arm_inout("za")
++{
++  if (__builtin_expect (*ptr != 0, 1))
++    private_za();
++  else
++    do
++      {
++	inout_za();
++	private_za();
++      }
++    while (*ptr);
++}
++
++/*
++** test20:
++** ...
++** bl a20
++** (?:(?!x0).)*
++** bl b20
++** ...
++** mov ([wx][0-9]+), [wx]0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** mov [wx]0, \1
++** ...
++** bl c20
++** ...
++*/
++void test20() __arm_inout("za")
++{
++  extern int a20() __arm_inout("za");
++  extern int b20(int);
++  extern void c20(int) __arm_inout("za");
++  c20(b20(a20()));
++}
++
++/*
++** test21:
++** ...
++** bl a21
++** (?:(?!x0).)*
++** bl b21
++** ...
++** mov (x[0-9]+), x0
++** ...
++** bl __arm_tpidr2_restore
++** ...
++** mov x0, \1
++** ...
++** bl c21
++** ...
++*/
++void test21() __arm_inout("za")
++{
++  extern __UINT64_TYPE__ a21() __arm_inout("za");
++  extern __UINT64_TYPE__ b21(__UINT64_TYPE__);
++  extern void c21(__UINT64_TYPE__) __arm_inout("za");
++  c21(b21(a21()));
++}
++
++/*
++** test22:
++** (?:(?!rdsvl).)*
++** rdsvl x[0-9]+, #1
++** (?:(?!rdsvl).)*
++*/
++void test22(volatile int *ptr) __arm_inout("za")
++{
++  inout_za();
++  if (*ptr)
++    *ptr += 1;
++  else
++    private_za();
++  private_za();
++  in_za();
++}
++
++void test23(volatile int *ptr) __arm_inout("za")
++{
++  if (*ptr)
++    *ptr += 1;
++  else
++    inout_za();
++  inout_za();
++}
++
++/*
++** test24:
++** ...
++** bl in_za
++** ...
++** incb x1
++** ...
++** bl out_za
++** bl inout_za
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** incb x1
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** incb x1
++** ...
++** msr tpidr2_el0, x[0-9]+
++** ...
++** bl private_za
++** ...
++** mrs x[0-9]+, tpidr2_el0
++** ...
++** ret
++*/
++void test24() __arm_inout("za")
++{
++  in_za();
++  asm ("incb\tx1" ::: "x1", "za");
++  out_za();
++  inout_za();
++  private_za();
++  asm ("incb\tx1" ::: "x1", "za");
++  private_za();
++  asm ("incb\tx1" ::: "x1", "za");
++  in_za();
++  private_za();
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
+new file mode 100644
+index 000000000..d5b226ae1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
+@@ -0,0 +1,23 @@
++// { dg-options "-O -fno-optimize-sibling-calls -fomit-frame-pointer" }
++
++void private_za();
++void out_za() __arm_out("za");
++void in_za() __arm_in("za");
++
++__arm_new("za") void test20(volatile int *ptr)
++{
++  if (*ptr)
++    out_za();
++  else
++    *ptr += 1;
++  *ptr += 1;
++  if (*ptr)
++    in_za();
++  else
++    *ptr += 1;
++}
++
++// { dg-final { scan-assembler {\tbl\t__arm_tpidr2_save\n} } }
++// { dg-final { scan-assembler {\tsmstart\tza\n} } }
++// { dg-final { scan-assembler {\tsmstop\tza\n} } }
++// { dg-final { scan-assembler-not {\tsub\tsp, sp, x[0-9]+\n} } }
+--
+2.33.0
+
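Note on the ZA keywords exercised by the testsuite above: the following is a minimal usage sketch, not part of the patch, and the function names in it are illustrative only. It shows the source-level contract that the mode-switching entities (HAVE_ZA_SAVE_BUFFER and LOCAL_SME_STATE) implement: a call to a private-ZA function while the function's own ZA state is live forces the compiler to commit a lazy save (msr tpidr2_el0, ...) around the call and restore ZA on demand afterwards.

/* Minimal sketch (illustrative names, not from the patch).  */
void produce(void) __arm_out("za");   /* writes ZA for its caller */
void consume(void) __arm_in("za");    /* reads the caller's ZA */
void private_fn(void);                /* private-ZA: may clobber ZA */

__arm_new("za") int
example(void)
{
  produce();     /* ZA becomes live: ACTIVE_LIVE */
  private_fn();  /* ZA lazily saved: INACTIVE_LOCAL around the call */
  consume();     /* ZA restored (if the callee used it) and read */
  return 0;
}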
View file
_service:tar_scm:0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch
Added
@@ -0,0 +1,103 @@
+From 9866b4c1d85d88fd9e25ff3ac5224b69d4e0f0b2 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:26 +0000
+Subject: [PATCH 108/157] [Backport][SME] aarch64: Add a register class for
+ w12-w15
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=084122adb5792a9c8e7f7876e2c1d59ba80c228b
+
+Some SME instructions use w12-w15 to index ZA.  This patch
+adds a register class for that range.
+
+gcc/
+	* config/aarch64/aarch64.h (W12_W15_REGNUM_P): New macro.
+	(W12_W15_REGS): New register class.
+	(REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add entries for it.
+	* config/aarch64/aarch64.cc (aarch64_regno_regclass)
+	(aarch64_class_max_nregs, aarch64_register_move_cost): Handle
+	W12_W15_REGS.
+---
+ gcc/config/aarch64/aarch64.cc | 12 +++++++-----
+ gcc/config/aarch64/aarch64.h  |  6 ++++++
+ 2 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index a6e996c5b..112dfeabb 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -14213,6 +14213,9 @@ aarch64_label_mentioned_p (rtx x)
+ enum reg_class
+ aarch64_regno_regclass (unsigned regno)
+ {
++  if (W12_W15_REGNUM_P (regno))
++    return W12_W15_REGS;
++
+   if (STUB_REGNUM_P (regno))
+     return STUB_REGS;
+
+@@ -14577,6 +14580,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
+   unsigned int nregs, vec_flags;
+   switch (regclass)
+     {
++    case W12_W15_REGS:
+     case STUB_REGS:
+     case TAILCALL_ADDR_REGS:
+     case POINTER_REGS:
+@@ -16926,13 +16930,11 @@ aarch64_register_move_cost (machine_mode mode,
+   const struct cpu_regmove_cost *regmove_cost
+     = aarch64_tune_params.regmove_cost;
+
+-  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
+-  if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS
+-      || to == STUB_REGS)
++  /* Treat any subset of POINTER_REGS as though it were GENERAL_REGS.  */
++  if (reg_class_subset_p (to, POINTER_REGS))
+     to = GENERAL_REGS;
+
+-  if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS
+-      || from == STUB_REGS)
++  if (reg_class_subset_p (from, POINTER_REGS))
+     from = GENERAL_REGS;
+
+   /* Make RDFFR very expensive.  In particular, if we know that the FFR
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 89d30b9bf..8b21faf34 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -648,6 +648,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+    && (REGNO) != R17_REGNUM \
+    && (REGNO) != R30_REGNUM) \
+
++#define W12_W15_REGNUM_P(REGNO) \
++  IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM)
++
+ #define FP_REGNUM_P(REGNO) \
+   (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
+
+@@ -674,6 +677,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ enum reg_class
+ {
+   NO_REGS,
++  W12_W15_REGS,
+   TAILCALL_ADDR_REGS,
+   STUB_REGS,
+   GENERAL_REGS,
+@@ -698,6 +702,7 @@ enum reg_class
+ #define REG_CLASS_NAMES \
+ { \
+   "NO_REGS", \
++  "W12_W15_REGS", \
+   "TAILCALL_ADDR_REGS", \
+   "STUB_REGS", \
+   "GENERAL_REGS", \
+@@ -719,6 +724,7 @@ enum reg_class
+ #define REG_CLASS_CONTENTS \
+ { \
+   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */ \
++  { 0x0000f000, 0x00000000, 0x00000000 },	/* W12_W15_REGS */ \
+   { 0x00030000, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\
+   { 0x3ffcffff, 0x00000000, 0x00000000 },	/* STUB_REGS */ \
+   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */ \
+--
+2.33.0
+
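A note on the new REG_CLASS_CONTENTS entry: the first 32-bit word of each initializer covers hard registers 0-31, so 0x0000f000 (bits 12-15) selects exactly w12-w15. A throwaway self-check of that arithmetic (illustrative only, not part of the patch):

    #include <assert.h>

    int main(void)
    {
      unsigned int w12_w15_mask = 0x0000f000;
      for (unsigned int regno = 0; regno < 32; regno++)
        /* Bit N of the mask should be set iff register N is in the class.  */
        assert (((w12_w15_mask >> regno) & 1) == (regno >= 12 && regno <= 15));
      return 0;
    }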
View file
_service:tar_scm:0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch
Added
@@ -0,0 +1,72 @@
+From 8310c0df319a86bc2f63b8d3198dd1c394827bac Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:27 +0000
+Subject: [PATCH 109/157] [Backport][SME] aarch64: Add a VNx1TI mode
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80fc055cf00fee4b1f9f19f77c8880b12226e086
+
+Although TI isn't really a native SVE element mode, it's convenient
+for SME if we define VNx1TI anyway, so that it can be used to
+distinguish .Q ZA operations from others.  It's purely an RTL
+convenience and isn't (yet) a valid storage mode.
+
+gcc/
+	* config/aarch64/aarch64-modes.def: Add VNx1TI.
+---
+ gcc/config/aarch64/aarch64-modes.def | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
+index 8f399225a..8fa66fdb3 100644
+--- a/gcc/config/aarch64/aarch64-modes.def
++++ b/gcc/config/aarch64/aarch64-modes.def
+@@ -146,7 +146,7 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2)
+    for 8-bit, 16-bit, 32-bit and 64-bit elements respectively.  It isn't
+    strictly necessary to set the alignment here, since the default would
+    be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer.  */
+-#define SVE_MODES(NVECS, VB, VH, VS, VD) \
++#define SVE_MODES(NVECS, VB, VH, VS, VD, VT) \
+   VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, NVECS == 1 ? 1 : 4); \
+   VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, NVECS == 1 ? 1 : 4); \
+ \
+@@ -154,6 +154,7 @@
+   ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \
+   ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \
++  ADJUST_NUNITS (VT##TI, exact_div (aarch64_sve_vg * NVECS, 2)); \
+   ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \
+   ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \
+@@ -163,17 +164,23 @@
+   ADJUST_ALIGNMENT (VH##HI, 16); \
+   ADJUST_ALIGNMENT (VS##SI, 16); \
+   ADJUST_ALIGNMENT (VD##DI, 16); \
++  ADJUST_ALIGNMENT (VT##TI, 16); \
+   ADJUST_ALIGNMENT (VH##BF, 16); \
+   ADJUST_ALIGNMENT (VH##HF, 16); \
+   ADJUST_ALIGNMENT (VS##SF, 16); \
+   ADJUST_ALIGNMENT (VD##DF, 16);
+
+-/* Give SVE vectors the names normally used for 256-bit vectors.
+-   The actual number depends on command-line flags.  */
+-SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
+-SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
+-SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
+-SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
++/* Give SVE vectors names of the form VNxX, where X describes what is
++   stored in each 128-bit unit.  The actual size of the mode depends
++   on command-line flags.
++
++   VNx1TI isn't really a native SVE mode, but it can be useful in some
++   limited situations.  */
++VECTOR_MODE_WITH_PREFIX (VNx, INT, TI, 1, 1);
++SVE_MODES (1, VNx16, VNx8, VNx4, VNx2, VNx1)
++SVE_MODES (2, VNx32, VNx16, VNx8, VNx4, VNx2)
++SVE_MODES (3, VNx48, VNx24, VNx12, VNx6, VNx3)
++SVE_MODES (4, VNx64, VNx32, VNx16, VNx8, VNx4)
+
+ /* Partial SVE vectors:
+
+--
+2.33.0
+
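The exact_div in the new ADJUST_NUNITS line deserves a worked example. aarch64_sve_vg counts the 64-bit granules in one SVE vector, so a 128-bit TI element spans two granules; since the architecture only allows vector lengths that are multiples of 128 bits, vg is always even and the division is exact. A small sketch of the arithmetic (illustrative only, not from the patch):

    /* Element counts per single vector (NVECS == 1) for a vector length
       of VL bits.  */
    unsigned int vg (unsigned int vl)            { return vl / 64; }
    unsigned int nunits_vnx2di (unsigned int vl) { return vg (vl); }     /* DI */
    unsigned int nunits_vnx1ti (unsigned int vl) { return vg (vl) / 2; } /* TI */
    /* VL == 128 gives vg == 2: two DI elements but a single TI element,
       matching exact_div (aarch64_sve_vg * NVECS, 2).  */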
View file
_service:tar_scm:0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch
Added
@@ -0,0 +1,118 @@
+From e3c0d3d98ab1f60900533f3f75c598f899f37c9f Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:27 +0000
+Subject: [PATCH 110/157] [Backport][SME] aarch64: Generalise
+ unspec_based_function_base
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ec23d5a29bc5d89cef60e2aba2fe4095ee12a8f
+
+Until now, SVE intrinsics that map directly to unspecs
+have always used type suffix 0 to distinguish between signed
+integers, unsigned integers, and floating-point values.
+SME adds functions that need to use type suffix 1 instead.
+This patch generalises the classes accordingly.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins-functions.h
+	(unspec_based_function_base): Allow type suffix 1 to determine
+	the mode of the operation.
+	(unspec_based_function): Update accordingly.
+	(unspec_based_fused_function): Likewise.
+	(unspec_based_fused_lane_function): Likewise.
+---
+ .../aarch64/aarch64-sve-builtins-functions.h  | 29 ++++++++++++-------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+index 94a6d1207..f5fa4030c 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+@@ -250,18 +250,21 @@ class unspec_based_function_base : public function_base
+ public:
+   CONSTEXPR unspec_based_function_base (int unspec_for_sint,
+ 					int unspec_for_uint,
+-					int unspec_for_fp)
++					int unspec_for_fp,
++					unsigned int suffix_index = 0)
+     : m_unspec_for_sint (unspec_for_sint),
+       m_unspec_for_uint (unspec_for_uint),
+-      m_unspec_for_fp (unspec_for_fp)
++      m_unspec_for_fp (unspec_for_fp),
++      m_suffix_index (suffix_index)
+   {}
+
+   /* Return the unspec code to use for INSTANCE, based on type suffix 0.  */
+   int
+   unspec_for (const function_instance &instance) const
+   {
+-    return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
+-	    : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
++    auto &suffix = instance.type_suffix (m_suffix_index);
++    return (!suffix.integer_p ? m_unspec_for_fp
++	    : suffix.unsigned_p ? m_unspec_for_uint
+ 	    : m_unspec_for_sint);
+   }
+
+@@ -270,6 +273,9 @@ public:
+   int m_unspec_for_sint;
+   int m_unspec_for_uint;
+   int m_unspec_for_fp;
++
++  /* Which type suffix is used to choose between the unspecs.  */
++  unsigned int m_suffix_index;
+ };
+
+ /* A function_base for functions that have an associated unspec code.
+@@ -336,7 +342,8 @@ public:
+   rtx
+   expand (function_expander &e) const OVERRIDE
+   {
+-    return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
++    return e.use_exact_insn (CODE (unspec_for (e),
++				   e.vector_mode (m_suffix_index)));
+   }
+ };
+
+@@ -395,16 +402,16 @@ public:
+   {
+     int unspec = unspec_for (e);
+     insn_code icode;
+-    if (e.type_suffix (0).float_p)
++    if (e.type_suffix (m_suffix_index).float_p)
+       {
+ 	/* Put the operands in the normal (fma ...) order, with the accumulator
+ 	   last.  This fits naturally since that's also the unprinted operand
+ 	   in the asm output.  */
+ 	e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
+-	icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
++	icode = code_for_aarch64_sve (unspec, e.vector_mode (m_suffix_index));
+       }
+     else
+-      icode = INT_CODE (unspec, e.vector_mode (0));
++      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
+     return e.use_exact_insn (icode);
+   }
+ };
+@@ -430,16 +437,16 @@ public:
+   {
+     int unspec = unspec_for (e);
+     insn_code icode;
+-    if (e.type_suffix (0).float_p)
++    if (e.type_suffix (m_suffix_index).float_p)
+       {
+ 	/* Put the operands in the normal (fma ...) order, with the accumulator
+ 	   last.  This fits naturally since that's also the unprinted operand
+ 	   in the asm output.  */
+ 	e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
+-	icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
++	icode = code_for_aarch64_lane (unspec, e.vector_mode (m_suffix_index));
+       }
+     else
+-      icode = INT_CODE (unspec, e.vector_mode (0));
++      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
+     return e.use_exact_insn (icode);
+   }
+ };
+--
+2.33.0
+
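The effect of the new m_suffix_index member is easiest to see in isolation. A standalone sketch (simplified stand-in types, not the real GCC classes):

    /* Mirror of unspec_based_function_base::unspec_for: one chosen type
       suffix decides between the signed, unsigned and floating-point
       unspecs; the patch makes which suffix that is configurable.  */
    struct type_suffix_info { int integer_p; int unsigned_p; };

    int
    unspec_for (struct type_suffix_info suffix,
                int unspec_for_sint, int unspec_for_uint, int unspec_for_fp)
    {
      return !suffix.integer_p ? unspec_for_fp
             : suffix.unsigned_p ? unspec_for_uint
             : unspec_for_sint;
    }

Plain SVE intrinsics keep the default suffix 0; SME ZA intrinsics pass suffix_index == 1 so that the vector operand's suffix (e.g. the s8 in a _za32_s8 function), rather than the ZA tile suffix, selects the unspec.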
View file
_service:tar_scm:0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch
Added
@@ -0,0 +1,117 @@
+From 3d721b42c97baba562b77988cec0fec229217519 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:28 +0000
+Subject: [PATCH 111/157] [Backport][SME] aarch64: Generalise _m rules for SVE
+ intrinsics
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8de9304d94d4ec42863a25c1cb1a1ba9a1e3e0fe
+
+In SVE there was a simple rule that unary merging (_m) intrinsics
+had a separate initial argument to specify the values of inactive
+lanes, whereas other merging functions took inactive lanes from
+the first operand to the operation.
+
+That rule began to break down in SVE2, and it continues to do
+so in SME.  This patch therefore adds a virtual function to
+specify whether the separate initial argument is present or not.
+The old rule is still the default.
+
+gcc/
+	* config/aarch64/aarch64-sve-builtins.h
+	(function_shape::has_merge_argument_p): New member function.
+	* config/aarch64/aarch64-sve-builtins.cc:
+	(function_resolver::check_gp_argument): Use it.
+	(function_expander::get_fallback_value): Likewise.
+	* config/aarch64/aarch64-sve-builtins-shapes.cc
+	(apply_predication): Likewise.
+	(unary_convert_narrowt_def::has_merge_argument_p): New function.
+---
+ gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 10 ++++++++--
+ gcc/config/aarch64/aarch64-sve-builtins.cc        |  4 ++--
+ gcc/config/aarch64/aarch64-sve-builtins.h         | 13 +++++++++++++
+ 3 files changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+index 95e40d8f3..c536949ba 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+@@ -66,8 +66,8 @@ apply_predication (const function_instance &instance, tree return_type,
+      the same type as the result.  For unary_convert_narrowt it also
+      provides the "bottom" half of active elements, and is present
+      for all types of predication.  */
+-  if ((argument_types.length () == 2 && instance.pred == PRED_m)
+-      || instance.shape == shapes::unary_convert_narrowt)
++  auto nargs = argument_types.length () - 1;
++  if (instance.shape->has_merge_argument_p (instance, nargs))
+     argument_types.quick_insert (0, return_type);
+ }
+ }
+@@ -3271,6 +3271,12 @@
+    predicate.  */
+ struct unary_convert_narrowt_def : public overloaded_base<1>
+ {
++  bool
++  has_merge_argument_p (const function_instance &, unsigned int) const override
++  {
++    return true;
++  }
++
+   void
+   build (function_builder &b, const function_group_info &group) const OVERRIDE
+   {
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
+index 5f3a2baea..3441b4294 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
+@@ -2287,7 +2287,7 @@ function_resolver::check_gp_argument (unsigned int nops,
+   if (pred != PRED_none)
+     {
+       /* Unary merge operations should use resolve_unary instead.  */
+-      gcc_assert (nops != 1 || pred != PRED_m);
++      gcc_assert (!shape->has_merge_argument_p (*this, nops));
+       nargs = nops + 1;
+       if (!check_num_arguments (nargs)
+ 	  || !require_vector_type (i, VECTOR_TYPE_svbool_t))
+@@ -2931,7 +2931,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
+
+   gcc_assert (pred == PRED_m || pred == PRED_x);
+   if (merge_argno == DEFAULT_MERGE_ARGNO)
+-    merge_argno = nops == 1 && pred == PRED_m ? 0 : 1;
++    merge_argno = shape->has_merge_argument_p (*this, nops) ? 0 : 1;
+
+   if (merge_argno == 0)
+     return args[argno++];
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index 7132b6e77..f16ac3947 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -710,6 +710,9 @@ public:
+ class function_shape
+ {
+ public:
++  virtual bool has_merge_argument_p (const function_instance &,
++				     unsigned int) const;
++
+   virtual bool explicit_type_suffix_p (unsigned int) const = 0;
+
+   /* True if the group suffix is present in overloaded names.
+@@ -982,6 +985,16 @@ function_base::vectors_per_tuple (const function_instance &instance) const
+   return instance.group_suffix ().vectors_per_tuple;
+ }
+
++/* Return true if INSTANCE (which has NARGS arguments) has an initial
++   vector argument whose only purpose is to specify the values of
++   inactive lanes.  */
++inline bool
++function_shape::has_merge_argument_p (const function_instance &instance,
++				      unsigned int nargs) const
++{
++  return nargs == 1 && instance.pred == PRED_m;
++}
++
+ /* Return the mode of the result of a call.  */
+ inline machine_mode
+ function_expander::result_mode () const
+--
+2.33.0
+
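In user-facing terms, the rule that has_merge_argument_p virtualises looks like this (illustrative C, not part of the patch):

    #include <arm_sve.h>

    svint32_t
    f (svbool_t pg, svint32_t inactive, svint32_t x, svint32_t y)
    {
      /* Unary _m: a separate first argument supplies the inactive lanes.  */
      svint32_t a = svabs_m (inactive, pg, x);
      /* Non-unary _m: inactive lanes come from the first operand, x.  */
      svint32_t b = svadd_m (pg, x, y);
      return svadd_m (pg, a, b);
    }

The default implementation keeps exactly the old "nargs == 1 && PRED_m" rule; shapes such as unary_convert_narrowt (and later the SME shapes) override it.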
View file
_service:tar_scm:0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch
Added
@@ -0,0 +1,15955 @@ +From 6c651a11f8e68244c4c53ad7b29983f54a3bc737 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:28 +0000 +Subject: PATCH 112/157 BackportSME aarch64: Add support for <arm_sme.h> + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4f6ab9537051e156d52bd8e9df40107ba6685895 + +This adds support for the SME parts of arm_sme.h. + +gcc/ + * doc/invoke.texi: Document +sme-i16i64 and +sme-f64f64. + * config.gcc (aarch64*-*-*): Add arm_sme.h to the list of headers + to install and aarch64-sve-builtins-sme.o to the list of objects + to build. + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define + or undefine TARGET_SME, TARGET_SME_I16I64 and TARGET_SME_F64F64. + (aarch64_pragma_aarch64): Handle arm_sme.h. + * config/aarch64/aarch64-option-extensions.def (sme-i16i64) + (sme-f64f64): New extensions. + * config/aarch64/aarch64-protos.h (aarch64_sme_vq_immediate) + (aarch64_addsvl_addspl_immediate_p, aarch64_output_addsvl_addspl) + (aarch64_output_sme_zero_za): Declare. + (aarch64_output_move_struct): Delete. + (aarch64_sme_ldr_vnum_offset): Declare. + (aarch64_sve::handle_arm_sme_h): Likewise. + * config/aarch64/aarch64.h (AARCH64_ISA_SM_ON): New macro. + (AARCH64_ISA_SME_I16I64, AARCH64_ISA_SME_F64F64): Likewise. + (TARGET_STREAMING, TARGET_STREAMING_SME): Likewise. + (TARGET_SME_I16I64, TARGET_SME_F64F64): Likewise. + * config/aarch64/aarch64.cc (aarch64_sve_rdvl_factor_p): Rename to... + (aarch64_sve_rdvl_addvl_factor_p): ...this. + (aarch64_sve_rdvl_immediate_p): Update accordingly. + (aarch64_rdsvl_immediate_p, aarch64_add_offset): Likewise. + (aarch64_sme_vq_immediate): Likewise. Make public. + (aarch64_sve_addpl_factor_p): New function. + (aarch64_sve_addvl_addpl_immediate_p): Use + aarch64_sve_rdvl_addvl_factor_p and aarch64_sve_addpl_factor_p. + (aarch64_addsvl_addspl_immediate_p): New function. + (aarch64_output_addsvl_addspl): Likewise. + (aarch64_cannot_force_const_mem): Return true for RDSVL immediates. + (aarch64_classify_index): Handle .Q scaling for VNx1TImode. + (aarch64_classify_address): Likewise for vnum offsets. + (aarch64_output_sme_zero_za): New function. + (aarch64_sme_ldr_vnum_offset_p): Likewise. + * config/aarch64/predicates.md (aarch64_addsvl_addspl_immediate): + New predicate. + (aarch64_pluslong_operand): Include it for SME. + * config/aarch64/constraints.md (Ucj, Uav): New constraints. + * config/aarch64/iterators.md (VNx1TI_ONLY): New mode iterator. + (SME_ZA_I, SME_ZA_SDI, SME_ZA_SDF_I, SME_MOP_BHI): Likewise. + (SME_MOP_HSDF): Likewise. + (UNSPEC_SME_ADDHA, UNSPEC_SME_ADDVA, UNSPEC_SME_FMOPA) + (UNSPEC_SME_FMOPS, UNSPEC_SME_LD1_HOR, UNSPEC_SME_LD1_VER) + (UNSPEC_SME_READ_HOR, UNSPEC_SME_READ_VER, UNSPEC_SME_SMOPA) + (UNSPEC_SME_SMOPS, UNSPEC_SME_ST1_HOR, UNSPEC_SME_ST1_VER) + (UNSPEC_SME_SUMOPA, UNSPEC_SME_SUMOPS, UNSPEC_SME_UMOPA) + (UNSPEC_SME_UMOPS, UNSPEC_SME_USMOPA, UNSPEC_SME_USMOPS) + (UNSPEC_SME_WRITE_HOR, UNSPEC_SME_WRITE_VER): New unspecs. + (elem_bits): Handle x2 and x4 structure modes, plus VNx1TI. + (Vetype, Vesize, VPRED): Handle VNx1TI. + (b): New mode attribute. + (SME_LD1, SME_READ, SME_ST1, SME_WRITE, SME_BINARY_SDI, SME_INT_MOP) + (SME_FP_MOP): New int iterators. + (optab): Handle SME unspecs. + (hv): New int attribute. + * config/aarch64/aarch64.md (*add<mode>3_aarch64): Handle ADDSVL + and ADDSPL. + * config/aarch64/aarch64-sme.md (UNSPEC_SME_LDR): New unspec. 
+ (@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus) + (aarch64_sme_ldr0, @aarch64_sme_ldrn<mode>): New patterns. + (UNSPEC_SME_STR): New unspec. + (@aarch64_sme_<optab><mode>, @aarch64_sme_<optab><mode>_plus) + (aarch64_sme_str0, @aarch64_sme_strn<mode>): New patterns. + (@aarch64_sme_<optab><v_int_container><mode>): Likewise. + (*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise. + (@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise. + (@aarch64_sme_<optab><v_int_container><mode>): Likewise. + (*aarch64_sme_<optab><v_int_container><mode>_plus): Likewise. + (@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>): Likewise. + (UNSPEC_SME_ZERO): New unspec. + (aarch64_sme_zero): New pattern. + (@aarch64_sme_<SME_BINARY_SDI:optab><mode>): Likewise. + (@aarch64_sme_<SME_INT_MOP:optab><mode>): Likewise. + (@aarch64_sme_<SME_FP_MOP:optab><mode>): Likewise. + * config/aarch64/aarch64-sve-builtins.def: Add ZA type suffixes. + Include aarch64-sve-builtins-sme.def. + (DEF_SME_ZA_FUNCTION): New macro. + * config/aarch64/aarch64-sve-builtins.h (CP_READ_ZA): New call + property. + (CP_WRITE_ZA): Likewise. + (PRED_za_m): New predication type. + (type_suffix_index): Handle DEF_SME_ZA_SUFFIX. + (type_suffix_info): Add vector_p and za_p fields. + (function_instance::num_za_tiles): New member function. + (function_builder::get_attributes): Add an aarch64_feature_flags + argument. + (function_expander::get_contiguous_base): Take a base argument + number, a vnum argument number, and an argument that indicates + whether the vnum parameter is a factor of the SME vector length + or the prevailing vector length. + (function_expander::add_integer_operand): Take a poly_int64. + (sve_switcher::sve_switcher): Take a base set of flags. + (sme_switcher): New class. + (scalar_types): Add a null entry for NUM_VECTOR_TYPES. + * config/aarch64/aarch64-sve-builtins.cc: Include + aarch64-sve-builtins-sme.h. + (pred_suffixes): Add an entry for PRED_za_m. + (type_suffixes): Initialize vector_p and za_p. Handle ZA suffixes. + (TYPES_all_za, TYPES_d_za, TYPES_za_bhsd_data, TYPES_za_all_data) + (TYPES_za_s_integer, TYPES_za_d_integer, TYPES_mop_base) + (TYPES_mop_base_signed, TYPES_mop_base_unsigned, TYPES_mop_i16i64) + (TYPES_mop_i16i64_signed, TYPES_mop_i16i64_unsigned, TYPES_za): New + type suffix macros. + (preds_m, preds_za_m): New predication lists. + (function_groups): Handle DEF_SME_ZA_FUNCTION. + (scalar_types): Add an entry for NUM_VECTOR_TYPES. + (find_type_suffix_for_scalar_type): Check positively for vectors + rather than negatively for predicates. + (check_required_extensions): Handle PSTATE.SM and PSTATE.ZA + requirements. + (report_out_of_range): Handle the case where the minimum and + maximum are the same. + (function_instance::reads_global_state_p): Return true for functions + that read ZA. + (function_instance::modifies_global_state_p): Return true for functions + that write to ZA. + (sve_switcher::sve_switcher): Add a base flags argument. + (function_builder::get_name): Handle "__arm_" prefixes. + (add_attribute): Add an overload that takes a namespaces. + (add_shared_state_attribute): New function. + (function_builder::get_attributes): Take the required feature flags + as argument. Add streaming and ZA attributes where appropriate. + (function_builder::add_unique_function): Update calls accordingly. + (function_resolver::check_gp_argument): Assert that the predication + isn't ZA _m predication. 
+ (function_checker::function_checker): Don't bias the argument + number for ZA _m predication. + (function_expander::get_contiguous_base): Add arguments that + specify the base argument number, the vnum argument number, + and an argument that indicates whether the vnum parameter is + a factor of the SME vector length or the prevailing vector length. + Handle the SME case. + (function_expander::add_input_operand): Handle pmode_register_operand. + (function_expander::add_integer_operand): Take a poly_int64. + (init_builtins): Call handle_arm_sme_h for LTO. + (handle_arm_sve_h): Skip SME intrinsics. + (handle_arm_sme_h): New function. + * config/aarch64/aarch64-sve-builtins-functions.h + (read_write_za, write_za): New classes. + (unspec_based_sme_function, za_arith_function): New using aliases. + (quiet_za_arith_function): Likewise. + * config/aarch64/aarch64-sve-builtins-shapes.h + (binary_za_int_m, binary_za_m, binary_za_uint_m, bool_inherent) + (inherent_za, inherent_mask_za, ldr_za, load_za, read_za_m, store_za) + (str_za, unary_za_m, write_za_m): Declare. + * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): + Expect za_m functions to have an existing governing predicate. + (binary_za_m_base, binary_za_int_m_def, binary_za_m_def): New classes. + (binary_za_uint_m_def, bool_inherent_def, inherent_za_def): Likewise. + (inherent_mask_za_def, ldr_za_def, load_za_def, read_za_m_def) + (store_za_def, str_za_def, unary_za_m_def, write_za_m_def): Likewise. + * config/aarch64/arm_sme.h: New file. + * config/aarch64/aarch64-sve-builtins-sme.h: Likewise. + * config/aarch64/aarch64-sve-builtins-sme.cc: Likewise. + * config/aarch64/aarch64-sve-builtins-sme.def: Likewise. + * config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on + aarch64-sve-builtins-sme.def and aarch64-sve-builtins-sme.h. + (aarch64-sve-builtins-sme.o): New rule. + +gcc/testsuite/ + * lib/target-supports.exp: Add sme and sme-i16i64 features. + * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test __ARM_FEATURE_SME* + macros. + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Allow functions + to be marked as __arm_streaming, __arm_streaming_compatible, and + __arm_inout("za"). + * g++.target/aarch64/sve/acle/general-c++/func_redef_4.c: Mark the + function as __arm_streaming_compatible. + * g++.target/aarch64/sve/acle/general-c++/func_redef_5.c: Likewise. + * g++.target/aarch64/sve/acle/general-c++/func_redef_7.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/func_redef_4.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/func_redef_5.c: Likewise. + * g++.target/aarch64/sme/aarch64-sme-acle-asm.exp: New test harness. + * gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c: New test. + * gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c: Likewise. 
+--- + gcc/config.gcc | 4 +- + gcc/config/aarch64/aarch64-c.cc | 6 + + .../aarch64/aarch64-option-extensions.def | 4 + + gcc/config/aarch64/aarch64-protos.h | 8 +- + gcc/config/aarch64/aarch64-sme.md | 373 +++++++++++++++ + .../aarch64/aarch64-sve-builtins-functions.h | 64 +++ + .../aarch64/aarch64-sve-builtins-shapes.cc | 306 +++++++++++- + .../aarch64/aarch64-sve-builtins-shapes.h | 13 + + .../aarch64/aarch64-sve-builtins-sme.cc | 412 +++++++++++++++++ + .../aarch64/aarch64-sve-builtins-sme.def | 76 +++ + gcc/config/aarch64/aarch64-sve-builtins-sme.h | 57 +++ + gcc/config/aarch64/aarch64-sve-builtins.cc | 336 ++++++++++++-- + gcc/config/aarch64/aarch64-sve-builtins.def | 28 ++ + gcc/config/aarch64/aarch64-sve-builtins.h | 46 +- + gcc/config/aarch64/aarch64.cc | 140 +++++- + gcc/config/aarch64/aarch64.h | 15 + + gcc/config/aarch64/aarch64.md | 13 +- + gcc/config/aarch64/arm_sme.h | 45 ++ + gcc/config/aarch64/constraints.md | 9 + + gcc/config/aarch64/iterators.md | 94 +++- + gcc/config/aarch64/predicates.md | 8 +- + gcc/config/aarch64/t-aarch64 | 17 +- + gcc/doc/invoke.texi | 4 + + .../aarch64/sme/aarch64-sme-acle-asm.exp | 82 ++++ + .../sve/acle/general-c++/func_redef_4.c | 3 +- + .../sve/acle/general-c++/func_redef_5.c | 1 + + .../sve/acle/general-c++/func_redef_7.c | 1 + + .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 38 ++ + .../aarch64/sme/aarch64-sme-acle-asm.exp | 81 ++++ + .../aarch64/sme/acle-asm/addha_za32.c | 48 ++ + .../aarch64/sme/acle-asm/addha_za64.c | 50 ++ + .../aarch64/sme/acle-asm/addva_za32.c | 48 ++ + .../aarch64/sme/acle-asm/addva_za64.c | 50 ++ + .../aarch64/sme/acle-asm/arm_has_sme_sc.c | 25 + + .../sme/acle-asm/arm_in_streaming_mode_ns.c | 11 + + .../sme/acle-asm/arm_in_streaming_mode_s.c | 11 + + .../sme/acle-asm/arm_in_streaming_mode_sc.c | 26 ++ + .../gcc.target/aarch64/sme/acle-asm/cntsb_s.c | 310 +++++++++++++ + .../aarch64/sme/acle-asm/cntsb_sc.c | 12 + + .../gcc.target/aarch64/sme/acle-asm/cntsd_s.c | 277 +++++++++++ + .../aarch64/sme/acle-asm/cntsd_sc.c | 13 + + .../gcc.target/aarch64/sme/acle-asm/cntsh_s.c | 279 +++++++++++ + .../aarch64/sme/acle-asm/cntsh_sc.c | 13 + + .../gcc.target/aarch64/sme/acle-asm/cntsw_s.c | 278 +++++++++++ + .../aarch64/sme/acle-asm/cntsw_sc.c | 13 + + .../aarch64/sme/acle-asm/ld1_hor_vnum_za128.c | 77 ++++ + .../aarch64/sme/acle-asm/ld1_hor_vnum_za16.c | 123 +++++ + .../aarch64/sme/acle-asm/ld1_hor_vnum_za32.c | 123 +++++ + .../aarch64/sme/acle-asm/ld1_hor_vnum_za64.c | 112 +++++ + .../aarch64/sme/acle-asm/ld1_hor_vnum_za8.c | 112 +++++ + .../aarch64/sme/acle-asm/ld1_hor_za128.c | 83 ++++ + .../aarch64/sme/acle-asm/ld1_hor_za16.c | 126 +++++ + .../aarch64/sme/acle-asm/ld1_hor_za32.c | 125 +++++ + .../aarch64/sme/acle-asm/ld1_hor_za64.c | 105 +++++ + .../aarch64/sme/acle-asm/ld1_hor_za8.c | 95 ++++ + .../aarch64/sme/acle-asm/ld1_ver_vnum_za128.c | 77 ++++ + .../aarch64/sme/acle-asm/ld1_ver_vnum_za16.c | 123 +++++ + .../aarch64/sme/acle-asm/ld1_ver_vnum_za32.c | 123 +++++ + .../aarch64/sme/acle-asm/ld1_ver_vnum_za64.c | 112 +++++ + .../aarch64/sme/acle-asm/ld1_ver_vnum_za8.c | 112 +++++ + .../aarch64/sme/acle-asm/ld1_ver_za128.c | 83 ++++ + .../aarch64/sme/acle-asm/ld1_ver_za16.c | 126 +++++ + .../aarch64/sme/acle-asm/ld1_ver_za32.c | 125 +++++ + .../aarch64/sme/acle-asm/ld1_ver_za64.c | 105 +++++ + .../aarch64/sme/acle-asm/ld1_ver_za8.c | 95 ++++ + .../aarch64/sme/acle-asm/ldr_vnum_za_s.c | 147 ++++++ + .../aarch64/sme/acle-asm/ldr_vnum_za_sc.c | 148 ++++++ + .../aarch64/sme/acle-asm/ldr_za_s.c | 124 +++++ + 
.../aarch64/sme/acle-asm/ldr_za_sc.c | 71 +++ + .../aarch64/sme/acle-asm/mopa_za32.c | 102 ++++ + .../aarch64/sme/acle-asm/mopa_za64.c | 70 +++ + .../aarch64/sme/acle-asm/mops_za32.c | 102 ++++ + .../aarch64/sme/acle-asm/mops_za64.c | 70 +++ + .../aarch64/sme/acle-asm/read_hor_za128.c | 435 ++++++++++++++++++ + .../aarch64/sme/acle-asm/read_hor_za16.c | 207 +++++++++ + .../aarch64/sme/acle-asm/read_hor_za32.c | 196 ++++++++ + .../aarch64/sme/acle-asm/read_hor_za64.c | 186 ++++++++ + .../aarch64/sme/acle-asm/read_hor_za8.c | 125 +++++ + .../aarch64/sme/acle-asm/read_ver_za128.c | 435 ++++++++++++++++++ + .../aarch64/sme/acle-asm/read_ver_za16.c | 207 +++++++++ + .../aarch64/sme/acle-asm/read_ver_za32.c | 196 ++++++++ + .../aarch64/sme/acle-asm/read_ver_za64.c | 186 ++++++++ + .../aarch64/sme/acle-asm/read_ver_za8.c | 125 +++++ + .../aarch64/sme/acle-asm/st1_hor_vnum_za128.c | 77 ++++ + .../aarch64/sme/acle-asm/st1_hor_vnum_za16.c | 123 +++++ + .../aarch64/sme/acle-asm/st1_hor_vnum_za32.c | 123 +++++ + .../aarch64/sme/acle-asm/st1_hor_vnum_za64.c | 112 +++++ + .../aarch64/sme/acle-asm/st1_hor_vnum_za8.c | 112 +++++ + .../aarch64/sme/acle-asm/st1_hor_za128.c | 83 ++++ + .../aarch64/sme/acle-asm/st1_hor_za16.c | 126 +++++ + .../aarch64/sme/acle-asm/st1_hor_za32.c | 125 +++++ + .../aarch64/sme/acle-asm/st1_hor_za64.c | 105 +++++ + .../aarch64/sme/acle-asm/st1_hor_za8.c | 95 ++++ + .../aarch64/sme/acle-asm/st1_ver_vnum_za128.c | 77 ++++ + .../aarch64/sme/acle-asm/st1_ver_vnum_za16.c | 123 +++++ + .../aarch64/sme/acle-asm/st1_ver_vnum_za32.c | 123 +++++ + .../aarch64/sme/acle-asm/st1_ver_vnum_za64.c | 112 +++++ + .../aarch64/sme/acle-asm/st1_ver_vnum_za8.c | 112 +++++ + .../aarch64/sme/acle-asm/st1_ver_za128.c | 83 ++++ + .../aarch64/sme/acle-asm/st1_ver_za16.c | 126 +++++ + .../aarch64/sme/acle-asm/st1_ver_za32.c | 125 +++++ + .../aarch64/sme/acle-asm/st1_ver_za64.c | 105 +++++ + .../aarch64/sme/acle-asm/st1_ver_za8.c | 95 ++++ + .../aarch64/sme/acle-asm/str_vnum_za_s.c | 147 ++++++ + .../aarch64/sme/acle-asm/str_vnum_za_sc.c | 148 ++++++ + .../aarch64/sme/acle-asm/str_za_s.c | 124 +++++ + .../aarch64/sme/acle-asm/str_za_sc.c | 71 +++ + .../aarch64/sme/acle-asm/sumopa_za32.c | 30 ++ + .../aarch64/sme/acle-asm/sumopa_za64.c | 32 ++ + .../aarch64/sme/acle-asm/sumops_za32.c | 30 ++ + .../aarch64/sme/acle-asm/sumops_za64.c | 32 ++ + .../aarch64/sme/acle-asm/test_sme_acle.h | 62 +++ + .../aarch64/sme/acle-asm/undef_za.c | 33 ++ + .../aarch64/sme/acle-asm/usmopa_za32.c | 30 ++ + .../aarch64/sme/acle-asm/usmopa_za64.c | 32 ++ + .../aarch64/sme/acle-asm/usmops_za32.c | 30 ++ + .../aarch64/sme/acle-asm/usmops_za64.c | 32 ++ + .../aarch64/sme/acle-asm/write_hor_za128.c | 193 ++++++++ + .../aarch64/sme/acle-asm/write_hor_za16.c | 133 ++++++ + .../aarch64/sme/acle-asm/write_hor_za32.c | 143 ++++++ + .../aarch64/sme/acle-asm/write_hor_za64.c | 133 ++++++ + .../aarch64/sme/acle-asm/write_hor_za8.c | 93 ++++ + .../aarch64/sme/acle-asm/write_ver_za128.c | 193 ++++++++ + .../aarch64/sme/acle-asm/write_ver_za16.c | 133 ++++++ + .../aarch64/sme/acle-asm/write_ver_za32.c | 143 ++++++ + .../aarch64/sme/acle-asm/write_ver_za64.c | 133 ++++++ + .../aarch64/sme/acle-asm/write_ver_za8.c | 93 ++++ + .../aarch64/sme/acle-asm/zero_mask_za.c | 130 ++++++ + .../gcc.target/aarch64/sme/acle-asm/zero_za.c | 11 + + .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +- + .../sve/acle/general-c/binary_za_int_m_1.c | 50 ++ + .../sve/acle/general-c/binary_za_m_1.c | 49 ++ + .../sve/acle/general-c/binary_za_m_2.c | 11 + + 
.../sve/acle/general-c/binary_za_uint_m_1.c | 50 ++ + .../aarch64/sve/acle/general-c/func_redef_4.c | 3 +- + .../aarch64/sve/acle/general-c/func_redef_5.c | 1 + + .../aarch64/sve/acle/general-c/read_za_m_1.c | 48 ++ + .../aarch64/sve/acle/general-c/unary_za_m_1.c | 49 ++ + .../aarch64/sve/acle/general-c/write_za_m_1.c | 48 ++ + gcc/testsuite/lib/target-supports.exp | 3 +- + 140 files changed, 13816 insertions(+), 78 deletions(-) + create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.cc + create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.def + create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.h + create mode 100644 gcc/config/aarch64/arm_sme.h + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c + create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c + 
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index da66603cd..19b21a280 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -325,11 +325,11 @@ m32c*-*-*) + ;; + aarch64*-*-*) + cpu_type=aarch64 +- extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h" ++ extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h" + c_target_objs="aarch64-c.o" + cxx_target_objs="aarch64-c.o" + d_target_objs="aarch64-d.o" +- extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o" ++ extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o 
aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o" + target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" + target_has_targetm_common=yes + ;; +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 76c20848f..cb8a6c2fc 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -250,6 +250,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + "__ARM_FEATURE_LS64", pfile); + aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + ++ aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile); ++ aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile); ++ aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile); ++ + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A + 16-bit floating-point extensions. */ +@@ -347,6 +351,8 @@ aarch64_pragma_aarch64 (cpp_reader *) + const char *name = TREE_STRING_POINTER (x); + if (strcmp (name, "arm_sve.h") == 0) + aarch64_sve::handle_arm_sve_h (); ++ else if (strcmp (name, "arm_sme.h") == 0) ++ aarch64_sve::handle_arm_sme_h (); + else if (strcmp (name, "arm_neon.h") == 0) + handle_arm_neon_h (); + else if (strcmp (name, "arm_acle.h") == 0) +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index faee64a79..98854dbce 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -151,4 +151,8 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") + + AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") + ++AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "") ++ ++AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "") ++ + #undef AARCH64_OPT_EXTENSION +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 0883ddd1a..81900fa83 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -809,7 +809,11 @@ bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); + void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); + bool aarch64_rdsvl_immediate_p (const_rtx); ++rtx aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT, ++ aarch64_feature_flags); + char *aarch64_output_rdsvl (const_rtx); ++bool aarch64_addsvl_addspl_immediate_p (const_rtx); ++char *aarch64_output_addsvl_addspl (rtx); + bool aarch64_mov_operand_p (rtx, machine_mode); + rtx aarch64_reverse_mask (machine_mode, unsigned int); + bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); +@@ -853,6 +857,7 @@ int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); + bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); + bool aarch64_use_return_insn_p (void); + const char *aarch64_output_casesi (rtx *); ++const char *aarch64_output_sme_zero_za (rtx); + + arm_pcs aarch64_tlsdesc_abi_id (); + enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); +@@ -867,7 +872,6 @@ int aarch64_uxt_size (int, HOST_WIDE_INT); + int aarch64_vec_fpconst_pow_of_2 (rtx); + rtx aarch64_eh_return_handler_rtx (void); + rtx aarch64_mask_from_zextract_ops (rtx, 
rtx); +-const char *aarch64_output_move_struct (rtx *operands); + rtx aarch64_return_addr_rtx (void); + rtx aarch64_return_addr (int, rtx); + rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT); +@@ -881,6 +885,7 @@ bool aarch64_sve_ldnf1_operand_p (rtx); + bool aarch64_sve_ldr_operand_p (rtx); + bool aarch64_sve_prefetch_operand_p (rtx, machine_mode); + bool aarch64_sve_struct_memory_operand_p (rtx); ++bool aarch64_sme_ldr_vnum_offset_p (rtx, rtx); + rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); + rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); + bool aarch64_stepped_int_parallel_p (rtx, int); +@@ -1000,6 +1005,7 @@ void handle_arm_neon_h (void); + namespace aarch64_sve { + void init_builtins (); + void handle_arm_sve_h (); ++ void handle_arm_sme_h (); + tree builtin_decl (unsigned, bool); + bool builtin_type_p (const_tree); + bool builtin_type_p (const_tree, unsigned int *, unsigned int *); +diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md +index d4973098e..da0745f65 100644 +--- a/gcc/config/aarch64/aarch64-sme.md ++++ b/gcc/config/aarch64/aarch64-sme.md +@@ -24,6 +24,19 @@ + ;; ---- Test current state + ;; ---- PSTATE.SM management + ;; ---- PSTATE.ZA management ++;; ++;; == Loads, stores and moves ++;; ---- Single-vector loads ++;; ---- Single-vector stores ++;; ---- Single-vector moves ++;; ---- Zeroing ++;; ++;; == Binary arithmetic ++;; ---- Binary arithmetic on ZA tile ++;; ++;; == Ternary arithmetic ++;; ---- INT Sum of outer products ++;; ---- FP Sum of outer products + + ;; ========================================================================= + ;; == State management +@@ -456,3 +469,363 @@ + DONE; + } + ) ++ ++;; ========================================================================= ++;; == Loads, stores and moves ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Single-vector loads ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - LD1 ++;; - LDR ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" ++ UNSPEC_SME_LDR ++) ++ ++(define_insn "@aarch64_sme_<optab><mode>" ++ (set (reg:SME_ZA_I ZA_REGNUM) ++ (unspec:SME_ZA_I ++ (reg:SME_ZA_I ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:SI 1 "register_operand" "Ucj") ++ (match_operand:<VPRED> 2 "register_operand" "Upl") ++ (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf") ++ SME_LD1)) ++ "TARGET_STREAMING_SME" ++ "ld1<Vesize>\t{ za%0<hv>.<Vetype>%w1, 0 }, %2/z, %3" ++) ++ ++(define_insn "@aarch64_sme_<optab><mode>_plus" ++ (set (reg:SME_ZA_I ZA_REGNUM) ++ (unspec:SME_ZA_I ++ (reg:SME_ZA_I ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (plus:SI (match_operand:SI 1 "register_operand" "Ucj") ++ (match_operand:SI 2 "const_int_operand")) ++ (match_operand:<VPRED> 3 "register_operand" "Upl") ++ (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf") ++ SME_LD1)) ++ "TARGET_STREAMING_SME ++ && UINTVAL (operands2) < 128 / <elem_bits>" ++ "ld1<Vesize>\t{ za%0<hv>.<Vetype>%w1, %2 }, %3/z, %4" ++) ++ ++(define_insn "aarch64_sme_ldr0" ++ (set (reg:VNx16QI ZA_REGNUM) ++ (unspec:VNx16QI ++ (reg:VNx16QI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:SI 0 "register_operand" "Ucj") ++ (mem:VNx16QI (match_operand 1 
"pmode_register_operand" "rk")) ++ UNSPEC_SME_LDR)) ++ "TARGET_SME" ++ "ldr\tza%w0, 0, %1, #0, mul vl" ++) ++ ++(define_insn "@aarch64_sme_ldrn<mode>" ++ (set (reg:VNx16QI ZA_REGNUM) ++ (unspec:VNx16QI ++ (reg:VNx16QI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (plus:SI (match_operand:SI 0 "register_operand" "Ucj") ++ (match_operand:SI 1 "const_int_operand")) ++ (mem:VNx16QI ++ (plus:P (match_operand:P 2 "register_operand" "rk") ++ (match_operand:P 3 "aarch64_mov_operand"))) ++ UNSPEC_SME_LDR)) ++ "TARGET_SME ++ && aarch64_sme_ldr_vnum_offset_p (operands1, operands3)" ++ "ldr\tza%w0, %1, %2, #%1, mul vl" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Single-vector stores ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ST1 ++;; - STR ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" ++ UNSPEC_SME_STR ++) ++ ++(define_insn "@aarch64_sme_<optab><mode>" ++ (set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf") ++ (unspec:SME_ZA_I ++ (reg:SME_ZA_I ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_dup 0) ++ (match_operand:DI 1 "const_int_operand") ++ (match_operand:SI 2 "register_operand" "Ucj") ++ (match_operand:<VPRED> 3 "register_operand" "Upl") ++ SME_ST1)) ++ "TARGET_STREAMING_SME" ++ "st1<Vesize>\t{ za%1<hv>.<Vetype>%w2, 0 }, %3, %0" ++) ++ ++(define_insn "@aarch64_sme_<optab><mode>_plus" ++ (set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf") ++ (unspec:SME_ZA_I ++ (reg:SME_ZA_I ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_dup 0) ++ (match_operand:DI 1 "const_int_operand") ++ (plus:SI (match_operand:SI 2 "register_operand" "Ucj") ++ (match_operand:SI 3 "const_int_operand")) ++ (match_operand:<VPRED> 4 "register_operand" "Upl") ++ SME_ST1)) ++ "TARGET_STREAMING_SME ++ && UINTVAL (operands3) < 128 / <elem_bits>" ++ "st1<Vesize>\t{ za%1<hv>.<Vetype>%w2, %3 }, %4, %0" ++) ++ ++(define_insn "aarch64_sme_str0" ++ (set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk")) ++ (unspec:VNx16QI ++ (reg:VNx16QI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (mem:VNx16QI (match_dup 1)) ++ (match_operand:SI 0 "register_operand" "Ucj") ++ UNSPEC_SME_STR)) ++ "TARGET_SME" ++ "str\tza%w0, 0, %1, #0, mul vl" ++) ++ ++(define_insn "@aarch64_sme_strn<mode>" ++ (set (mem:VNx16QI ++ (plus:P (match_operand:P 2 "register_operand" "rk") ++ (match_operand:P 3 "aarch64_mov_operand"))) ++ (unspec:VNx16QI ++ (reg:VNx16QI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3))) ++ (plus:SI (match_operand:SI 0 "register_operand" "Ucj") ++ (match_operand:SI 1 "const_int_operand")) ++ UNSPEC_SME_STR)) ++ "TARGET_SME ++ && aarch64_sme_ldr_vnum_offset_p (operands1, operands3)" ++ "str\tza%w0, %1, %2, #%1, mul vl" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Single-vector moves ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - MOVA ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sme_<optab><v_int_container><mode>" ++ (set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ (reg:<V_INT_CONTAINER> ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:SVE_FULL 1 "register_operand" "0") ++ (match_operand:<VPRED> 2 "register_operand" "Upl") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:SI 4 
"register_operand" "Ucj") ++ SME_READ)) ++ "TARGET_STREAMING_SME" ++ "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>%w4, 0" ++) ++ ++(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus" ++ (set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ (reg:<V_INT_CONTAINER> ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:SVE_FULL 1 "register_operand" "0") ++ (match_operand:<VPRED> 2 "register_operand" "Upl") ++ (match_operand:DI 3 "const_int_operand") ++ (plus:SI (match_operand:SI 4 "register_operand" "Ucj") ++ (match_operand:SI 5 "const_int_operand")) ++ SME_READ)) ++ "TARGET_STREAMING_SME ++ && UINTVAL (operands5) < 128 / <elem_bits>" ++ "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>%w4, %5" ++) ++ ++(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>" ++ (set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ (reg:VNx1TI_ONLY ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:SVE_FULL 1 "register_operand" "0") ++ (match_operand:VNx2BI 2 "register_operand" "Upl") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:SI 4 "register_operand" "Ucj") ++ SME_READ)) ++ "TARGET_STREAMING_SME" ++ "mova\t%0.q, %2/m, za%3<hv>.q%w4, 0" ++) ++ ++(define_insn "@aarch64_sme_<optab><v_int_container><mode>" ++ (set (reg:<V_INT_CONTAINER> ZA_REGNUM) ++ (unspec:<V_INT_CONTAINER> ++ (reg:SVE_FULL ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:SI 1 "register_operand" "Ucj") ++ (match_operand:<VPRED> 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 3 "register_operand" "w") ++ SME_WRITE)) ++ "TARGET_STREAMING_SME" ++ "mova\tza%0<hv>.<Vetype>%w1, 0, %2/m, %3.<Vetype>" ++) ++ ++(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus" ++ (set (reg:<V_INT_CONTAINER> ZA_REGNUM) ++ (unspec:<V_INT_CONTAINER> ++ (reg:SVE_FULL ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (plus:SI (match_operand:SI 1 "register_operand" "Ucj") ++ (match_operand:SI 2 "const_int_operand")) ++ (match_operand:<VPRED> 3 "register_operand" "Upl") ++ (match_operand:SVE_FULL 4 "register_operand" "w") ++ SME_WRITE)) ++ "TARGET_STREAMING_SME ++ && UINTVAL (operands2) < 128 / <elem_bits>" ++ "mova\tza%0<hv>.<Vetype>%w1, %2, %3/m, %4.<Vetype>" ++) ++ ++(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>" ++ (set (reg:VNx1TI_ONLY ZA_REGNUM) ++ (unspec:VNx1TI_ONLY ++ (reg:VNx1TI_ONLY ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:SI 1 "register_operand" "Ucj") ++ (match_operand:VNx2BI 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 3 "register_operand" "w") ++ SME_WRITE)) ++ "TARGET_STREAMING_SME" ++ "mova\tza%0<hv>.q%w1, 0, %2/m, %3.q" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Zeroing ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ZERO ++;; ------------------------------------------------------------------------- ++ ++(define_c_enum "unspec" UNSPEC_SME_ZERO) ++ ++(define_insn "aarch64_sme_zero_za" ++ (set (reg:VNx16QI ZA_REGNUM) ++ (unspec:VNx16QI (reg:VNx16QI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ UNSPEC_SME_ZERO)) ++ "TARGET_SME" ++ { ++ return aarch64_output_sme_zero_za (operands0); ++ } ++) ++ ++;; ========================================================================= ++;; == Binary arithmetic ++;; 
========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Binary arithmetic on ZA tile ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ADDHA ++;; - ADDVA ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sme_<optab><mode>" ++ (set (reg:SME_ZA_SDI ZA_REGNUM) ++ (unspec:SME_ZA_SDI ++ (reg:SME_ZA_SDI ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:<VPRED> 1 "register_operand" "Upl") ++ (match_operand:<VPRED> 2 "register_operand" "Upl") ++ (match_operand:SME_ZA_SDI 3 "register_operand" "w") ++ SME_BINARY_SDI)) ++ "TARGET_STREAMING_SME" ++ "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>" ++) ++ ++;; ========================================================================= ++;; == Ternary arithmetic ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- INT Sum of outer products ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SMOPA ++;; - SMOPS ++;; - SUMOPA ++;; - SUMOPS ++;; - UMOPA ++;; - UMOPS ++;; - USMOPA ++;; - USMOPS ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>" ++ (set (reg:VNx4SI_ONLY ZA_REGNUM) ++ (unspec:VNx4SI_ONLY ++ (reg:VNx4SI_ONLY ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") ++ (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") ++ (match_operand:VNx16QI_ONLY 3 "register_operand" "w") ++ (match_operand:VNx16QI_ONLY 4 "register_operand" "w") ++ SME_INT_MOP)) ++ "TARGET_STREAMING_SME" ++ "<optab>\tza%0.s, %1/m, %2/m, %3.b, %4.b" ++) ++ ++(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>" ++ (set (reg:VNx2DI_ONLY ZA_REGNUM) ++ (unspec:VNx2DI_ONLY ++ (reg:VNx2DI_ONLY ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:<VNx2DI_ONLY:VPRED> 1 "register_operand" "Upl") ++ (match_operand:<VNx2DI_ONLY:VPRED> 2 "register_operand" "Upl") ++ (match_operand:VNx8HI_ONLY 3 "register_operand" "w") ++ (match_operand:VNx8HI_ONLY 4 "register_operand" "w") ++ SME_INT_MOP)) ++ "TARGET_STREAMING_SME && TARGET_SME_I16I64" ++ "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- FP Sum of outer products ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BFMOPA ++;; - BFMOPS ++;; - FMOPA ++;; - FMOPS ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>" ++ (set (reg:SME_ZA_SDF_I ZA_REGNUM) ++ (unspec:SME_ZA_SDF_I ++ (reg:SME_ZA_SDF_I ZA_REGNUM) ++ (reg:DI SME_STATE_REGNUM) ++ (match_operand:DI 0 "const_int_operand") ++ (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl") ++ (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl") ++ (match_operand:SME_MOP_HSDF 3 "register_operand" "w") ++ (match_operand:SME_MOP_HSDF 4 "register_operand" "w") ++ SME_FP_MOP)) ++ "TARGET_STREAMING_SME ++ && (<SME_ZA_SDF_I:elem_bits> == 32) == 
(<SME_MOP_HSDF:elem_bits> <= 32)" ++ "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>" ++) +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +index f5fa4030c..9dfce5c0e 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +@@ -50,6 +50,27 @@ public: + } + }; + ++/* Wrap T, which is derived from function_base, and indicate that it ++ additionally has the call properties in PROPERTIES. */ ++template<typename T, unsigned int PROPERTIES> ++class add_call_properties : public T ++{ ++public: ++ using T::T; ++ ++ unsigned int ++ call_properties (const function_instance &fi) const override ++ { ++ return T::call_properties (fi) | PROPERTIES; ++ } ++}; ++ ++template<typename T> ++using read_write_za = add_call_properties<T, CP_READ_ZA | CP_WRITE_ZA>; ++ ++template<typename T> ++using write_za = add_call_properties<T, CP_WRITE_ZA>; ++ + /* A function_base that sometimes or always operates on tuples of + vectors. */ + class multi_vector_function : public function_base +@@ -383,6 +404,49 @@ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub> + typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane> + unspec_based_sub_lane_function; + ++/* General SME unspec-based functions, parameterized on the vector mode. */ ++class sme_1mode_function : public read_write_za<unspec_based_function_base> ++{ ++public: ++ using parent = read_write_za<unspec_based_function_base>; ++ ++ CONSTEXPR sme_1mode_function (int unspec_for_sint, int unspec_for_uint, ++ int unspec_for_fp) ++ : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1) ++ {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ auto icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* General SME unspec-based functions, parameterized on both the ZA mode ++ and the vector mode. */ ++template<insn_code (*CODE) (int, machine_mode, machine_mode)> ++class sme_2mode_function_t : public read_write_za<unspec_based_function_base> ++{ ++public: ++ using parent = read_write_za<unspec_based_function_base>; ++ ++ CONSTEXPR sme_2mode_function_t (int unspec_for_sint, int unspec_for_uint, ++ int unspec_for_fp) ++ : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1) ++ {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ insn_code icode = CODE (unspec_for (e), e.vector_mode (0), ++ e.tuple_mode (1)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++using sme_2mode_function = sme_2mode_function_t<code_for_aarch64_sme>; ++ + /* A function that acts like unspec_based_function_exact_insn<INT_CODE> + when operating on integers, but that expands to an (fma ...)-style + aarch64_sve* operation when applied to floats. */ +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +index c536949ba..bdde849c8 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -59,7 +59,10 @@ static void + apply_predication (const function_instance &instance, tree return_type, + vec<tree> &argument_types) + { +- if (instance.pred != PRED_none) ++ /* There are currently no SME ZA instructions that have both merging and ++ unpredicated forms, so for simplicity, the predicates are always included ++ in the original format string. 
*/
++  if (instance.pred != PRED_none && instance.pred != PRED_za_m)
+     {
+       argument_types.quick_insert (0, get_svbool_t ());
+       /* For unary merge operations, the first argument is a vector with
+@@ -589,6 +592,33 @@ struct binary_imm_long_base : public overloaded_base<0>
+   }
+ };
+ 
++/* Base class for binary_za_m and similar shapes.  */
++template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS,
++	 unsigned int BITS = function_resolver::SAME_SIZE>
++struct binary_za_m_base : public overloaded_base<1>
++{
++  tree
++  resolve (function_resolver &r) const override
++  {
++    type_suffix_index type;
++    if (!r.check_num_arguments (5)
++	|| !r.require_integer_immediate (0)
++	|| !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
++	|| !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
++	|| (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES
++	|| !r.require_derived_vector_type (4, 3, type, TCLASS, BITS))
++      return error_mark_node;
++
++    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type);
++  }
++
++  bool
++  check (function_checker &c) const override
++  {
++    return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
++  }
++};
++
+ /* Base class for inc_dec and inc_dec_pat.  */
+ struct inc_dec_base : public overloaded_base<0>
+ {
+@@ -1576,6 +1606,68 @@ struct binary_wide_opt_n_def : public overloaded_base<0>
+ };
+ SHAPE (binary_wide_opt_n)
+ 
++/* void svfoo_t0_t1_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
++		      sv<t1:int>x<g>_t)
++
++   where the first argument is a ZA tile.  */
++struct binary_za_int_m_def : public binary_za_m_base<TYPE_signed>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "_,su64,vp,vp,t1,ts1", group, MODE_none);
++  }
++};
++SHAPE (binary_za_int_m)
++
++/* void svfoo_t0_t1_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
++		      sv<t1>x<g>_t)
++
++   where the first argument is a ZA tile.  */
++struct binary_za_m_def : public binary_za_m_base<>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    /* Allow the overloaded form to be specified separately, with just
++       a single suffix.  This is necessary for the 64-bit SME MOP intrinsics,
++       which have some forms dependent on FEAT_SME_I16I64 and some forms
++       dependent on FEAT_SME_F64F64.  The resolver needs to be defined
++       for base SME.  */
++    if (group.types[0][1] != NUM_TYPE_SUFFIXES)
++      build_all (b, "_,su64,vp,vp,t1,t1", group, MODE_none);
++  }
++};
++SHAPE (binary_za_m)
++
++/* void svfoo_t0_t1_g(uint64_t, svbool_t, svbool_t, sv<t1>x<g>_t,
++		      sv<t1:uint>x<g>_t)
++
++   where the first argument is a ZA tile.  */
++struct binary_za_uint_m_def : public binary_za_m_base<TYPE_unsigned>
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    b.add_overloaded_functions (group, MODE_none);
++    build_all (b, "_,su64,vp,vp,t1,tu1", group, MODE_none);
++  }
++};
++SHAPE (binary_za_uint_m)
++
++/* bool svfoo().  */
++struct bool_inherent_def : public nonoverloaded_base
++{
++  void
++  build (function_builder &b, const function_group_info &group) const override
++  {
++    build_all (b, "sp", group, MODE_none);
++  }
++};
++SHAPE (bool_inherent)
++
+ /* sv<t0>_t svfoo_t0(sv<t0>_t, sv<t0>_t)
+    <t0>_t svfoo_n_t0(<t0>_t, sv<t0>_t).
*/ + struct clast_def : public overloaded_base<0> +@@ -2055,6 +2147,51 @@ struct inherent_b_def : public overloaded_base<0> + }; + SHAPE (inherent_b) + ++/* void svfoo_t0(). */ ++struct inherent_za_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ build_all (b, "_", group, MODE_none); ++ } ++}; ++SHAPE (inherent_za) ++ ++/* void svfoo_t0(uint64_t) ++ ++ where the argument is an integer constant that specifies an 8-bit mask. */ ++struct inherent_mask_za_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ build_all (b, "_,su64", group, MODE_none); ++ } ++ ++ bool ++ check (function_checker &c) const override ++ { ++ return c.require_immediate_range (0, 0, 255); ++ } ++}; ++SHAPE (inherent_mask_za) ++ ++/* void svfoo_t0(uint32_t, const void *) ++ void svfoo_vnum_t0(uint32_t, const void *, int64_t) ++ ++ where the first argument is a variable ZA slice. */ ++struct ldr_za_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ build_all (b, "_,su32,al", group, MODE_none); ++ build_all (b, "_,su32,al,ss64", group, MODE_vnum); ++ } ++}; ++SHAPE (ldr_za) ++ + /* sv<t0>xN_t svfoo_t0(const <t0>_t *) + sv<t0>xN_t svfoo_vnum_t0(const <t0>_t *, int64_t). */ + struct load_def : public load_contiguous_base +@@ -2265,6 +2402,27 @@ struct load_replicate_def : public load_contiguous_base + }; + SHAPE (load_replicate) + ++/* void svfoo_t0(uint64_t, uint32_t, svbool_t, const void *) ++ void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, const void *, int64_t) ++ ++ where the first two fields form a (ZA tile, slice) pair. */ ++struct load_za_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ build_all (b, "_,su64,su32,vp,al", group, MODE_none); ++ build_all (b, "_,su64,su32,vp,al,ss64", group, MODE_vnum); ++ } ++ ++ bool ++ check (function_checker &c) const override ++ { ++ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); ++ } ++}; ++SHAPE (load_za) ++ + /* svbool_t svfoo(enum svpattern). */ + struct pattern_pred_def : public nonoverloaded_base + { +@@ -2359,6 +2517,48 @@ struct rdffr_def : public nonoverloaded_base + }; + SHAPE (rdffr) + ++/* sv<t1>_t svfoo_t0_t1(uint64_t, uint32_t) ++ ++ where the first two fields form a (ZA tile, slice) pair. 
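++
++   An illustrative resolved instance (names follow the ACLE naming scheme
++   implied by this shape, not a quote from the patch): svread_hor_za32_s32_m
++   has the prototype svint32_t (svint32_t, svbool_t, uint64_t, uint32_t).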
*/ ++struct read_za_m_def : public overloaded_base<1> ++{ ++ bool ++ has_merge_argument_p (const function_instance &, unsigned int) const override ++ { ++ return true; ++ } ++ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "t1,su64,su32", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const override ++ { ++ gcc_assert (r.pred == PRED_m); ++ type_suffix_index type; ++ if (!r.check_num_arguments (4) ++ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES ++ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t) ++ || !r.require_integer_immediate (2) ++ || !r.require_scalar_type (3, "uint32_t")) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids0, type); ++ } ++ ++ bool ++ check (function_checker &c) const override ++ { ++ gcc_assert (c.pred == PRED_m); ++ return c.require_immediate_range (1, 0, c.num_za_tiles () - 1); ++ } ++}; ++SHAPE (read_za_m) ++ + /* <t0>_t svfoo_t0(sv<t0>_t). */ + struct reduction_def : public overloaded_base<0> + { +@@ -2727,6 +2927,42 @@ struct store_scatter_offset_restricted_def : public store_scatter_base + }; + SHAPE (store_scatter_offset_restricted) + ++/* void svfoo_t0(uint64_t, uint32_t, svbool_t, void *) ++ void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, void *, int64_t) ++ ++ where the first two fields form a (ZA tile, slice) pair. */ ++struct store_za_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ build_all (b, "_,su64,su32,vp,as", group, MODE_none); ++ build_all (b, "_,su64,su32,vp,as,ss64", group, MODE_vnum); ++ } ++ ++ bool ++ check (function_checker &c) const override ++ { ++ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); ++ } ++}; ++SHAPE (store_za) ++ ++/* void svfoo_t0(uint32_t, void *) ++ void svfoo_vnum_t0(uint32_t, void *, int64_t) ++ ++ where the first argument is a variable ZA slice. */ ++struct str_za_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ build_all (b, "_,su32,as", group, MODE_none); ++ build_all (b, "_,su32,as,ss64", group, MODE_vnum); ++ } ++}; ++SHAPE (str_za) ++ + /* sv<t0>_t svfoo_t0(sv<t0>xN_t, sv<t0:uint>_t). */ + struct tbl_tuple_def : public overloaded_base<0> + { +@@ -3487,4 +3723,72 @@ struct unary_widen_def : public overloaded_base<0> + }; + SHAPE (unary_widen) + ++/* void svfoo_t0_t1(uint64_t, svbool_t, svbool_t, sv<t1>_t) ++ ++ where the first argument is a ZA tile. 
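++
++   For example (illustrative): svaddha_za32_s32_m follows this shape, with
++   the prototype void (uint64_t, svbool_t, svbool_t, svint32_t).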
*/ ++struct unary_za_m_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "_,su64,vp,vp,t1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const override ++ { ++ type_suffix_index type; ++ if (!r.check_num_arguments (4) ++ || !r.require_integer_immediate (0) ++ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t) ++ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t) ++ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids0, type); ++ } ++ ++ bool ++ check (function_checker &c) const override ++ { ++ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); ++ } ++}; ++SHAPE (unary_za_m) ++ ++/* void svfoo_t0_t1(uint64_t, uint32_t, svbool_t, sv<t1>_t) ++ ++ where the first two fields form a (ZA tile, slice) pair. */ ++struct write_za_m_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const override ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "_,su64,su32,vp,t1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const override ++ { ++ type_suffix_index type; ++ if (!r.check_num_arguments (4) ++ || !r.require_integer_immediate (0) ++ || !r.require_scalar_type (1, "uint32_t") ++ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t) ++ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids0, type); ++ } ++ ++ bool ++ check (function_checker &c) const override ++ { ++ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); ++ } ++}; ++SHAPE (write_za_m) ++ + } +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +index 2b06152d4..9c1f44bdc 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +@@ -93,6 +93,10 @@ namespace aarch64_sve + extern const function_shape *const binary_uint64_opt_n; + extern const function_shape *const binary_wide; + extern const function_shape *const binary_wide_opt_n; ++ extern const function_shape *const binary_za_int_m; ++ extern const function_shape *const binary_za_m; ++ extern const function_shape *const binary_za_uint_m; ++ extern const function_shape *const bool_inherent; + extern const function_shape *const clast; + extern const function_shape *const compare; + extern const function_shape *const compare_opt_n; +@@ -114,6 +118,9 @@ namespace aarch64_sve + extern const function_shape *const inc_dec_pred_scalar; + extern const function_shape *const inherent; + extern const function_shape *const inherent_b; ++ extern const function_shape *const inherent_za; ++ extern const function_shape *const inherent_mask_za; ++ extern const function_shape *const ldr_za; + extern const function_shape *const load; + extern const function_shape *const load_ext; + extern const function_shape *const load_ext_gather_index; +@@ -124,6 +131,7 @@ namespace aarch64_sve + extern const function_shape *const load_gather_sv_restricted; + extern const function_shape *const load_gather_vs; + extern const function_shape *const load_replicate; ++ extern const function_shape *const load_za; + extern const function_shape *const mmla; + extern const function_shape *const pattern_pred; + extern const 
function_shape *const prefetch; +@@ -131,6 +139,7 @@ namespace aarch64_sve + extern const function_shape *const prefetch_gather_offset; + extern const function_shape *const ptest; + extern const function_shape *const rdffr; ++ extern const function_shape *const read_za_m; + extern const function_shape *const reduction; + extern const function_shape *const reduction_wide; + extern const function_shape *const reinterpret; +@@ -148,6 +157,8 @@ namespace aarch64_sve + extern const function_shape *const store_scatter_index_restricted; + extern const function_shape *const store_scatter_offset; + extern const function_shape *const store_scatter_offset_restricted; ++ extern const function_shape *const store_za; ++ extern const function_shape *const str_za; + extern const function_shape *const tbl_tuple; + extern const function_shape *const ternary_bfloat; + extern const function_shape *const ternary_bfloat_lane; +@@ -186,6 +197,8 @@ namespace aarch64_sve + extern const function_shape *const unary_to_uint; + extern const function_shape *const unary_uint; + extern const function_shape *const unary_widen; ++ extern const function_shape *const unary_za_m; ++ extern const function_shape *const write_za_m; + } + } + +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +new file mode 100644 +index 000000000..e1df6ce0d +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +@@ -0,0 +1,412 @@ ++/* ACLE support for AArch64 SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "rtl.h" ++#include "tm_p.h" ++#include "memmodel.h" ++#include "insn-codes.h" ++#include "optabs.h" ++#include "recog.h" ++#include "expr.h" ++#include "basic-block.h" ++#include "function.h" ++#include "fold-const.h" ++#include "gimple.h" ++#include "gimple-iterator.h" ++#include "gimplify.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "aarch64-sve-builtins.h" ++#include "aarch64-sve-builtins-shapes.h" ++#include "aarch64-sve-builtins-base.h" ++#include "aarch64-sve-builtins-sme.h" ++#include "aarch64-sve-builtins-functions.h" ++ ++using namespace aarch64_sve; ++ ++namespace { ++ ++class load_store_za_base : public function_base ++{ ++public: ++ tree ++ memory_scalar_type (const function_instance &) const override ++ { ++ return void_type_node; ++ } ++}; ++ ++class read_write_za_base : public function_base ++{ ++public: ++ constexpr read_write_za_base (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ auto za_mode = e.vector_mode (0); ++ auto z_mode = e.vector_mode (1); ++ auto icode = (za_mode == VNx1TImode ++ ? 
code_for_aarch64_sme (m_unspec, za_mode, z_mode)
++		  : code_for_aarch64_sme (m_unspec, z_mode, z_mode));
++    return e.use_exact_insn (icode);
++  }
++
++  int m_unspec;
++};
++
++using load_za_base = add_call_properties<load_store_za_base,
++					 CP_READ_MEMORY | CP_READ_ZA
++					 | CP_WRITE_ZA>;
++
++using store_za_base = add_call_properties<load_store_za_base,
++					  CP_WRITE_MEMORY | CP_READ_ZA>;
++
++/* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE.
++   The intrinsic has a vnum parameter at index ARGNO.  Return true if the
++   vnum argument is a constant that is a valid ZA offset for the underlying
++   instruction.  */
++
++static bool
++has_in_range_vnum_arg (function_expander &e, machine_mode mem_mode,
++		       unsigned int argno)
++{
++  return (e.mode_suffix_id == MODE_vnum
++	  && CONST_INT_P (e.args[argno])
++	  && UINTVAL (e.args[argno]) < 16 / GET_MODE_UNIT_SIZE (mem_mode));
++}
++
++/* E is a ZA load or store intrinsic that uses instruction ICODE.  Add a
++   32-bit operand that gives the total ZA slice.  (The instruction hard-codes
++   the constant offset to 0, so there is no operand for that.)
++
++   Argument ARGNO is the intrinsic's slice argument.  If the intrinsic is
++   a _vnum intrinsic, argument VNUM_ARGNO is the intrinsic's vnum operand,
++   which must be added to the slice argument.  */
++
++static void
++add_load_store_slice_operand (function_expander &e, insn_code icode,
++			      unsigned int argno, unsigned int vnum_argno)
++{
++  rtx base = e.args[argno];
++  if (e.mode_suffix_id == MODE_vnum)
++    {
++      rtx vnum = lowpart_subreg (SImode, e.args[vnum_argno], DImode);
++      base = simplify_gen_binary (PLUS, SImode, base, vnum);
++    }
++  e.add_input_operand (icode, base);
++}
++
++/* Add a memory operand for ZA LD1 or ST1 intrinsic E.  BASE_ARGNO is
++   the index of the base argument.  */
++
++static void
++add_load_store_operand (function_expander &e, unsigned int base_argno)
++{
++  auto mode = e.vector_mode (0);
++  rtx base = e.get_contiguous_base (mode, base_argno, base_argno + 1,
++				    AARCH64_FL_SM_ON);
++  auto mem = gen_rtx_MEM (mode, force_reg (Pmode, base));
++  set_mem_align (mem, BITS_PER_UNIT);
++  e.add_fixed_operand (mem);
++}
++
++/* Expand ZA LDR or STR intrinsic E.  There are two underlying instructions:
++
++   - BASE_CODE has a zero ZA slice offset
++   - VNUM_CODE has a constant operand for the ZA slice offset.  */
++
++static rtx
++expand_ldr_str_za (function_expander &e, insn_code base_code,
++		   insn_code vnum_code)
++{
++  if (has_in_range_vnum_arg (e, VNx16QImode, 2))
++    {
++      rtx mem_offset = aarch64_sme_vq_immediate (Pmode,
++						 UINTVAL (e.args[2]) * 16,
++						 AARCH64_ISA_MODE);
++      e.add_input_operand (vnum_code, e.args[0]);
++      e.add_input_operand (vnum_code, e.args[2]);
++      e.add_input_operand (vnum_code, e.args[1]);
++      e.add_input_operand (vnum_code, mem_offset);
++      return e.generate_insn (vnum_code);
++    }
++  else
++    {
++      rtx base = e.get_contiguous_base (VNx16QImode, 1, 2, AARCH64_FL_SM_ON);
++      add_load_store_slice_operand (e, base_code, 0, 2);
++      e.add_input_operand (base_code, base);
++      return e.generate_insn (base_code);
++    }
++}
++
++/* Expand ZA LD1 or ST1 intrinsic E.  UNSPEC is the load or store unspec.
++   IS_LOAD is true if E is a load, false if it is a store.  */
++
++static rtx
++expand_ld1_st1 (function_expander &e, int unspec, bool is_load)
++{
++  bool is_vnum = has_in_range_vnum_arg (e, e.vector_mode (0), 4);
++  auto icode = (is_vnum
++		?
code_for_aarch64_sme_plus (unspec, e.vector_mode (0)) ++ : code_for_aarch64_sme (unspec, e.vector_mode (0))); ++ if (!is_load) ++ add_load_store_operand (e, 3); ++ e.add_input_operand (icode, e.args0); ++ if (is_vnum) ++ { ++ e.add_input_operand (icode, e.args1); ++ e.add_input_operand (icode, e.args4); ++ } ++ else ++ add_load_store_slice_operand (e, icode, 1, 4); ++ e.add_input_operand (icode, e.args2); ++ if (is_load) ++ add_load_store_operand (e, 3); ++ return e.generate_insn (icode); ++} ++ ++class arm_has_sme_impl : public function_base ++{ ++ gimple * ++ fold (gimple_folder &f) const override ++ { ++ if (TARGET_SME) ++ return f.fold_to_cstu (1); ++ return nullptr; ++ } ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ if (TARGET_SME) ++ return const1_rtx; ++ emit_insn (gen_aarch64_get_sme_state ()); ++ return expand_simple_binop (DImode, LSHIFTRT, ++ gen_rtx_REG (DImode, R0_REGNUM), ++ gen_int_mode (63, QImode), ++ e.possible_target, true, OPTAB_LIB_WIDEN); ++ } ++}; ++ ++class arm_in_streaming_mode_impl : public function_base ++{ ++ gimple * ++ fold (gimple_folder &f) const override ++ { ++ if (TARGET_STREAMING) ++ return f.fold_to_cstu (1); ++ if (TARGET_NON_STREAMING) ++ return f.fold_to_cstu (0); ++ return nullptr; ++ } ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ if (TARGET_STREAMING) ++ return const1_rtx; ++ ++ if (TARGET_NON_STREAMING) ++ return const0_rtx; ++ ++ rtx reg; ++ if (TARGET_SME) ++ { ++ reg = gen_reg_rtx (DImode); ++ emit_insn (gen_aarch64_read_svcr (reg)); ++ } ++ else ++ { ++ emit_insn (gen_aarch64_get_sme_state ()); ++ reg = gen_rtx_REG (DImode, R0_REGNUM); ++ } ++ return expand_simple_binop (DImode, AND, reg, gen_int_mode (1, DImode), ++ e.possible_target, true, OPTAB_LIB_WIDEN); ++ } ++}; ++ ++/* Implements svcntsbhwd. */ ++class svcnts_bhwd_impl : public function_base ++{ ++public: ++ constexpr svcnts_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} ++ ++ unsigned int ++ get_shift () const ++ { ++ return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode)); ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const override ++ { ++ if (TARGET_STREAMING) ++ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); ++ return nullptr; ++ } ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ rtx cntsb = aarch64_sme_vq_immediate (DImode, 16, AARCH64_ISA_MODE); ++ auto shift = get_shift (); ++ if (!shift) ++ return cntsb; ++ ++ return expand_simple_binop (DImode, LSHIFTRT, cntsb, ++ gen_int_mode (shift, QImode), ++ e.possible_target, true, OPTAB_LIB_WIDEN); ++ } ++ ++ /* The mode of the vector associated with the bhwd suffix. 
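++     (Illustrative note: for svcntsb the reference mode is VNx16QImode,
++     so get_shift returns 0 and the expansion yields the streaming
++     vector length in bytes.)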
*/ ++ machine_mode m_ref_mode; ++}; ++ ++class svld1_za_impl : public load_za_base ++{ ++public: ++ constexpr svld1_za_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ld1_st1 (e, m_unspec, true); ++ } ++ ++ int m_unspec; ++}; ++ ++class svldr_za_impl : public load_za_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_ldr0, ++ code_for_aarch64_sme_ldrn (Pmode)); ++ } ++}; ++ ++using svread_za_tile_impl = add_call_properties<read_write_za_base, ++ CP_READ_ZA>; ++ ++class svst1_za_impl : public store_za_base ++{ ++public: ++ constexpr svst1_za_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ld1_st1 (e, m_unspec, false); ++ } ++ ++ int m_unspec; ++}; ++ ++class svstr_za_impl : public store_za_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const override ++ { ++ return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_str0, ++ code_for_aarch64_sme_strn (Pmode)); ++ } ++}; ++ ++class svundef_za_impl : public write_za<function_base> ++{ ++public: ++ rtx ++ expand (function_expander &) const override ++ { ++ rtx target = gen_rtx_REG (VNx16QImode, ZA_REGNUM); ++ emit_clobber (copy_rtx (target)); ++ return const0_rtx; ++ } ++}; ++ ++using svwrite_za_tile_impl = add_call_properties<read_write_za_base, ++ CP_READ_ZA | CP_WRITE_ZA>; ++ ++class svzero_mask_za_impl : public write_za<function_base> ++{ ++public: ++ rtx ++ expand (function_expander &e) const override ++ { ++ return e.use_exact_insn (CODE_FOR_aarch64_sme_zero_za); ++ } ++}; ++ ++class svzero_za_impl : public write_za<function_base> ++{ ++public: ++ rtx ++ expand (function_expander &) const override ++ { ++ emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode))); ++ return const0_rtx; ++ } ++}; ++ ++} /* end anonymous namespace */ ++ ++namespace aarch64_sve { ++ ++FUNCTION (arm_has_sme, arm_has_sme_impl, ) ++FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, ) ++FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA, ++ UNSPEC_SME_ADDHA, -1)) ++FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA, ++ UNSPEC_SME_ADDVA, -1)) ++FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode)) ++FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode)) ++FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode)) ++FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode)) ++FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR)) ++FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER)) ++FUNCTION (svldr_za, svldr_za_impl, ) ++FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA, ++ UNSPEC_SME_FMOPA)) ++FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS, ++ UNSPEC_SME_FMOPS)) ++FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR)) ++FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER)) ++FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR)) ++FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER)) ++FUNCTION (svstr_za, svstr_za_impl, ) ++FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1)) ++FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1)) ++FUNCTION (svundef_za, svundef_za_impl, ) ++FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1)) ++FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1)) ++FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, 
(UNSPEC_SME_WRITE_HOR)) ++FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER)) ++FUNCTION (svzero_mask_za, svzero_mask_za_impl, ) ++FUNCTION (svzero_za, svzero_za_impl, ) ++ ++} /* end namespace aarch64_sve */ +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def +new file mode 100644 +index 000000000..5bdcc93f4 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def +@@ -0,0 +1,76 @@ ++/* ACLE support for AArch64 SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#define REQUIRED_EXTENSIONS 0 ++DEF_SVE_FUNCTION (arm_has_sme, bool_inherent, none, none) ++DEF_SVE_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_SME ++DEF_SVE_FUNCTION (svcntsb, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntsd, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntsh, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntsw, count_inherent, none, none) ++DEF_SME_ZA_FUNCTION (svldr, ldr_za, za, none) ++DEF_SME_ZA_FUNCTION (svstr, str_za, za, none) ++DEF_SME_ZA_FUNCTION (svundef, inherent_za, za, none) ++DEF_SME_ZA_FUNCTION (svzero, inherent_za, za, none) ++DEF_SME_ZA_FUNCTION (svzero_mask, inherent_mask_za, za, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SM_ON ++DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_s_integer, za_m) ++DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_s_integer, za_m) ++DEF_SME_ZA_FUNCTION (svld1_hor, load_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svld1_ver, load_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_base, za_m) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, d_za, za_m) ++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, mop_base, za_m) ++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, d_za, za_m) ++DEF_SME_ZA_FUNCTION (svread_hor, read_za_m, za_all_data, m) ++DEF_SME_ZA_FUNCTION (svread_ver, read_za_m, za_all_data, m) ++DEF_SME_ZA_FUNCTION (svst1_hor, store_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svst1_ver, store_za, all_za, none) ++DEF_SME_ZA_FUNCTION (svsumopa, binary_za_uint_m, mop_base_signed, za_m) ++DEF_SME_ZA_FUNCTION (svsumops, binary_za_uint_m, mop_base_signed, za_m) ++DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_base_unsigned, za_m) ++DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_base_unsigned, za_m) ++DEF_SME_ZA_FUNCTION (svwrite_hor, write_za_m, za_all_data, za_m) ++DEF_SME_ZA_FUNCTION (svwrite_ver, write_za_m, za_all_data, za_m) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ ++ | AARCH64_FL_SME_I16I64 \ ++ | AARCH64_FL_SM_ON) ++DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_d_integer, za_m) ++DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_d_integer, za_m) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_i16i64, za_m) 
++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, mop_i16i64, za_m) ++DEF_SME_ZA_FUNCTION (svsumopa, binary_za_uint_m, mop_i16i64_signed, za_m) ++DEF_SME_ZA_FUNCTION (svsumops, binary_za_uint_m, mop_i16i64_signed, za_m) ++DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_i16i64_unsigned, za_m) ++DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ ++ | AARCH64_FL_SME_F64F64 \ ++ | AARCH64_FL_SM_ON) ++DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m) ++DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m) ++#undef REQUIRED_EXTENSIONS +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h b/gcc/config/aarch64/aarch64-sve-builtins-sme.h +new file mode 100644 +index 000000000..acfed7700 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h +@@ -0,0 +1,57 @@ ++/* ACLE support for AArch64 SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef GCC_AARCH64_SVE_BUILTINS_SME_H ++#define GCC_AARCH64_SVE_BUILTINS_SME_H ++ ++namespace aarch64_sve ++{ ++ namespace functions ++ { ++ extern const function_base *const arm_has_sme; ++ extern const function_base *const arm_in_streaming_mode; ++ extern const function_base *const svaddha_za; ++ extern const function_base *const svaddva_za; ++ extern const function_base *const svcntsb; ++ extern const function_base *const svcntsd; ++ extern const function_base *const svcntsh; ++ extern const function_base *const svcntsw; ++ extern const function_base *const svld1_hor_za; ++ extern const function_base *const svld1_ver_za; ++ extern const function_base *const svldr_za; ++ extern const function_base *const svmopa_za; ++ extern const function_base *const svmops_za; ++ extern const function_base *const svread_hor_za; ++ extern const function_base *const svread_ver_za; ++ extern const function_base *const svst1_hor_za; ++ extern const function_base *const svst1_ver_za; ++ extern const function_base *const svstr_za; ++ extern const function_base *const svsumopa_za; ++ extern const function_base *const svsumops_za; ++ extern const function_base *const svusmopa_za; ++ extern const function_base *const svusmops_za; ++ extern const function_base *const svwrite_hor_za; ++ extern const function_base *const svwrite_ver_za; ++ extern const function_base *const svundef_za; ++ extern const function_base *const svzero_za; ++ extern const function_base *const svzero_mask_za; ++ } ++} ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index 3441b4294..32971a7c8 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -51,6 +51,7 @@ + #include "aarch64-sve-builtins.h" + #include "aarch64-sve-builtins-base.h" + #include "aarch64-sve-builtins-sve2.h" ++#include 
"aarch64-sve-builtins-sme.h" + #include "aarch64-sve-builtins-shapes.h" + + namespace aarch64_sve { +@@ -112,6 +113,7 @@ static const char *const pred_suffixesNUM_PREDS + 1 = { + "_m", + "_x", + "_z", ++ "_m", + "" + }; + +@@ -136,12 +138,28 @@ CONSTEXPR const type_suffix_info type_suffixesNUM_TYPE_SUFFIXES + 1 = { + TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \ + TYPE_##CLASS == TYPE_unsigned, \ + TYPE_##CLASS == TYPE_float, \ ++ TYPE_##CLASS != TYPE_bool, \ + TYPE_##CLASS == TYPE_bool, \ ++ false, \ ++ 0, \ ++ MODE }, ++#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \ ++ { "_" #NAME, \ ++ NUM_VECTOR_TYPES, \ ++ NUM_TYPE_CLASSES, \ ++ BITS, \ ++ BITS / BITS_PER_UNIT, \ ++ false, \ ++ false, \ ++ false, \ ++ false, \ ++ false, \ ++ true, \ + 0, \ + MODE }, + #include "aarch64-sve-builtins.def" + { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, +- 0, VOIDmode } ++ false, false, 0, VOIDmode } + }; + + CONSTEXPR const group_suffix_info group_suffixes = { +@@ -422,6 +440,79 @@ CONSTEXPR const group_suffix_info group_suffixes = { + TYPES_while1 (D, b32), \ + TYPES_while1 (D, b64) + ++/* _za8 _za16 _za32 _za64 _za128. */ ++#define TYPES_all_za(S, D) \ ++ S (za8), S (za16), S (za32), S (za64), S (za128) ++ ++/* _za64. */ ++#define TYPES_d_za(S, D) \ ++ S (za64) ++ ++/* { _za8 } x { _s8 _u8 } ++ ++ { _za16 } x { _bf16 _f16 _s16 _u16 } ++ ++ { _za32 } x { _f32 _s32 _u32 } ++ ++ { _za64 } x { _f64 _s64 _u64 }. */ ++#define TYPES_za_bhsd_data(S, D) \ ++ D (za8, s8), D (za8, u8), \ ++ D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \ ++ D (za32, f32), D (za32, s32), D (za32, u32), \ ++ D (za64, f64), D (za64, s64), D (za64, u64) ++ ++/* Likewise, plus: ++ ++ { _za128 } x { _bf16 } ++ { _f16 _f32 _f64 } ++ { _s8 _s16 _s32 _s64 } ++ { _u8 _u16 _u32 _u64 }. */ ++ ++#define TYPES_za_all_data(S, D) \ ++ TYPES_za_bhsd_data (S, D), \ ++ TYPES_reinterpret1 (D, za128) ++ ++/* _za32 x { _s32 _u32 }. */ ++#define TYPES_za_s_integer(S, D) \ ++ D (za32, s32), D (za32, u32) ++ ++ ++/* _za64_f64. */ ++#define TYPES_za_d_float(S, D) \ ++ D (za64, f64) ++ ++/* _za64 x { _s64 _u64 }. */ ++#define TYPES_za_d_integer(S, D) \ ++ D (za64, s64), D (za64, u64) ++ ++/* _za32 x { _s8 _u8 _bf16 _f16 _f32 }. */ ++#define TYPES_mop_base(S, D) \ ++ D (za32, s8), D (za32, u8), D (za32, bf16), D (za32, f16), D (za32, f32) ++ ++/* _za32_s8. */ ++#define TYPES_mop_base_signed(S, D) \ ++ D (za32, s8) ++ ++/* _za32_u8. */ ++#define TYPES_mop_base_unsigned(S, D) \ ++ D (za32, u8) ++ ++/* _za64 x { _s16 _u16 }. */ ++#define TYPES_mop_i16i64(S, D) \ ++ D (za64, s16), D (za64, u16) ++ ++/* _za64_s16. */ ++#define TYPES_mop_i16i64_signed(S, D) \ ++ D (za64, s16) ++ ++/* _za64_u16. */ ++#define TYPES_mop_i16i64_unsigned(S, D) \ ++ D (za64, u16) ++ ++/* _za. */ ++#define TYPES_za(S, D) \ ++ S (za) ++ + /* Describe a pair of type suffixes in which only the first is used. 
*/ + #define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES } + +@@ -489,6 +580,19 @@ DEF_SVE_TYPES_ARRAY (cvt_narrow); + DEF_SVE_TYPES_ARRAY (inc_dec_n); + DEF_SVE_TYPES_ARRAY (reinterpret); + DEF_SVE_TYPES_ARRAY (while); ++DEF_SVE_TYPES_ARRAY (all_za); ++DEF_SVE_TYPES_ARRAY (d_za); ++DEF_SVE_TYPES_ARRAY (za_all_data); ++DEF_SVE_TYPES_ARRAY (za_s_integer); ++DEF_SVE_TYPES_ARRAY (za_d_float); ++DEF_SVE_TYPES_ARRAY (za_d_integer); ++DEF_SVE_TYPES_ARRAY (mop_base); ++DEF_SVE_TYPES_ARRAY (mop_base_signed); ++DEF_SVE_TYPES_ARRAY (mop_base_unsigned); ++DEF_SVE_TYPES_ARRAY (mop_i16i64); ++DEF_SVE_TYPES_ARRAY (mop_i16i64_signed); ++DEF_SVE_TYPES_ARRAY (mop_i16i64_unsigned); ++DEF_SVE_TYPES_ARRAY (za); + + static const group_suffix_index groups_none = { + GROUP_none, NUM_GROUP_SUFFIXES +@@ -505,6 +609,9 @@ static const predication_index preds_none = { PRED_none, NUM_PREDS }; + explicit suffix. */ + static const predication_index preds_implicit = { PRED_implicit, NUM_PREDS }; + ++/* Used by functions that only support "_m" predication. */ ++static const predication_index preds_m = { PRED_m, NUM_PREDS }; ++ + /* Used by functions that allow merging and "don't care" predication, + but are not suitable for predicated MOVPRFX. */ + static const predication_index preds_mx = { +@@ -536,17 +643,23 @@ static const predication_index preds_z_or_none = { + /* Used by (mostly predicate) functions that only support "_z" predication. */ + static const predication_index preds_z = { PRED_z, NUM_PREDS }; + ++/* Used by SME instructions that always merge into ZA. */ ++static const predication_index preds_za_m = { PRED_za_m, NUM_PREDS }; ++ + /* A list of all SVE ACLE functions. */ + static CONSTEXPR const function_group_info function_groups = { + #define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ + { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ + preds_##PREDS, REQUIRED_EXTENSIONS }, ++#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ ++ { #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \ ++ groups_##GROUPS, preds_##PREDS, (REQUIRED_EXTENSIONS | AARCH64_FL_ZA_ON) }, + #include "aarch64-sve-builtins.def" + }; + + /* The scalar type associated with each vector type. */ +-extern GTY(()) tree scalar_typesNUM_VECTOR_TYPES; +-tree scalar_typesNUM_VECTOR_TYPES; ++extern GTY(()) tree scalar_typesNUM_VECTOR_TYPES + 1; ++tree scalar_typesNUM_VECTOR_TYPES + 1; + + /* The single-predicate and single-vector types, with their built-in + "__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always +@@ -654,7 +767,7 @@ find_type_suffix_for_scalar_type (const_tree type) + /* A linear search should be OK here, since the code isn't hot and + the number of types is only small. 
*/
+   for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
+-    if (!type_suffixes[suffix_i].bool_p)
++    if (type_suffixes[suffix_i].vector_p)
+       {
+ 	vector_type_index vector_i = type_suffixes[suffix_i].vector_type;
+ 	if (matches_type_p (scalar_types[vector_i], type))
+@@ -745,6 +858,20 @@ check_required_extensions (location_t location, tree fndecl,
+     return false;
+   }
+ 
++  if (missing_extensions & AARCH64_FL_SM_ON)
++    {
++      error_at (location, "ACLE function %qD can only be called when"
++		" SME streaming mode is enabled", fndecl);
++      return false;
++    }
++
++  if (missing_extensions & AARCH64_FL_ZA_ON)
++    {
++      error_at (location, "ACLE function %qD can only be called from"
++		" a function that has %qs state", fndecl, "za");
++      return false;
++    }
++
+   static const struct {
+     aarch64_feature_flags flag;
+     const char *name;
+@@ -780,9 +907,13 @@ report_out_of_range (location_t location, tree fndecl, unsigned int argno,
+ 		     HOST_WIDE_INT actual, HOST_WIDE_INT min,
+ 		     HOST_WIDE_INT max)
+ {
+-  error_at (location, "passing %wd to argument %d of %qE, which expects"
+-	    " a value in the range [%wd, %wd]", actual, argno + 1, fndecl,
+-	    min, max);
++  if (min == max)
++    error_at (location, "passing %wd to argument %d of %qE, which expects"
++	      " the value %wd", actual, argno + 1, fndecl, min);
++  else
++    error_at (location, "passing %wd to argument %d of %qE, which expects"
++	      " a value in the range [%wd, %wd]", actual, argno + 1, fndecl,
++	      min, max);
+ }
+ 
+ /* Report that LOCATION has a call to FNDECL in which argument ARGNO has
+@@ -869,7 +1000,7 @@ function_instance::reads_global_state_p () const
+     return true;
+ 
+   /* Handle direct reads of global state.  */
+-  return flags & (CP_READ_MEMORY | CP_READ_FFR);
++  return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA);
+ }
+ 
+ /* Return true if calls to the function could modify some form of
+@@ -890,7 +1021,7 @@ function_instance::modifies_global_state_p () const
+     return true;
+ 
+   /* Handle direct modifications of global state.  */
+-  return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR);
++  return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA);
+ }
+ 
+ /* Return true if calls to the function could raise a signal.  */
+@@ -922,8 +1053,8 @@ registered_function_hasher::equal (value_type value, const compare_type &key)
+   return value->instance == key;
+ }
+ 
+-sve_switcher::sve_switcher ()
+-  : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE)
++sve_switcher::sve_switcher (aarch64_feature_flags flags)
++  : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE | flags)
+ {
+   /* Changing the ISA flags and have_regs_of_mode should be enough here.
+      We shouldn't need to pay the compile-time cost of a full target
+@@ -979,6 +1110,10 @@ char *
+ function_builder::get_name (const function_instance &instance,
+ 			    bool overloaded_p)
+ {
++  /* __arm_* functions are listed as arm_*, so that the associated GCC
++     code is not in the implementation namespace.  */
++  if (strncmp (instance.base_name, "arm_", 4) == 0)
++    append_name ("__");
+   append_name (instance.base_name);
+   if (overloaded_p)
+     switch (instance.displacement_units ())
+@@ -1016,12 +1151,72 @@ add_attribute (const char *name, tree attrs)
+   return tree_cons (get_identifier (name), NULL_TREE, attrs);
+ }
+ 
+-/* Return the appropriate function attributes for INSTANCE.  */
++/* Add attribute NS::NAME to ATTRS.
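++   For example, add_attribute ("arm", "streaming", NULL_TREE, attrs)
++   yields the arm::streaming attribute; that exact call form appears in
++   get_attributes below (illustrative note).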
*/ ++static tree ++add_attribute (const char *ns, const char *name, tree value, tree attrs) ++{ ++ return tree_cons (build_tree_list (get_identifier (ns), ++ get_identifier (name)), ++ value, attrs); ++} ++ ++/* Attribute arm::NAME describes shared state that is an input if IS_IN ++ and an output if IS_OUT. Check whether a call with call properties ++ CALL_FLAGS needs such an attribute. Add it to in-progress attribute ++ list ATTRS if so. Return the new attribute list. */ ++static tree ++add_shared_state_attribute (const char *name, bool is_in, bool is_out, ++ unsigned int call_flags, tree attrs) ++{ ++ struct state_flag_info ++ { ++ const char *name; ++ unsigned int read_flag; ++ unsigned int write_flag; ++ }; ++ static state_flag_info state_flags = ++ { ++ { "za", CP_READ_ZA, CP_WRITE_ZA } ++ }; ++ ++ tree args = NULL_TREE; ++ for (const auto &state_flag : state_flags) ++ { ++ auto all_flags = state_flag.read_flag | state_flag.write_flag; ++ auto these_flags = ((is_in ? state_flag.read_flag : 0) ++ | (is_out ? state_flag.write_flag : 0)); ++ if ((call_flags & all_flags) == these_flags) ++ { ++ tree value = build_string (strlen (state_flag.name) + 1, ++ state_flag.name); ++ args = tree_cons (NULL_TREE, value, args); ++ } ++ } ++ if (args) ++ attrs = add_attribute ("arm", name, args, attrs); ++ return attrs; ++} ++ ++/* Return the appropriate function attributes for INSTANCE, which requires ++ the feature flags in REQUIRED_EXTENSIONS. */ + tree +-function_builder::get_attributes (const function_instance &instance) ++function_builder::get_attributes (const function_instance &instance, ++ aarch64_feature_flags required_extensions) + { + tree attrs = NULL_TREE; + ++ if (required_extensions & AARCH64_FL_SM_ON) ++ attrs = add_attribute ("arm", "streaming", NULL_TREE, attrs); ++ else if (!(required_extensions & AARCH64_FL_SM_OFF)) ++ attrs = add_attribute ("arm", "streaming_compatible", NULL_TREE, attrs); ++ ++ attrs = add_shared_state_attribute ("in", true, false, ++ instance.call_properties (), attrs); ++ attrs = add_shared_state_attribute ("out", false, true, ++ instance.call_properties (), attrs); ++ attrs = add_shared_state_attribute ("inout", true, true, ++ instance.call_properties (), attrs); ++ + if (!instance.modifies_global_state_p ()) + { + if (instance.reads_global_state_p ()) +@@ -1097,7 +1292,7 @@ add_unique_function (const function_instance &instance, + tree fntype = build_function_type_array (return_type, + argument_types.length (), + argument_types.address ()); +- tree attrs = get_attributes (instance); ++ tree attrs = get_attributes (instance, required_extensions); + registered_function &rfn = add_function (instance, name, fntype, attrs, + required_extensions, false, false); + +@@ -1114,7 +1309,7 @@ add_unique_function (const function_instance &instance, + if (strcmp (name, overload_name) != 0) + { + /* Attribute lists shouldn't be shared. 
*/
+-	  tree attrs = get_attributes (instance);
++	  tree attrs = get_attributes (instance, required_extensions);
+ 	  bool placeholder_p = !(m_direct_overloads || force_direct_overloads);
+ 	  add_function (instance, overload_name, fntype, attrs,
+ 			required_extensions, false, placeholder_p);
+@@ -2283,6 +2478,7 @@ bool
+ function_resolver::check_gp_argument (unsigned int nops,
+ 				      unsigned int &i, unsigned int &nargs)
+ {
++  gcc_assert (pred != PRED_za_m);
+   i = 0;
+   if (pred != PRED_none)
+     {
+@@ -2488,9 +2684,7 @@ function_checker::function_checker (location_t location,
+ 				    unsigned int nargs, tree *args)
+   : function_call_info (location, instance, fndecl),
+     m_fntype (fntype), m_nargs (nargs), m_args (args),
+-    /* We don't have to worry about unary _m operations here, since they
+-       never have arguments that need checking.  */
+-    m_base_arg (pred != PRED_none ? 1 : 0)
++    m_base_arg (pred != PRED_none && pred != PRED_za_m ? 1 : 0)
+ {
+ }
+ 
+@@ -2889,21 +3083,51 @@ function_expander::convert_to_pmode (rtx x)
+ }
+ 
+ /* Return the base address for a contiguous load or store function.
+-   MEM_MODE is the mode of the addressed memory.  */
++   MEM_MODE is the mode of the addressed memory, BASE_ARGNO is
++   the index of the base argument, and VNUM_ARGNO is the index of
++   the vnum offset argument (if any).  VL_ISA_MODE is AARCH64_FL_SM_ON
++   if the vnum argument is a factor of the SME vector length, 0 if it
++   is a factor of the current prevailing vector length.  */
+ rtx
+-function_expander::get_contiguous_base (machine_mode mem_mode)
++function_expander::get_contiguous_base (machine_mode mem_mode,
++					unsigned int base_argno,
++					unsigned int vnum_argno,
++					aarch64_feature_flags vl_isa_mode)
+ {
+-  rtx base = convert_to_pmode (args[1]);
++  rtx base = convert_to_pmode (args[base_argno]);
+   if (mode_suffix_id == MODE_vnum)
+     {
+-      /* Use the size of the memory mode for extending loads and truncating
+-	 stores.  Use the size of a full vector for non-extending loads
+-	 and non-truncating stores (including svld234 and svst234).  */
+-      poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode),
+-				     BYTES_PER_SVE_VECTOR);
+-      rtx offset = gen_int_mode (size, Pmode);
+-      offset = simplify_gen_binary (MULT, Pmode, args[2], offset);
+-      base = simplify_gen_binary (PLUS, Pmode, base, offset);
++      rtx vnum = args[vnum_argno];
++      if (vnum != const0_rtx)
++	{
++	  /* Use the size of the memory mode for extending loads and truncating
++	     stores.  Use the size of a full vector for non-extending loads
++	     and non-truncating stores (including svld234 and svst234).
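++	     As a worked example (illustrative): for a non-extending
++	     contiguous load, SIZE is one full vector, so a vnum argument
++	     of 3 adds 3 * VL bytes to the base address.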
*/ ++ poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), ++ BYTES_PER_SVE_VECTOR); ++ rtx offset; ++ if ((vl_isa_mode & AARCH64_FL_SM_ON) ++ && !TARGET_STREAMING ++ && !size.is_constant ()) ++ { ++ gcc_assert (known_eq (size, BYTES_PER_SVE_VECTOR)); ++ if (CONST_INT_P (vnum) && IN_RANGE (INTVAL (vnum), -32, 31)) ++ offset = aarch64_sme_vq_immediate (Pmode, INTVAL (vnum) * 16, ++ AARCH64_ISA_MODE); ++ else ++ { ++ offset = aarch64_sme_vq_immediate (Pmode, 16, ++ AARCH64_ISA_MODE); ++ offset = simplify_gen_binary (MULT, Pmode, vnum, offset); ++ } ++ } ++ else ++ { ++ offset = gen_int_mode (size, Pmode); ++ offset = simplify_gen_binary (MULT, Pmode, vnum, offset); ++ } ++ base = simplify_gen_binary (PLUS, Pmode, base, offset); ++ } + } + return base; + } +@@ -2991,11 +3215,18 @@ function_expander::add_input_operand (insn_code icode, rtx x) + machine_mode mode = operand.mode; + if (mode == VOIDmode) + { +- /* The only allowable use of VOIDmode is the wildcard +- aarch64_any_register_operand, which is used to avoid +- combinatorial explosion in the reinterpret patterns. */ +- gcc_assert (operand.predicate == aarch64_any_register_operand); +- mode = GET_MODE (x); ++ /* The only allowable uses of VOIDmode are: ++ ++ - the wildcard aarch64_any_register_operand, which is used ++ to avoid combinatorial explosion in the reinterpret patterns ++ ++ - pmode_register_operand, which always has mode Pmode. */ ++ if (operand.predicate == aarch64_any_register_operand) ++ mode = GET_MODE (x); ++ else if (operand.predicate == pmode_register_operand) ++ mode = Pmode; ++ else ++ gcc_unreachable (); + } + else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode)) + x = expand_vector_broadcast (mode, x); +@@ -3010,7 +3241,7 @@ function_expander::add_input_operand (insn_code icode, rtx x) + + /* Add an integer operand with value X to the instruction. */ + void +-function_expander::add_integer_operand (HOST_WIDE_INT x) ++function_expander::add_integer_operand (poly_int64 x) + { + m_ops.safe_grow (m_ops.length () + 1, true); + create_integer_operand (&m_ops.last (), x); +@@ -3555,7 +3786,10 @@ init_builtins () + sve_switcher sve; + register_builtin_types (); + if (in_lto_p) +- handle_arm_sve_h (); ++ { ++ handle_arm_sve_h (); ++ handle_arm_sme_h (); ++ } + } + + /* Register vector type TYPE under its arm_sve.h name. */ +@@ -3705,7 +3939,8 @@ handle_arm_sve_h () + function_table = new hash_table<registered_function_hasher> (1023); + function_builder builder; + for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) +- builder.register_function_group (function_groupsi); ++ if (!(function_groupsi.required_extensions & AARCH64_FL_SME)) ++ builder.register_function_group (function_groupsi); + } + + /* Return the function decl with SVE function subcode CODE, or error_mark_node +@@ -3718,6 +3953,33 @@ builtin_decl (unsigned int code, bool) + return (*registered_functions)code->decl; + } + ++/* Implement #pragma GCC aarch64 "arm_sme.h". 
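++
++   A minimal usage sketch (illustrative only; the intrinsic and attribute
++   spellings follow the ACLE rather than anything specific to this
++   backport):
++
++     #include <arm_sme.h>
++
++     void accumulate (svbool_t pr, svbool_t pc, svint32_t z)
++       __arm_streaming __arm_inout("za")
++     {
++       svaddha_za32_s32_m (0, pr, pc, z);   // update ZA tile 0
++     }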
*/ ++void ++handle_arm_sme_h () ++{ ++ if (!function_table) ++ { ++ error ("%qs defined without first defining %qs", ++ "arm_sme.h", "arm_sve.h"); ++ return; ++ } ++ ++ static bool initialized_p; ++ if (initialized_p) ++ { ++ error ("duplicate definition of %qs", "arm_sme.h"); ++ return; ++ } ++ initialized_p = true; ++ ++ sme_switcher sme; ++ ++ function_builder builder; ++ for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) ++ if (function_groupsi.required_extensions & AARCH64_FL_SME) ++ builder.register_function_group (function_groupsi); ++} ++ + /* If we're implementing manual overloading, check whether the SVE + function with subcode CODE is overloaded, and if so attempt to + determine the corresponding non-overloaded function. The call +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def +index be10b5ea1..69c11b1d0 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.def ++++ b/gcc/config/aarch64/aarch64-sve-builtins.def +@@ -29,6 +29,10 @@ + #define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) + #endif + ++#ifndef DEF_SME_ZA_SUFFIX ++#define DEF_SME_ZA_SUFFIX(A, B, C) ++#endif ++ + #ifndef DEF_SVE_GROUP_SUFFIX + #define DEF_SVE_GROUP_SUFFIX(A, B, C) + #endif +@@ -42,6 +46,16 @@ + DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) + #endif + ++#ifndef DEF_SME_ZA_FUNCTION_GS ++#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) \ ++ DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) ++#endif ++ ++#ifndef DEF_SME_ZA_FUNCTION ++#define DEF_SME_ZA_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ ++ DEF_SME_ZA_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) ++#endif ++ + DEF_SVE_MODE (n, none, none, none) + DEF_SVE_MODE (index, none, none, elements) + DEF_SVE_MODE (offset, none, none, bytes) +@@ -104,16 +118,30 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode) + DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) + DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) + ++/* Associate _za with bytes. This is needed for svldr_vnum_za and ++ svstr_vnum_za, whose ZA offset can be in the range 0, 15, as for za8. */ ++DEF_SME_ZA_SUFFIX (za, 8, VNx16QImode) ++ ++DEF_SME_ZA_SUFFIX (za8, 8, VNx16QImode) ++DEF_SME_ZA_SUFFIX (za16, 16, VNx8HImode) ++DEF_SME_ZA_SUFFIX (za32, 32, VNx4SImode) ++DEF_SME_ZA_SUFFIX (za64, 64, VNx2DImode) ++DEF_SME_ZA_SUFFIX (za128, 128, VNx1TImode) ++ + DEF_SVE_GROUP_SUFFIX (x2, 0, 2) + DEF_SVE_GROUP_SUFFIX (x3, 0, 3) + DEF_SVE_GROUP_SUFFIX (x4, 0, 4) + + #include "aarch64-sve-builtins-base.def" + #include "aarch64-sve-builtins-sve2.def" ++#include "aarch64-sve-builtins-sme.def" + ++#undef DEF_SME_ZA_FUNCTION + #undef DEF_SVE_FUNCTION ++#undef DEF_SME_ZA_FUNCTION_GS + #undef DEF_SVE_FUNCTION_GS + #undef DEF_SVE_GROUP_SUFFIX ++#undef DEF_SME_ZA_SUFFIX + #undef DEF_SVE_TYPE_SUFFIX + #undef DEF_SVE_TYPE + #undef DEF_SVE_MODE +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +index f16ac3947..6ef6bb93f 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.h ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -97,6 +97,8 @@ const unsigned int CP_PREFETCH_MEMORY = 1U << 3; + const unsigned int CP_WRITE_MEMORY = 1U << 4; + const unsigned int CP_READ_FFR = 1U << 5; + const unsigned int CP_WRITE_FFR = 1U << 6; ++const unsigned int CP_READ_ZA = 1U << 7; ++const unsigned int CP_WRITE_ZA = 1U << 8; + + /* Enumerates the SVE predicate and (data) vector types, together called + "vector types" for brevity. 
+diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
+index f16ac3947..6ef6bb93f 100644
+--- a/gcc/config/aarch64/aarch64-sve-builtins.h
++++ b/gcc/config/aarch64/aarch64-sve-builtins.h
+@@ -97,6 +97,8 @@ const unsigned int CP_PREFETCH_MEMORY = 1U << 3;
+ const unsigned int CP_WRITE_MEMORY = 1U << 4;
+ const unsigned int CP_READ_FFR = 1U << 5;
+ const unsigned int CP_WRITE_FFR = 1U << 6;
++const unsigned int CP_READ_ZA = 1U << 7;
++const unsigned int CP_WRITE_ZA = 1U << 8;
+ 
+ /* Enumerates the SVE predicate and (data) vector types, together called
+    "vector types" for brevity.  */
+@@ -142,6 +144,10 @@ enum predication_index
+   /* Zero predication: set inactive lanes of the vector result to zero.  */
+   PRED_z,
+ 
++  /* Merging predication for SME's ZA: merge into slices of the array
++     instead of overwriting the whole slices.  */
++  PRED_za_m,
++
+   NUM_PREDS
+ };
+ 
+@@ -176,6 +182,8 @@ enum type_suffix_index
+ {
+ #define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \
+   TYPE_SUFFIX_ ## NAME,
++#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \
++  TYPE_SUFFIX_ ## NAME,
+ #include "aarch64-sve-builtins.def"
+   NUM_TYPE_SUFFIXES
+ };
+@@ -240,9 +248,13 @@ struct type_suffix_info
+   unsigned int unsigned_p : 1;
+   /* True if the suffix is for a floating-point type.  */
+   unsigned int float_p : 1;
++  /* True if the suffix is for a vector type (integer or float).  */
++  unsigned int vector_p : 1;
+   /* True if the suffix is for a boolean type.  */
+   unsigned int bool_p : 1;
+-  unsigned int spare : 12;
++  /* True if the suffix is for SME's ZA.  */
++  unsigned int za_p : 1;
++  unsigned int spare : 10;
+ 
+   /* The associated vector or predicate mode.  */
+   machine_mode vector_mode : 16;
+@@ -356,13 +368,15 @@ public:
+   tree displacement_vector_type () const;
+   units_index displacement_units () const;
+ 
++  unsigned int num_za_tiles () const;
++
+   const type_suffix_info &type_suffix (unsigned int) const;
+   const group_suffix_info &group_suffix () const;
+ 
+   tree scalar_type (unsigned int) const;
+   tree vector_type (unsigned int) const;
+   tree tuple_type (unsigned int) const;
+-  unsigned int elements_per_vq (unsigned int i) const;
++  unsigned int elements_per_vq (unsigned int) const;
+   machine_mode vector_mode (unsigned int) const;
+   machine_mode tuple_mode (unsigned int) const;
+   machine_mode gp_mode (unsigned int) const;
+@@ -401,7 +415,7 @@ private:
+ 
+   char *get_name (const function_instance &, bool);
+ 
+-  tree get_attributes (const function_instance &);
++  tree get_attributes (const function_instance &, aarch64_feature_flags);
+ 
+   registered_function &add_function (const function_instance &,
+ 				     const char *, tree, tree,
+@@ -605,7 +619,8 @@ public:
+   bool overlaps_input_p (rtx);
+ 
+   rtx convert_to_pmode (rtx);
+-  rtx get_contiguous_base (machine_mode);
++  rtx get_contiguous_base (machine_mode, unsigned int = 1, unsigned int = 2,
++			   aarch64_feature_flags = 0);
+   rtx get_fallback_value (machine_mode, unsigned int,
+ 			  unsigned int, unsigned int &);
+   rtx get_reg_target ();
+@@ -613,7 +628,7 @@ public:
+ 
+   void add_output_operand (insn_code);
+   void add_input_operand (insn_code, rtx);
+-  void add_integer_operand (HOST_WIDE_INT);
++  void add_integer_operand (poly_int64);
+   void add_mem_operand (machine_mode, rtx);
+   void add_address_operand (rtx);
+   void add_fixed_operand (rtx);
+@@ -738,7 +753,7 @@ public:
+ class sve_switcher : public aarch64_simd_switcher
+ {
+ public:
+-  sve_switcher ();
++  sve_switcher (aarch64_feature_flags = 0);
+   ~sve_switcher ();
+ 
+ private:
+@@ -746,11 +761,18 @@ private:
+   bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
+ };
+ 
++/* Extends sve_switcher enough for defining arm_sme.h.  */
++class sme_switcher : public sve_switcher
++{
++public:
++  sme_switcher () : sve_switcher (AARCH64_FL_SME) {}
++};
++
+ extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];
+ extern const mode_suffix_info mode_suffixes[MODE_none + 1];
+ extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES];
+ 
+-extern tree scalar_types[NUM_VECTOR_TYPES];
++extern tree scalar_types[NUM_VECTOR_TYPES + 1];
+ extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+ extern tree acle_svpattern;
+ extern tree acle_svprfop;
+@@ -883,6 +905,16 @@ function_instance::displacement_vector_type () const
+   return acle_vector_types[0][mode_suffix ().displacement_vector_type];
+ }
+ 
++/* Return the number of ZA tiles associated with the _za<N> suffix
++   (which is always the first type suffix).  */
++inline unsigned int
++function_instance::num_za_tiles () const
++{
++  auto &suffix = type_suffix (0);
++  gcc_checking_assert (suffix.za_p);
++  return suffix.element_bytes;
++}
++
+ /* If the function takes a vector or scalar displacement, return the units
+    in which the displacement is measured, otherwise return UNITS_none.  */
+ inline units_index
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 112dfeabb..113784e31 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -6160,15 +6160,26 @@ aarch64_output_sve_scalar_inc_dec (rtx offset)
+ }
+ 
+ /* Return true if a single RDVL instruction can multiply FACTOR by the
+-   number of 128-bit quadwords in an SVE vector.  */
++   number of 128-bit quadwords in an SVE vector.  This is also the
++   range of ADDVL.  */
+ 
+ static bool
+-aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
++aarch64_sve_rdvl_addvl_factor_p (HOST_WIDE_INT factor)
+ {
+   return (multiple_p (factor, 16)
+ 	  && IN_RANGE (factor, -32 * 16, 31 * 16));
+ }
+ 
++/* Return true if ADDPL can be used to add FACTOR multiplied by the number
++   of quadwords in an SVE vector.  */
++
++static bool
++aarch64_sve_addpl_factor_p (HOST_WIDE_INT factor)
++{
++  return (multiple_p (factor, 2)
++	  && IN_RANGE (factor, -32 * 2, 31 * 2));
++}
++
+ /* Return true if we can move VALUE into a register using a single
+    RDVL instruction.  */
+ 
+@@ -6176,7 +6187,7 @@ static bool
+ aarch64_sve_rdvl_immediate_p (poly_int64 value)
+ {
+   HOST_WIDE_INT factor = value.coeffs[0];
+-  return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
++  return value.coeffs[1] == factor && aarch64_sve_rdvl_addvl_factor_p (factor);
+ }
+ 
+ /* Likewise for rtx X.  */
+@@ -6212,10 +6223,8 @@
+   HOST_WIDE_INT factor = value.coeffs[0];
+   if (factor == 0 || value.coeffs[1] != factor)
+     return false;
+-  /* FACTOR counts VG / 2, so a value of 2 is one predicate width
+-     and a value of 16 is one vector width.  */
+-  return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
+-	  || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
++  return (aarch64_sve_rdvl_addvl_factor_p (factor)
++	  || aarch64_sve_addpl_factor_p (factor));
+ }
+ 
+ /* Likewise for rtx X.  */
+@@ -6315,11 +6324,11 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
+    number of 128-bit quadwords in an SME vector.  ISA_MODE is the
+    ISA mode in which the calculation is being performed.  */
+ 
+-static rtx
++rtx
+ aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor,
+ 			  aarch64_feature_flags isa_mode)
+ {
+-  gcc_assert (aarch64_sve_rdvl_factor_p (factor));
++  gcc_assert (aarch64_sve_rdvl_addvl_factor_p (factor));
+   if (isa_mode & AARCH64_FL_SM_ON)
+     /* We're in streaming mode, so we can use normal poly-int values.  */
+     return gen_int_mode ({ factor, factor }, mode);
+@@ -6362,7 +6371,7 @@
+ {
+   HOST_WIDE_INT factor;
+   return (aarch64_sme_vq_unspec_p (x, &factor)
+-	  && aarch64_sve_rdvl_factor_p (factor));
++	  && aarch64_sve_rdvl_addvl_factor_p (factor));
+ }
+ 
+ /* Return the asm string for an RDSVL instruction that calculates X,
+@@ -6379,6 +6388,38 @@ aarch64_output_rdsvl (const_rtx x)
+   return buffer;
+ }
+ 
++/* Return true if X is a constant that can be added using ADDSVL or ADDSPL.  */
++
++bool
++aarch64_addsvl_addspl_immediate_p (const_rtx x)
++{
++  HOST_WIDE_INT factor;
++  return (aarch64_sme_vq_unspec_p (x, &factor)
++	  && (aarch64_sve_rdvl_addvl_factor_p (factor)
++	      || aarch64_sve_addpl_factor_p (factor)));
++}
++
++/* X is a constant that satisfies aarch64_addsvl_addspl_immediate_p.
++   Return the asm string for the associated instruction.  */
++
++char *
++aarch64_output_addsvl_addspl (rtx x)
++{
++  static char buffer[sizeof ("addspl\t%x0, %x1, #-") + 3 * sizeof (int)];
++  HOST_WIDE_INT factor;
++  if (!aarch64_sme_vq_unspec_p (x, &factor))
++    gcc_unreachable ();
++  if (aarch64_sve_rdvl_addvl_factor_p (factor))
++    snprintf (buffer, sizeof (buffer), "addsvl\t%%x0, %%x1, #%d",
++	      (int) factor / 16);
++  else if (aarch64_sve_addpl_factor_p (factor))
++    snprintf (buffer, sizeof (buffer), "addspl\t%%x0, %%x1, #%d",
++	      (int) factor / 2);
++  else
++    gcc_unreachable ();
++  return buffer;
++}
++
+ /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2.  */
+ 
+ static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
+@@ -6965,7 +7006,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+     {
+       /* Try to use an unshifted CNT[BHWD] or RDVL.  */
+       if (aarch64_sve_cnt_factor_p (factor)
+-	  || aarch64_sve_rdvl_factor_p (factor))
++	  || aarch64_sve_rdvl_addvl_factor_p (factor))
+ 	{
+ 	  val = gen_int_mode (poly_int64 (factor, factor), mode);
+ 	  shift = 0;
+@@ -12185,7 +12226,7 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
+       && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
+     index = SUBREG_REG (index);
+ 
+-  if (aarch64_sve_data_mode_p (mode))
++  if (aarch64_sve_data_mode_p (mode) || mode == VNx1TImode)
+     {
+       if (type != ADDRESS_REG_REG
+ 	  || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
+@@ -12288,7 +12329,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 	    && ((vec_flags == 0
+ 		 && known_lt (GET_MODE_SIZE (mode), 16))
+ 		|| vec_flags == VEC_ADVSIMD
+-		|| vec_flags & VEC_SVE_DATA));
++		|| vec_flags & VEC_SVE_DATA
++		|| mode == VNx1TImode));
+ 
+   /* For SVE, only accept [Rn], [Rn, #offset, MUL VL] and [Rn, Rm, LSL #shift].
+      The latter is not valid for SVE predicates, and that's rejected through
+@@ -12407,7 +12449,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ 	  /* Make "m" use the LD1 offset range for SVE data modes, so
+ 	     that pre-RTL optimizers like ivopts will work to that
+ 	     instead of the wider LDR/STR range.  */
+-	  if (vec_flags == VEC_SVE_DATA)
++	  if (vec_flags == VEC_SVE_DATA || mode == VNx1TImode)
+ 	    return (type == ADDR_QUERY_M
+ 		    ? offset_4bit_signed_scaled_p (mode, offset)
+ 		    : offset_9bit_signed_scaled_p (mode, offset));
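The new ADDSVL/ADDSPL immediates above are easy to sanity-check outside the compiler. A standalone C sketch (not compiler code; the sample factors are invented) of the factor tests and the factor-to-immediate division used by aarch64_output_addsvl_addspl: ADDSVL counts whole vector lengths (factor / 16), ADDSPL counts predicate lengths (factor / 2), both limited to [-32, 31].

#include <stdio.h>

/* Mirrors aarch64_sve_rdvl_addvl_factor_p / aarch64_sve_addpl_factor_p.  */
static int addvl_ok (long f) { return f % 16 == 0 && f >= -32 * 16 && f <= 31 * 16; }
static int addpl_ok (long f) { return f % 2 == 0 && f >= -32 * 2 && f <= 31 * 2; }

int main (void)
{
  long factors[] = { 16, 496, -512, 2, 62, 6 };
  for (unsigned i = 0; i < sizeof factors / sizeof factors[0]; i++)
    {
      long f = factors[i];
      if (addvl_ok (f))
	printf ("factor %4ld -> addsvl\tx0, x1, #%ld\n", f, f / 16);
      else if (addpl_ok (f))
	printf ("factor %4ld -> addspl\tx0, x1, #%ld\n", f, f / 2);
      else
	printf ("factor %4ld -> needs a multi-instruction sequence\n", f);
    }
  return 0;
}

With these samples the sketch prints addsvl #1, #31 and #-32 followed by addspl #1, #31 and #3.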
+@@ -14750,6 +14792,51 @@ aarch64_output_casesi (rtx *operands)
+   return "";
+ }
+ 
++/* Return the asm string for an SME ZERO instruction whose 8-bit mask
++   operand is MASK.  */
++const char *
++aarch64_output_sme_zero_za (rtx mask)
++{
++  auto mask_val = UINTVAL (mask);
++  if (mask_val == 0)
++    return "zero\t{}";
++
++  if (mask_val == 0xff)
++    return "zero\t{ za }";
++
++  static constexpr std::pair<unsigned int, char> tiles[] = {
++    { 0xff, 'b' },
++    { 0x55, 'h' },
++    { 0x11, 's' },
++    { 0x01, 'd' }
++  };
++  /* The last entry in the list has the form "za7.d }", but that's the
++     same length as "za7.d, ".  */
++  static char buffer[sizeof ("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1];
++  unsigned int i = 0;
++  i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t");
++  const char *prefix = "{ ";
++  for (auto &tile : tiles)
++    {
++      auto tile_mask = tile.first;
++      unsigned int tile_index = 0;
++      while (tile_mask < 0x100)
++	{
++	  if ((mask_val & tile_mask) == tile_mask)
++	    {
++	      i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c",
++			     prefix, tile_index, tile.second);
++	      prefix = ", ";
++	      mask_val &= ~tile_mask;
++	    }
++	  tile_mask <<= 1;
++	  tile_index += 1;
++	}
++    }
++  gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer));
++  snprintf (buffer + i, sizeof (buffer) - i, " }");
++  return buffer;
++}
+ 
+ /* Return size in bits of an arithmetic operand which is shifted/scaled and
+    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
+@@ -23756,6 +23843,31 @@ aarch64_sve_struct_memory_operand_p (rtx op)
+ 	  && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
+ }
+ 
++/* Return true if OFFSET is a constant integer and if VNUM is
++   OFFSET * the number of bytes in an SVE vector.  This is the requirement
++   that exists in SME LDR and STR instructions, where the VL offset must
++   equal the ZA slice offset.  */
++bool
++aarch64_sme_ldr_vnum_offset_p (rtx offset, rtx vnum)
++{
++  if (!CONST_INT_P (offset) || !IN_RANGE (INTVAL (offset), 0, 15))
++    return false;
++
++  if (TARGET_STREAMING)
++    {
++      poly_int64 const_vnum;
++      return (poly_int_rtx_p (vnum, &const_vnum)
++	      && known_eq (const_vnum,
++			   INTVAL (offset) * BYTES_PER_SVE_VECTOR));
++    }
++  else
++    {
++      HOST_WIDE_INT factor;
++      return (aarch64_sme_vq_unspec_p (vnum, &factor)
++	      && factor == INTVAL (offset) * 16);
++    }
++}
++
+ /* Emit a register copy from operand to operand, taking care not to
+    early-clobber source registers in the process.
+ 
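The mask-to-tile decomposition in aarch64_output_sme_zero_za can likewise be exercised on its own. A standalone C sketch (not compiler code; the mask_val == 0 and 0xff special cases are omitted) of the same greedy widest-tile-first walk:

#include <stdio.h>

static void zero_za_syntax (unsigned mask)
{
  static const struct { unsigned mask; char kind; } tiles[] = {
    { 0xff, 'b' }, { 0x55, 'h' }, { 0x11, 's' }, { 0x01, 'd' }
  };
  const char *prefix = "{ ";
  printf ("mask 0x%02x: zero\t", mask);
  for (unsigned t = 0; t < 4; t++)
    for (unsigned tm = tiles[t].mask, idx = 0; tm < 0x100; tm <<= 1, idx++)
      if ((mask & tm) == tm)
	{
	  printf ("%sza%u.%c", prefix, idx, tiles[t].kind);
	  prefix = ", ";
	  mask &= ~tm;
	}
  printf (" }\n");
}

int main (void)
{
  zero_za_syntax (0x55);	/* za0.h */
  zero_za_syntax (0xaa);	/* za1.h */
  zero_za_syntax (0x37);	/* za0.s, za1.s, za2.d */
  return 0;
}

For mask 0x37, for example, it prints zero { za0.s, za1.s, za2.d }, matching the greedy order of the compiler routine.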
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 8b21faf34..50fdf2f50 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ /* Macros to test ISA flags.  */
+ 
+ #define AARCH64_ISA_SM_OFF         (aarch64_isa_flags & AARCH64_FL_SM_OFF)
++#define AARCH64_ISA_SM_ON          (aarch64_isa_flags & AARCH64_FL_SM_ON)
+ #define AARCH64_ISA_ZA_ON          (aarch64_isa_flags & AARCH64_FL_ZA_ON)
+ #define AARCH64_ISA_MODE           (aarch64_isa_flags & AARCH64_FL_ISA_MODES)
+ #define AARCH64_ISA_CRC            (aarch64_isa_flags & AARCH64_FL_CRC)
+@@ -224,6 +225,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ #define AARCH64_ISA_SVE2_SHA3      (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
+ #define AARCH64_ISA_SVE2_SM4       (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
+ #define AARCH64_ISA_SME            (aarch64_isa_flags & AARCH64_FL_SME)
++#define AARCH64_ISA_SME_I16I64     (aarch64_isa_flags & AARCH64_FL_SME_I16I64)
++#define AARCH64_ISA_SME_F64F64     (aarch64_isa_flags & AARCH64_FL_SME_F64F64)
+ #define AARCH64_ISA_V8_3A          (aarch64_isa_flags & AARCH64_FL_V8_3A)
+ #define AARCH64_ISA_DOTPROD        (aarch64_isa_flags & AARCH64_FL_DOTPROD)
+ #define AARCH64_ISA_AES            (aarch64_isa_flags & AARCH64_FL_AES)
+@@ -256,6 +259,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+ /* The current function is a normal non-streaming function.  */
+ #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF)
+ 
++/* The current function has a streaming body.  */
++#define TARGET_STREAMING (AARCH64_ISA_SM_ON)
++
+ /* The current function has a streaming-compatible body.  */
+ #define TARGET_STREAMING_COMPATIBLE \
+   ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
+@@ -316,6 +322,15 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
+    imply anything about the state of PSTATE.SM.  */
+ #define TARGET_SME (AARCH64_ISA_SME)
+ 
++/* Streaming-mode SME instructions.  */
++#define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME)
++
++/* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64.  */
++#define TARGET_SME_I16I64 (AARCH64_ISA_SME_I16I64)
++
++/* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64.  */
++#define TARGET_SME_F64F64 (AARCH64_ISA_SME_F64F64)
++
+ /* ARMv8.3-A features.  */
+ #define TARGET_ARMV8_3  (AARCH64_ISA_V8_3A)
+ 
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 05a7c6675..6b4341866 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -2147,10 +2147,10 @@
+ 
+ (define_insn "*add<mode>3_aarch64"
+   [(set
+-    (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk")
++    (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk,rk")
+     (plus:GPI
+-     (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk")
+-     (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))]
++     (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk,rk")
++     (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav,UaV")))]
+   ""
+   "@
+   add\\t%<w>0, %<w>1, %2
+@@ -2159,10 +2159,11 @@
+   sub\\t%<w>0, %<w>1, #%n2
+   #
+   * return aarch64_output_sve_scalar_inc_dec (operands[2]);
+-  * return aarch64_output_sve_addvl_addpl (operands[2]);"
++  * return aarch64_output_sve_addvl_addpl (operands[2]);
++  * return aarch64_output_addsvl_addspl (operands[2]);"
+   ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
+-  [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm")
+-   (set_attr "arch" "*,*,simd,*,*,sve,sve")]
++  [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm,alu_imm")
++   (set_attr "arch" "*,*,simd,*,*,sve,sve,sme")]
+ )
+ 
+ ;; zero_extend version of above
+diff --git a/gcc/config/aarch64/arm_sme.h b/gcc/config/aarch64/arm_sme.h
+new file mode 100644
+index 000000000..5ddd49f57
+--- /dev/null
++++ b/gcc/config/aarch64/arm_sme.h
+@@ -0,0 +1,45 @@
++/* AArch64 SME intrinsics include file.
++   Copyright (C) 2023 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef _ARM_SME_H_
++#define _ARM_SME_H_
++
++#include <arm_sve.h>
++#pragma GCC aarch64 "arm_sme.h"
++
++void __arm_za_disable(void) __arm_streaming_compatible;
++
++void *__arm_sc_memcpy(void *, const void *, __SIZE_TYPE__)
++  __arm_streaming_compatible;
++
++void *__arm_sc_memmove(void *, const void *, __SIZE_TYPE__)
++  __arm_streaming_compatible;
++
++void *__arm_sc_memset(void *, int, __SIZE_TYPE__)
++  __arm_streaming_compatible;
++
++void *__arm_sc_memchr(void *, int, __SIZE_TYPE__)
++  __arm_streaming_compatible;
++
++#endif
+diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
+index 88fb9a07c..2da423779 100644
+--- a/gcc/config/aarch64/constraints.md
++++ b/gcc/config/aarch64/constraints.md
+@@ -21,6 +21,9 @@
+ (define_register_constraint "k" "STACK_REG"
+   "@internal The stack register.")
+ 
++(define_register_constraint "Ucj" "W12_W15_REGS"
++  "@internal r12-r15, which can be used to index ZA.")
++
+ (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS"
+   "@internal Registers suitable for an indirect tail call")
+ 
+@@ -74,6 +77,12 @@
+    a single ADDVL or ADDPL."
+  (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))
+ 
++(define_constraint "UaV"
++  "@internal
++   A constraint that matches a VG-based constant that can be added by
++   a single ADDSVL or ADDSPL."
++ (match_operand 0 "aarch64_addsvl_addspl_immediate"))
++
+ (define_constraint "Uat"
+   "@internal
+    A constraint that matches a VG-based constant that can be added by
+diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
+index b616f5c9a..152d28f6b 100644
+--- a/gcc/config/aarch64/iterators.md
++++ b/gcc/config/aarch64/iterators.md
+@@ -450,6 +450,7 @@
+ (define_mode_iterator VNx4SF_ONLY [VNx4SF])
+ (define_mode_iterator VNx2DI_ONLY [VNx2DI])
+ (define_mode_iterator VNx2DF_ONLY [VNx2DF])
++(define_mode_iterator VNx1TI_ONLY [VNx1TI])
+ 
+ ;; All fully-packed SVE vector modes.
+ (define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI
+@@ -608,6 +609,17 @@
+ ;; Bfloat16 modes to which V4SF can be converted
+ (define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+ 
++;; The modes used to represent different ZA access sizes.
++(define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI])
++(define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")])
++
++(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")])
++
++;; The modes for which outer product instructions are supported.
++(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
++(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF
++				    (VNx2DF "TARGET_SME_F64F64")])
++
+ ;; ------------------------------------------------------------------
+ ;; Unspec enumerations for Advance SIMD. These could well go into
+ ;; aarch64.md but for their use in int_iterators here.
+@@ -986,6 +998,28 @@
+     UNSPEC_BFCVTN2	; Used in aarch64-simd.md.
+     UNSPEC_BFCVT	; Used in aarch64-simd.md.
+     UNSPEC_FCVTXN	; Used in aarch64-simd.md.
++
++    ;; All used in aarch64-sme.md
++    UNSPEC_SME_ADDHA
++    UNSPEC_SME_ADDVA
++    UNSPEC_SME_FMOPA
++    UNSPEC_SME_FMOPS
++    UNSPEC_SME_LD1_HOR
++    UNSPEC_SME_LD1_VER
++    UNSPEC_SME_READ_HOR
++    UNSPEC_SME_READ_VER
++    UNSPEC_SME_SMOPA
++    UNSPEC_SME_SMOPS
++    UNSPEC_SME_ST1_HOR
++    UNSPEC_SME_ST1_VER
++    UNSPEC_SME_SUMOPA
++    UNSPEC_SME_SUMOPS
++    UNSPEC_SME_UMOPA
++    UNSPEC_SME_UMOPS
++    UNSPEC_SME_USMOPA
++    UNSPEC_SME_USMOPS
++    UNSPEC_SME_WRITE_HOR
++    UNSPEC_SME_WRITE_VER
+ )
+ 
+ ;; ------------------------------------------------------------------
+@@ -1115,9 +1149,15 @@
+ ;; element.
+ (define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16")
+ 			     (VNx4BI "32") (VNx2BI "64")
+-			     (VNx16QI "8") (VNx8HI "16")
+-			     (VNx4SI "32") (VNx2DI "64")
+-			     (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")])
++			     (VNx16QI "8") (VNx32QI "8") (VNx64QI "8")
++			     (VNx8HI "16") (VNx16HI "16") (VNx32HI "16")
++			     (VNx8HF "16") (VNx16HF "16") (VNx32HF "16")
++			     (VNx8BF "16") (VNx16BF "16") (VNx32BF "16")
++			     (VNx4SI "32") (VNx8SI "32") (VNx16SI "32")
++			     (VNx4SF "32") (VNx8SF "32") (VNx16SF "32")
++			     (VNx2DI "64") (VNx4DI "64") (VNx8DI "64")
++			     (VNx2DF "64") (VNx4DF "64") (VNx8DF "64")
++			     (VNx1TI "128")])
+ 
+ ;; The number of bits in a vector container.
+ (define_mode_attr container_bits [(VNx16QI "8")
+@@ -1243,6 +1283,7 @@
+ 			 (VNx4SF "s") (VNx2SF "s")
+ 			 (VNx2DI "d")
+ 			 (VNx2DF "d")
++			 (VNx1TI "q")
+ 			 (BF "h") (V4BF "h") (V8BF "h")
+ 			 (HF "h")
+ 			 (SF "s") (DF "d")
+@@ -1261,6 +1302,7 @@
+ 			(VNx4SF "w") (VNx2SF "w")
+ 			(VNx2DI "d")
+ 			(VNx2DF "d")
++			(VNx1TI "q")
+ 			(VNx32QI "b") (VNx48QI "b") (VNx64QI "b")
+ 			(VNx16HI "h") (VNx24HI "h") (VNx32HI "h")
+ 			(VNx16HF "h") (VNx24HF "h") (VNx32HF "h")
+@@ -2052,6 +2094,7 @@
+ 			     (VNx4SF "VNx4BI") (VNx2SF "VNx2BI")
+ 			     (VNx2DI "VNx2BI")
+ 			     (VNx2DF "VNx2BI")
++			     (VNx1TI "VNx2BI")
+ 			     (VNx32QI "VNx16BI")
+ 			     (VNx16HI "VNx8BI") (VNx16HF "VNx8BI")
+ 			     (VNx16BF "VNx8BI")
+@@ -2132,6 +2175,8 @@
+ ;; The constraint to use for an SVE FCMLA lane index.
+ (define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")])
+ 
++(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")])
++
+ ;; -------------------------------------------------------------------
+ ;; Code Iterators
+ ;; -------------------------------------------------------------------
+@@ -3159,6 +3204,20 @@
+ (define_int_iterator FCMUL_OP [UNSPEC_FCMUL
+ 			       UNSPEC_FCMUL_CONJ])
+ 
++(define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER])
++(define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER])
++(define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER])
++(define_int_iterator SME_WRITE [UNSPEC_SME_WRITE_HOR UNSPEC_SME_WRITE_VER])
++
++(define_int_iterator SME_BINARY_SDI [UNSPEC_SME_ADDHA UNSPEC_SME_ADDVA])
++
++(define_int_iterator SME_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS
++				  UNSPEC_SME_SUMOPA UNSPEC_SME_SUMOPS
++				  UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS
++				  UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS])
++
++(define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS])
++
+ ;; Iterators for atomic operations.
+ 
+ (define_int_iterator ATOMIC_LDOP
+@@ -3231,6 +3290,26 @@
+ 			(UNSPEC_PMULLT "pmullt")
+ 			(UNSPEC_PMULLT_PAIR "pmullt_pair")
+ 			(UNSPEC_SMATMUL "smatmul")
++			(UNSPEC_SME_ADDHA "addha")
++			(UNSPEC_SME_ADDVA "addva")
++			(UNSPEC_SME_FMOPA "fmopa")
++			(UNSPEC_SME_FMOPS "fmops")
++			(UNSPEC_SME_LD1_HOR "ld1_hor")
++			(UNSPEC_SME_LD1_VER "ld1_ver")
++			(UNSPEC_SME_READ_HOR "read_hor")
++			(UNSPEC_SME_READ_VER "read_ver")
++			(UNSPEC_SME_SMOPA "smopa")
++			(UNSPEC_SME_SMOPS "smops")
++			(UNSPEC_SME_ST1_HOR "st1_hor")
++			(UNSPEC_SME_ST1_VER "st1_ver")
++			(UNSPEC_SME_SUMOPA "sumopa")
++			(UNSPEC_SME_SUMOPS "sumops")
++			(UNSPEC_SME_UMOPA "umopa")
++			(UNSPEC_SME_UMOPS "umops")
++			(UNSPEC_SME_USMOPA "usmopa")
++			(UNSPEC_SME_USMOPS "usmops")
++			(UNSPEC_SME_WRITE_HOR "write_hor")
++			(UNSPEC_SME_WRITE_VER "write_ver")
+ 			(UNSPEC_SQCADD90 "sqcadd90")
+ 			(UNSPEC_SQCADD270 "sqcadd270")
+ 			(UNSPEC_SQRDCMLAH "sqrdcmlah")
+@@ -4000,6 +4079,15 @@
+ (define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW")
+ 			 (UNSPEC_WHILEWR "UNSPEC_WHILEWR")])
+ 
++(define_int_attr hv [(UNSPEC_SME_LD1_HOR "h")
++		     (UNSPEC_SME_LD1_VER "v")
++		     (UNSPEC_SME_READ_HOR "h")
++		     (UNSPEC_SME_READ_VER "v")
++		     (UNSPEC_SME_ST1_HOR "h")
++		     (UNSPEC_SME_ST1_VER "v")
++		     (UNSPEC_SME_WRITE_HOR "h")
++		     (UNSPEC_SME_WRITE_VER "v")])
++
+ ;; Iterators and attributes for fpcr fpsr getter setters
+ 
+ (define_int_iterator GET_FPSCR
+diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
+index 1b8496c07..3ec9e9103 100644
+--- a/gcc/config/aarch64/predicates.md
++++ b/gcc/config/aarch64/predicates.md
+@@ -212,11 +212,17 @@
+   (and (match_code "const_poly_int")
+        (match_test "aarch64_add_offset_temporaries (op) == 1")))
+ 
++(define_predicate "aarch64_addsvl_addspl_immediate"
++  (and (match_code "const")
++       (match_test "aarch64_addsvl_addspl_immediate_p (op)")))
++
+ (define_predicate "aarch64_pluslong_operand"
+   (ior (match_operand 0 "register_operand")
+        (match_operand 0 "aarch64_pluslong_immediate")
+        (and (match_test "TARGET_SVE")
+-	    (match_operand 0 "aarch64_sve_plus_immediate"))))
++	    (match_operand 0 "aarch64_sve_plus_immediate"))
++       (and (match_test "TARGET_SME")
++	    (match_operand 0 "aarch64_addsvl_addspl_immediate"))))
+ 
+ (define_predicate "aarch64_pluslong_or_poly_operand"
+   (ior (match_operand 0 "aarch64_pluslong_operand")
+diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
+index 49731ba92..be60cc003 100644
+--- a/gcc/config/aarch64/t-aarch64
++++ 
b/gcc/config/aarch64/t-aarch64 +@@ -63,6 +63,7 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.def \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.def \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.def \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.def \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) \ + $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ +@@ -72,7 +73,8 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ +- $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins.cc + +@@ -113,6 +115,19 @@ aarch64-sve-builtins-sve2.o: \ + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc + ++aarch64-sve-builtins-sme.o: \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ ++ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ ++ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ ++ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc ++ + aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ + $(srcdir)/config/aarch64/iterators.md + $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 2420b05d9..47fff9c90 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19480,6 +19480,10 @@ Enable the Flag Manipulation instructions Extension. + Enable the Pointer Authentication Extension. + @item sme + Enable the Scalable Matrix Extension. ++@item sme-i16i64 ++Enable the FEAT_SME_I16I64 extension to SME. ++@item sme-f64f64 ++Enable the FEAT_SME_F64F64 extension to SME. + + @end table + +diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp +new file mode 100644 +index 000000000..a9ed3a195 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp +@@ -0,0 +1,82 @@ ++# Assembly-based regression-test driver for the SME ACLE. ++# Copyright (C) 2009-2023 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
++# General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.  */
++
++# GCC testsuite that uses the `dg.exp' driver.
++
++# Exit immediately if this isn't an AArch64 target.
++if { ![istarget aarch64*-*-*] } {
++    return
++}
++
++# Load support procs.
++load_lib g++-dg.exp
++
++# Initialize `dg'.
++dg-init
++
++# Force SME if we're not testing it already.
++if { [check_effective_target_aarch64_sme] } {
++    set sme_flags ""
++} else {
++    set sme_flags "-march=armv9-a+sme"
++}
++
++# Turn off any codegen tweaks by default that may affect expected assembly.
++# Tests relying on those should turn them on explicitly.
++set sme_flags "$sme_flags -mtune=generic -moverride=tune=none"
++
++global gcc_runtest_parallelize_limit_minor
++if { [info exists gcc_runtest_parallelize_limit_minor] } {
++    set old_limit_minor $gcc_runtest_parallelize_limit_minor
++    set gcc_runtest_parallelize_limit_minor 1
++}
++
++torture-init
++set-torture-options {
++    "-std=c++11 -O0 -g"
++    "-std=c++14 -O1 -g"
++    "-std=c++17 -Og -g"
++    "-std=c++23 -Os -g"
++    "-std=gnu++11 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
++    "-std=gnu++23 -Ofast -g"
++} {
++    "-DTEST_FULL"
++    "-DTEST_OVERLOADS"
++}
++
++# Main loop.
++set gcc_subdir [string replace $subdir 0 2 gcc]
++set files [glob -nocomplain $srcdir/$gcc_subdir/acle-asm/*.c]
++set save-dg-do-what-default ${dg-do-what-default}
++if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } {
++    set dg-do-what-default assemble
++} else {
++    set dg-do-what-default compile
++}
++gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf"
++set dg-do-what-default ${save-dg-do-what-default}
++
++torture-finish
++
++if { [info exists gcc_runtest_parallelize_limit_minor] } {
++    set gcc_runtest_parallelize_limit_minor $old_limit_minor
++}
++
++# All done.
++dg-finish
+diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c
+index 9591e3d01..f2f922d4f 100644
+--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c
++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c
+@@ -4,6 +4,7 @@
+    to be diagnosed.  Any attempt to call the function before including
+    arm_sve.h will lead to a link failure.  (Same for taking its address,
+    etc.)
*/
+-extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
++extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t)
++  __arm_streaming_compatible;
+ 
+ #pragma GCC aarch64 "arm_sve.h"
+diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c
+index f87201984..f24ef002c 100644
+--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c
++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c
+@@ -2,6 +2,7 @@
+ 
+ __SVUint8_t
+ svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
++  __arm_streaming_compatible
+ {
+   return x;
+ }
+diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c
+index 1f2e4bf66..6752ea11e 100644
+--- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c
++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c
+@@ -2,6 +2,7 @@
+ 
+ __SVUint8_t
+ svadd_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
++  __arm_streaming_compatible
+ {
+   return x;
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+index 0e6461fa4..23ebe5e4f 100644
+--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+@@ -45,3 +45,41 @@
+ #ifdef __ARM_FEATURE_SVE2_SHA3
+ #error Foo
+ #endif
++
++#pragma GCC target "+sme"
++#ifndef __ARM_FEATURE_SME
++#error Foo
++#endif
++
++#pragma GCC target "+sme+nofp"
++#ifdef __ARM_FEATURE_SME
++#error Foo
++#endif
++
++#pragma GCC target "+sme+nosimd"
++#ifdef __ARM_FEATURE_SME
++#error Foo
++#endif
++
++#pragma GCC target "+sme+nobf16"
++#ifdef __ARM_FEATURE_SME
++#error Foo
++#endif
++
++#pragma GCC target "+nothing+sme"
++#ifdef __ARM_FEATURE_SME_I16I64
++#error Foo
++#endif
++#ifdef __ARM_FEATURE_SME_F64F64
++#error Foo
++#endif
++
++#pragma GCC target "+sme-i16i64"
++#ifndef __ARM_FEATURE_SME_I16I64
++#error Foo
++#endif
++
++#pragma GCC target "+sme-f64f64"
++#ifndef __ARM_FEATURE_SME_F64F64
++#error Foo
++#endif
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp
+new file mode 100644
+index 000000000..e2d002f26
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp
+@@ -0,0 +1,81 @@
++# Assembly-based regression-test driver for the SME ACLE.
++# Copyright (C) 2009-2023 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++# General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.  */
++
++# GCC testsuite that uses the `dg.exp' driver.
++
++# Exit immediately if this isn't an AArch64 target.
++if { ![istarget aarch64*-*-*] } {
++    return
++}
++
++# Load support procs.
++load_lib gcc-dg.exp
++
++# Initialize `dg'.
++dg-init
++
++# Force SME if we're not testing it already.
++if { [check_effective_target_aarch64_sme] } {
++    set sme_flags ""
++} else {
++    set sme_flags "-march=armv9-a+sme"
++}
++
++# Turn off any codegen tweaks by default that may affect expected assembly.
++# Tests relying on those should turn them on explicitly.
++set sme_flags "$sme_flags -mtune=generic -moverride=tune=none"
++
++global gcc_runtest_parallelize_limit_minor
++if { [info exists gcc_runtest_parallelize_limit_minor] } {
++    set old_limit_minor $gcc_runtest_parallelize_limit_minor
++    set gcc_runtest_parallelize_limit_minor 1
++}
++
++torture-init
++set-torture-options {
++    "-std=c90 -O0 -g"
++    "-std=c99 -Og -g"
++    "-std=c11 -Os -g"
++    "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
++    "-std=gnu90 -O3 -g"
++    "-std=gnu23 -Ofast -g"
++} {
++    "-DTEST_FULL"
++    "-DTEST_OVERLOADS"
++}
++
++# Main loop.
++set files [glob -nocomplain $srcdir/$subdir/acle-asm/*.c]
++set save-dg-do-what-default ${dg-do-what-default}
++if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } {
++    set dg-do-what-default assemble
++} else {
++    set dg-do-what-default compile
++}
++gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf"
++set dg-do-what-default ${save-dg-do-what-default}
++
++torture-finish
++
++if { [info exists gcc_runtest_parallelize_limit_minor] } {
++    set gcc_runtest_parallelize_limit_minor $old_limit_minor
++}
++
++# All done.
++dg-finish
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c
+new file mode 100644
+index 000000000..8dee40145
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c
+@@ -0,0 +1,48 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** addha_za32_s32_0_p0_p1_z0:
++**	addha	za0\.s, p0/m, p1/m, z0\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (addha_za32_s32_0_p0_p1_z0, svint32_t,
++		 svaddha_za32_s32_m (0, p0, p1, z0),
++		 svaddha_za32_m (0, p0, p1, z0))
++
++/*
++** addha_za32_s32_0_p1_p0_z1:
++**	addha	za0\.s, p1/m, p0/m, z1\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (addha_za32_s32_0_p1_p0_z1, svint32_t,
++		 svaddha_za32_s32_m (0, p1, p0, z1),
++		 svaddha_za32_m (0, p1, p0, z1))
++
++/*
++** addha_za32_s32_1_p0_p1_z0:
++**	addha	za1\.s, p0/m, p1/m, z0\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (addha_za32_s32_1_p0_p1_z0, svint32_t,
++		 svaddha_za32_s32_m (1, p0, p1, z0),
++		 svaddha_za32_m (1, p0, p1, z0))
++
++/*
++** addha_za32_s32_3_p0_p1_z0:
++**	addha	za3\.s, p0/m, p1/m, z0\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (addha_za32_s32_3_p0_p1_z0, svint32_t,
++		 svaddha_za32_s32_m (3, p0, p1, z0),
++		 svaddha_za32_m (3, p0, p1, z0))
++
++/*
++** addha_za32_u32_0_p0_p1_z0:
++**	addha	za0\.s, p0/m, p1/m, z0\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (addha_za32_u32_0_p0_p1_z0, svuint32_t,
++		 svaddha_za32_u32_m (0, p0, p1, z0),
++		 svaddha_za32_m (0, p0, p1, z0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c
+new file mode 100644
+index 000000000..363ff1aab
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c
+@@ -0,0 +1,50 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++#pragma GCC target "+sme-i16i64"
++
++/*
++** addha_za64_s64_0_p0_p1_z0:
++**	addha	za0\.d, p0/m, p1/m, z0\.d
++**	ret
++*/
++TEST_UNIFORM_ZA (addha_za64_s64_0_p0_p1_z0, svint64_t,
++		 svaddha_za64_s64_m (0, p0, p1, z0),
++		 
svaddha_za64_m (0, p0, p1, z0)) ++ ++/* ++** addha_za64_s64_0_p1_p0_z1: ++** addha za0\.d, p1/m, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_0_p1_p0_z1, svint64_t, ++ svaddha_za64_s64_m (0, p1, p0, z1), ++ svaddha_za64_m (0, p1, p0, z1)) ++ ++/* ++** addha_za64_s64_1_p0_p1_z0: ++** addha za1\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_1_p0_p1_z0, svint64_t, ++ svaddha_za64_s64_m (1, p0, p1, z0), ++ svaddha_za64_m (1, p0, p1, z0)) ++ ++/* ++** addha_za64_s64_7_p0_p1_z0: ++** addha za7\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_s64_7_p0_p1_z0, svint64_t, ++ svaddha_za64_s64_m (7, p0, p1, z0), ++ svaddha_za64_m (7, p0, p1, z0)) ++ ++/* ++** addha_za64_u64_0_p0_p1_z0: ++** addha za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addha_za64_u64_0_p0_p1_z0, svuint64_t, ++ svaddha_za64_u64_m (0, p0, p1, z0), ++ svaddha_za64_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c +new file mode 100644 +index 000000000..0de019ac8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** addva_za32_s32_0_p0_p1_z0: ++** addva za0\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_0_p0_p1_z0, svint32_t, ++ svaddva_za32_s32_m (0, p0, p1, z0), ++ svaddva_za32_m (0, p0, p1, z0)) ++ ++/* ++** addva_za32_s32_0_p1_p0_z1: ++** addva za0\.s, p1/m, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_0_p1_p0_z1, svint32_t, ++ svaddva_za32_s32_m (0, p1, p0, z1), ++ svaddva_za32_m (0, p1, p0, z1)) ++ ++/* ++** addva_za32_s32_1_p0_p1_z0: ++** addva za1\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_1_p0_p1_z0, svint32_t, ++ svaddva_za32_s32_m (1, p0, p1, z0), ++ svaddva_za32_m (1, p0, p1, z0)) ++ ++/* ++** addva_za32_s32_3_p0_p1_z0: ++** addva za3\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_s32_3_p0_p1_z0, svint32_t, ++ svaddva_za32_s32_m (3, p0, p1, z0), ++ svaddva_za32_m (3, p0, p1, z0)) ++ ++/* ++** addva_za32_u32_0_p0_p1_z0: ++** addva za0\.s, p0/m, p1/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za32_u32_0_p0_p1_z0, svuint32_t, ++ svaddva_za32_u32_m (0, p0, p1, z0), ++ svaddva_za32_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c +new file mode 100644 +index 000000000..d83d4e03c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** addva_za64_s64_0_p0_p1_z0: ++** addva za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_0_p0_p1_z0, svint64_t, ++ svaddva_za64_s64_m (0, p0, p1, z0), ++ svaddva_za64_m (0, p0, p1, z0)) ++ ++/* ++** addva_za64_s64_0_p1_p0_z1: ++** addva za0\.d, p1/m, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_0_p1_p0_z1, svint64_t, ++ svaddva_za64_s64_m (0, p1, p0, z1), ++ svaddva_za64_m (0, p1, p0, z1)) ++ ++/* ++** addva_za64_s64_1_p0_p1_z0: ++** addva za1\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_1_p0_p1_z0, svint64_t, ++ svaddva_za64_s64_m (1, p0, p1, z0), ++ svaddva_za64_m (1, p0, p1, z0)) ++ ++/* ++** addva_za64_s64_7_p0_p1_z0: ++** addva 
za7\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_s64_7_p0_p1_z0, svint64_t, ++ svaddva_za64_s64_m (7, p0, p1, z0), ++ svaddva_za64_m (7, p0, p1, z0)) ++ ++/* ++** addva_za64_u64_0_p0_p1_z0: ++** addva za0\.d, p0/m, p1/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZA (addva_za64_u64_0_p0_p1_z0, svuint64_t, ++ svaddva_za64_u64_m (0, p0, p1, z0), ++ svaddva_za64_m (0, p0, p1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c +new file mode 100644 +index 000000000..e37793f9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c +@@ -0,0 +1,25 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+nosme" ++ ++/* ++** test_nosme: ++** ... ++** bl __arm_sme_state ++** lsr x0, x0, #?63 ++** ... ++*/ ++PROTO (test_nosme, int, ()) { return __arm_has_sme (); } ++ ++#pragma GCC target "+sme" ++ ++/* ++** test_sme: ++** mov w0, #?1 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_has_sme (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c +new file mode 100644 +index 000000000..ba475d67b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c +@@ -0,0 +1,11 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NON_STREAMING ++#include "test_sme_acle.h" ++ ++/* ++** test_sme: ++** mov w0, #?0 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c +new file mode 100644 +index 000000000..b88d47921 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c +@@ -0,0 +1,11 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** test_sme: ++** mov w0, #?1 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c +new file mode 100644 +index 000000000..fb3588a64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c +@@ -0,0 +1,26 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+nosme" ++ ++/* ++** test_nosme: ++** ... ++** bl __arm_sme_state ++** and w0, w0, #?1 ++** ... 
++*/ ++PROTO (test_nosme, int, ()) { return __arm_in_streaming_mode (); } ++ ++#pragma GCC target "+sme" ++ ++/* ++** test_sme: ++** mrs x(0-9+), svcr ++** and w0, w\1, #?1 ++** ret ++*/ ++PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c +new file mode 100644 +index 000000000..0a8de45be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c +@@ -0,0 +1,310 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntb_1: ++** cntb x0 ++** ret ++*/ ++PROTO (cntb_1, uint64_t, ()) { return svcntsb (); } ++ ++/* ++** cntb_2: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntb_2, uint64_t, ()) { return svcntsb () * 2; } ++ ++/* ++** cntb_3: ++** cntb x0, all, mul #3 ++** ret ++*/ ++PROTO (cntb_3, uint64_t, ()) { return svcntsb () * 3; } ++ ++/* ++** cntb_4: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntb_4, uint64_t, ()) { return svcntsb () * 4; } ++ ++/* ++** cntb_8: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntb_8, uint64_t, ()) { return svcntsb () * 8; } ++ ++/* ++** cntb_15: ++** cntb x0, all, mul #15 ++** ret ++*/ ++PROTO (cntb_15, uint64_t, ()) { return svcntsb () * 15; } ++ ++/* ++** cntb_16: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntb_16, uint64_t, ()) { return svcntsb () * 16; } ++ ++/* ++** cntb_17: ++** rdvl x0, #17 ++** ret ++*/ ++PROTO (cntb_17, uint64_t, ()) { return svcntsb () * 17; } ++ ++/* ++** cntb_31: ++** rdvl x0, #31 ++** ret ++*/ ++PROTO (cntb_31, uint64_t, ()) { return svcntsb () * 31; } ++ ++/* ++** cntb_32: ++** cntb (x0-9+) ++** lsl x0, \1, 5 ++** ret ++*/ ++PROTO (cntb_32, uint64_t, ()) { return svcntsb () * 32; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntb_33: ++** cntb (x0-9+) ++** lsl x0, \1, 5 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_33, uint64_t, ()) { return svcntsb () * 33; } ++ ++/* ++** cntb_64: ++** cntb (x0-9+) ++** lsl x0, \1, 6 ++** ret ++*/ ++PROTO (cntb_64, uint64_t, ()) { return svcntsb () * 64; } ++ ++/* ++** cntb_128: ++** cntb (x0-9+) ++** lsl x0, \1, 7 ++** ret ++*/ ++PROTO (cntb_128, uint64_t, ()) { return svcntsb () * 128; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntb_129: ++** cntb (x0-9+) ++** lsl x0, \1, 7 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_129, uint64_t, ()) { return svcntsb () * 129; } ++ ++/* ++** cntb_m1: ++** rdvl x0, #-1 ++** ret ++*/ ++PROTO (cntb_m1, uint64_t, ()) { return -svcntsb (); } ++ ++/* ++** cntb_m13: ++** rdvl x0, #-13 ++** ret ++*/ ++PROTO (cntb_m13, uint64_t, ()) { return -svcntsb () * 13; } ++ ++/* ++** cntb_m15: ++** rdvl x0, #-15 ++** ret ++*/ ++PROTO (cntb_m15, uint64_t, ()) { return -svcntsb () * 15; } ++ ++/* ++** cntb_m16: ++** rdvl x0, #-16 ++** ret ++*/ ++PROTO (cntb_m16, uint64_t, ()) { return -svcntsb () * 16; } ++ ++/* ++** cntb_m17: ++** rdvl x0, #-17 ++** ret ++*/ ++PROTO (cntb_m17, uint64_t, ()) { return -svcntsb () * 17; } ++ ++/* ++** cntb_m32: ++** rdvl x0, #-32 ++** ret ++*/ ++PROTO (cntb_m32, uint64_t, ()) { return -svcntsb () * 32; } ++ ++/* ++** cntb_m33: ++** rdvl x0, #-32 ++** decb x0 ++** ret ++*/ ++PROTO (cntb_m33, uint64_t, ()) { return -svcntsb () * 33; } ++ ++/* ++** cntb_m34: ++** rdvl (x0-9+), #-17 ++** lsl x0, \1, #?1 ++** ret ++*/ ++PROTO (cntb_m34, uint64_t, ()) { return -svcntsb () * 34; } ++ ++/* ++** cntb_m64: ++** rdvl (x0-9+), #-1 ++** lsl x0, \1, #?6 ++** ret ++*/ ++PROTO (cntb_m64, uint64_t, ()) { return -svcntsb () * 64; } ++ ++/* ++** incb_1: ++** incb x0 ++** ret ++*/ ++PROTO (incb_1, uint64_t, (uint64_t x0)) { return x0 + svcntsb (); } ++ ++/* ++** incb_2: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incb_2, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 2; } ++ ++/* ++** incb_3: ++** incb x0, all, mul #3 ++** ret ++*/ ++PROTO (incb_3, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 3; } ++ ++/* ++** incb_4: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (incb_4, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 4; } ++ ++/* ++** incb_8: ++** incb x0, all, mul #8 ++** ret ++*/ ++PROTO (incb_8, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 8; } ++ ++/* ++** incb_15: ++** incb x0, all, mul #15 ++** ret ++*/ ++PROTO (incb_15, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 15; } ++ ++/* ++** incb_16: ++** incb x0, all, mul #16 ++** ret ++*/ ++PROTO (incb_16, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 16; } ++ ++/* ++** incb_17: ++** addvl x0, x0, #17 ++** ret ++*/ ++PROTO (incb_17, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 17; } ++ ++/* ++** incb_31: ++** addvl x0, x0, #31 ++** ret ++*/ ++PROTO (incb_31, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 31; } ++ ++/* ++** decb_1: ++** decb x0 ++** ret ++*/ ++PROTO (decb_1, uint64_t, (uint64_t x0)) { return x0 - svcntsb (); } ++ ++/* ++** decb_2: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decb_2, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 2; } ++ ++/* ++** decb_3: ++** decb x0, all, mul #3 ++** ret ++*/ ++PROTO (decb_3, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 3; } ++ ++/* ++** decb_4: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (decb_4, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 4; } ++ ++/* ++** decb_8: ++** decb x0, all, mul #8 ++** ret ++*/ ++PROTO (decb_8, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 8; } ++ ++/* ++** decb_15: ++** decb x0, all, mul #15 ++** ret ++*/ ++PROTO (decb_15, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 15; } ++ ++/* ++** decb_16: ++** decb x0, all, mul #16 ++** ret ++*/ ++PROTO (decb_16, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 16; } ++ ++/* ++** decb_17: ++** addvl x0, x0, #-17 ++** ret ++*/ ++PROTO (decb_17, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 17; } ++ ++/* 
++** decb_31: ++** addvl x0, x0, #-31 ++** ret ++*/ ++PROTO (decb_31, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 31; } ++ ++/* ++** decb_32: ++** addvl x0, x0, #-32 ++** ret ++*/ ++PROTO (decb_32, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 32; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c +new file mode 100644 +index 000000000..9ee4c8afc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntsb: ++** rdsvl x0, #1 ++** ret ++*/ ++PROTO (cntsb, uint64_t, ()) { return svcntsb (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c +new file mode 100644 +index 000000000..3bf9498e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c +@@ -0,0 +1,277 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntd_1: ++** cntd x0 ++** ret ++*/ ++PROTO (cntd_1, uint64_t, ()) { return svcntsd (); } ++ ++/* ++** cntd_2: ++** cntw x0 ++** ret ++*/ ++PROTO (cntd_2, uint64_t, ()) { return svcntsd () * 2; } ++ ++/* ++** cntd_3: ++** cntd x0, all, mul #3 ++** ret ++*/ ++PROTO (cntd_3, uint64_t, ()) { return svcntsd () * 3; } ++ ++/* ++** cntd_4: ++** cnth x0 ++** ret ++*/ ++PROTO (cntd_4, uint64_t, ()) { return svcntsd () * 4; } ++ ++/* ++** cntd_8: ++** cntb x0 ++** ret ++*/ ++PROTO (cntd_8, uint64_t, ()) { return svcntsd () * 8; } ++ ++/* ++** cntd_15: ++** cntd x0, all, mul #15 ++** ret ++*/ ++PROTO (cntd_15, uint64_t, ()) { return svcntsd () * 15; } ++ ++/* ++** cntd_16: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntd_16, uint64_t, ()) { return svcntsd () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntd_17: ++** rdvl (x0-9+), #17 ++** asr x0, \1, 3 ++** ret ++*/ ++PROTO (cntd_17, uint64_t, ()) { return svcntsd () * 17; } ++ ++/* ++** cntd_32: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntd_32, uint64_t, ()) { return svcntsd () * 32; } ++ ++/* ++** cntd_64: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntd_64, uint64_t, ()) { return svcntsd () * 64; } ++ ++/* ++** cntd_128: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntd_128, uint64_t, ()) { return svcntsd () * 128; } ++ ++/* ++** cntd_m1: ++** cntd (x0-9+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m1, uint64_t, ()) { return -svcntsd (); } ++ ++/* ++** cntd_m13: ++** cntd (x0-9+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m13, uint64_t, ()) { return -svcntsd () * 13; } ++ ++/* ++** cntd_m15: ++** cntd (x0-9+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m15, uint64_t, ()) { return -svcntsd () * 15; } ++ ++/* ++** cntd_m16: ++** rdvl x0, #-2 ++** ret ++*/ ++PROTO (cntd_m16, uint64_t, ()) { return -svcntsd () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntd_m17: ++** rdvl (x0-9+), #-17 ++** asr x0, \1, 3 ++** ret ++*/ ++PROTO (cntd_m17, uint64_t, ()) { return -svcntsd () * 17; } ++ ++/* ++** incd_1: ++** incd x0 ++** ret ++*/ ++PROTO (incd_1, uint64_t, (uint64_t x0)) { return x0 + svcntsd (); } ++ ++/* ++** incd_2: ++** incw x0 ++** ret ++*/ ++PROTO (incd_2, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 2; } ++ ++/* ++** incd_3: ++** incd x0, all, mul #3 ++** ret ++*/ ++PROTO (incd_3, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 3; } ++ ++/* ++** incd_4: ++** inch x0 ++** ret ++*/ ++PROTO (incd_4, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 4; } ++ ++/* ++** incd_7: ++** incd x0, all, mul #7 ++** ret ++*/ ++PROTO (incd_7, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 7; } ++ ++/* ++** incd_8: ++** incb x0 ++** ret ++*/ ++PROTO (incd_8, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 8; } ++ ++/* ++** incd_9: ++** incd x0, all, mul #9 ++** ret ++*/ ++PROTO (incd_9, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 9; } ++ ++/* ++** incd_15: ++** incd x0, all, mul #15 ++** ret ++*/ ++PROTO (incd_15, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 15; } ++ ++/* ++** incd_16: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incd_16, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 16; } ++ ++/* ++** incd_18: ++** incw x0, all, mul #9 ++** ret ++*/ ++PROTO (incd_18, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 18; } ++ ++/* ++** incd_30: ++** incw x0, all, mul #15 ++** ret ++*/ ++PROTO (incd_30, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 30; } ++ ++/* ++** decd_1: ++** decd x0 ++** ret ++*/ ++PROTO (decd_1, uint64_t, (uint64_t x0)) { return x0 - svcntsd (); } ++ ++/* ++** decd_2: ++** decw x0 ++** ret ++*/ ++PROTO (decd_2, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 2; } ++ ++/* ++** decd_3: ++** decd x0, all, mul #3 ++** ret ++*/ ++PROTO (decd_3, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 3; } ++ ++/* ++** decd_4: ++** dech x0 ++** ret ++*/ ++PROTO (decd_4, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 4; } ++ ++/* ++** decd_7: ++** decd x0, all, mul #7 ++** ret ++*/ ++PROTO (decd_7, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 7; } ++ ++/* ++** decd_8: ++** decb x0 ++** ret ++*/ ++PROTO (decd_8, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 8; } ++ ++/* ++** decd_9: ++** decd x0, all, mul #9 ++** ret ++*/ ++PROTO (decd_9, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 9; } ++ ++/* ++** decd_15: ++** decd x0, all, mul #15 ++** ret ++*/ ++PROTO (decd_15, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 15; } ++ ++/* ++** decd_16: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decd_16, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 16; } ++ ++/* ++** decd_18: ++** decw x0, all, mul #9 ++** ret ++*/ ++PROTO (decd_18, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 18; } ++ ++/* ++** decd_30: ++** decw x0, all, mul #15 ++** ret ++*/ ++PROTO (decd_30, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c +new file mode 100644 +index 000000000..90fb374ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#define NO_SHARED_ZA ++#include "test_sme_acle.h" ++ ++/* ++** cntsd: ++** rdsvl (x0-9)+, #1 ++** lsr x0, \1, #?3 ++** ret ++*/ ++PROTO (cntsd, 
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c
+new file mode 100644
+index 000000000..021c39a14
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c
+@@ -0,0 +1,279 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define NO_SHARED_ZA
++#include "test_sme_acle.h"
++
++/*
++** cnth_1:
++**	cnth	x0
++**	ret
++*/
++PROTO (cnth_1, uint64_t, ()) { return svcntsh (); }
++
++/*
++** cnth_2:
++**	cntb	x0
++**	ret
++*/
++PROTO (cnth_2, uint64_t, ()) { return svcntsh () * 2; }
++
++/*
++** cnth_3:
++**	cnth	x0, all, mul #3
++**	ret
++*/
++PROTO (cnth_3, uint64_t, ()) { return svcntsh () * 3; }
++
++/*
++** cnth_4:
++**	cntb	x0, all, mul #2
++**	ret
++*/
++PROTO (cnth_4, uint64_t, ()) { return svcntsh () * 4; }
++
++/*
++** cnth_8:
++**	cntb	x0, all, mul #4
++**	ret
++*/
++PROTO (cnth_8, uint64_t, ()) { return svcntsh () * 8; }
++
++/*
++** cnth_15:
++**	cnth	x0, all, mul #15
++**	ret
++*/
++PROTO (cnth_15, uint64_t, ()) { return svcntsh () * 15; }
++
++/*
++** cnth_16:
++**	cntb	x0, all, mul #8
++**	ret
++*/
++PROTO (cnth_16, uint64_t, ()) { return svcntsh () * 16; }
++
++/* Other sequences would be OK.  */
++/*
++** cnth_17:
++**	rdvl	(x[0-9]+), #17
++**	asr	x0, \1, 1
++**	ret
++*/
++PROTO (cnth_17, uint64_t, ()) { return svcntsh () * 17; }
++
++/*
++** cnth_32:
++**	cntb	x0, all, mul #16
++**	ret
++*/
++PROTO (cnth_32, uint64_t, ()) { return svcntsh () * 32; }
++
++/*
++** cnth_64:
++**	cntb	(x[0-9]+)
++**	lsl	x0, \1, 5
++**	ret
++*/
++PROTO (cnth_64, uint64_t, ()) { return svcntsh () * 64; }
++
++/*
++** cnth_128:
++**	cntb	(x[0-9]+)
++**	lsl	x0, \1, 6
++**	ret
++*/
++PROTO (cnth_128, uint64_t, ()) { return svcntsh () * 128; }
++
++/*
++** cnth_m1:
++**	cnth	(x[0-9]+)
++**	neg	x0, \1
++**	ret
++*/
++PROTO (cnth_m1, uint64_t, ()) { return -svcntsh (); }
++
++/*
++** cnth_m13:
++**	cnth	(x[0-9]+), all, mul #13
++**	neg	x0, \1
++**	ret
++*/
++PROTO (cnth_m13, uint64_t, ()) { return -svcntsh () * 13; }
++
++/*
++** cnth_m15:
++**	cnth	(x[0-9]+), all, mul #15
++**	neg	x0, \1
++**	ret
++*/
++PROTO (cnth_m15, uint64_t, ()) { return -svcntsh () * 15; }
++
++/*
++** cnth_m16:
++**	rdvl	x0, #-8
++**	ret
++*/
++PROTO (cnth_m16, uint64_t, ()) { return -svcntsh () * 16; }
++
++/* Other sequences would be OK.  */
++/*
++** cnth_m17:
++**	rdvl	(x[0-9]+), #-17
++**	asr	x0, \1, 1
++**	ret
++*/
++PROTO (cnth_m17, uint64_t, ()) { return -svcntsh () * 17; }
++
++/*
++** inch_1:
++**	inch	x0
++**	ret
++*/
++PROTO (inch_1, uint64_t, (uint64_t x0)) { return x0 + svcntsh (); }
++
++/*
++** inch_2:
++**	incb	x0
++**	ret
++*/
++PROTO (inch_2, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 2; }
++
++/*
++** inch_3:
++**	inch	x0, all, mul #3
++**	ret
++*/
++PROTO (inch_3, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 3; }
++
++/*
++** inch_4:
++**	incb	x0, all, mul #2
++**	ret
++*/
++PROTO (inch_4, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 4; }
++
++/*
++** inch_7:
++**	inch	x0, all, mul #7
++**	ret
++*/
++PROTO (inch_7, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 7; }
++
++/*
++** inch_8:
++**	incb	x0, all, mul #4
++**	ret
++*/
++PROTO (inch_8, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 8; }
++
++/*
++** inch_9:
++**	inch	x0, all, mul #9
++**	ret
++*/
++PROTO (inch_9, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 9; }
++
++/*
++** inch_15:
++**	inch	x0, all, mul #15
++**	ret
++*/
++PROTO (inch_15, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 15; }
++
++/*
++** inch_16:
++**	incb	x0, all, mul #8
++**	ret
++*/
++PROTO (inch_16, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 16; }
++
++/*
++** inch_18:
++**	incb	x0, all, mul #9
++**	ret
++*/
++PROTO (inch_18, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 18; }
++
++/*
++** inch_30:
++**	incb	x0, all, mul #15
++**	ret
++*/
++PROTO (inch_30, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 30; }
++
++/*
++** dech_1:
++**	dech	x0
++**	ret
++*/
++PROTO (dech_1, uint64_t, (uint64_t x0)) { return x0 - svcntsh (); }
++
++/*
++** dech_2:
++**	decb	x0
++**	ret
++*/
++PROTO (dech_2, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 2; }
++
++/*
++** dech_3:
++**	dech	x0, all, mul #3
++**	ret
++*/
++PROTO (dech_3, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 3; }
++
++/*
++** dech_4:
++**	decb	x0, all, mul #2
++**	ret
++*/
++PROTO (dech_4, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 4; }
++
++/*
++** dech_7:
++**	dech	x0, all, mul #7
++**	ret
++*/
++PROTO (dech_7, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 7; }
++
++/*
++** dech_8:
++**	decb	x0, all, mul #4
++**	ret
++*/
++PROTO (dech_8, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 8; }
++
++/*
++** dech_9:
++**	dech	x0, all, mul #9
++**	ret
++*/
++PROTO (dech_9, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 9; }
++
++/*
++** dech_15:
++**	dech	x0, all, mul #15
++**	ret
++*/
++PROTO (dech_15, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 15; }
++
++/*
++** dech_16:
++**	decb	x0, all, mul #8
++**	ret
++*/
++PROTO (dech_16, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 16; }
++
++/*
++** dech_18:
++**	decb	x0, all, mul #9
++**	ret
++*/
++PROTO (dech_18, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 18; }
++
++/*
++** dech_30:
++**	decb	x0, all, mul #15
++**	ret
++*/
++PROTO (dech_30, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 30; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c
+new file mode 100644
+index 000000000..9f6c85208
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c
+@@ -0,0 +1,13 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define STREAMING_COMPATIBLE
++#define NO_SHARED_ZA
++#include "test_sme_acle.h"
++
++/*
++** cntsh:
++**	rdsvl	(x[0-9]+), #1
++**	lsr	x0, \1, #?1
++**	ret
++*/
++PROTO (cntsh, uint64_t, ()) { return svcntsh (); }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c
+new file mode 100644
+index 000000000..c421e1b8e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c
+@@ -0,0 +1,278 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define NO_SHARED_ZA
++#include "test_sme_acle.h"
++
++/*
++** cntw_1:
++**	cntw	x0
++**	ret
++*/
++PROTO (cntw_1, uint64_t, ()) { return svcntsw (); }
++
++/*
++** cntw_2:
++**	cnth	x0
++**	ret
++*/
++PROTO (cntw_2, uint64_t, ()) { return svcntsw () * 2; }
++
++/*
++** cntw_3:
++**	cntw	x0, all, mul #3
++**	ret
++*/
++PROTO (cntw_3, uint64_t, ()) { return svcntsw () * 3; }
++
++/*
++** cntw_4:
++**	cntb	x0
++**	ret
++*/
++PROTO (cntw_4, uint64_t, ()) { return svcntsw () * 4; }
++
++/*
++** cntw_8:
++**	cntb	x0, all, mul #2
++**	ret
++*/
++PROTO (cntw_8, uint64_t, ()) { return svcntsw () * 8; }
++
++/*
++** cntw_15:
++**	cntw	x0, all, mul #15
++**	ret
++*/
++PROTO (cntw_15, uint64_t, ()) { return svcntsw () * 15; }
++
++/*
++** cntw_16:
++**	cntb	x0, all, mul #4
++**	ret
++*/
++PROTO (cntw_16, uint64_t, ()) { return svcntsw () * 16; }
++
++/* Other sequences would be OK.  */
++/*
++** cntw_17:
++**	rdvl	(x[0-9]+), #17
++**	asr	x0, \1, 2
++**	ret
++*/
++PROTO (cntw_17, uint64_t, ()) { return svcntsw () * 17; }
++
++/*
++** cntw_32:
++**	cntb	x0, all, mul #8
++**	ret
++*/
++PROTO (cntw_32, uint64_t, ()) { return svcntsw () * 32; }
++
++/*
++** cntw_64:
++**	cntb	x0, all, mul #16
++**	ret
++*/
++PROTO (cntw_64, uint64_t, ()) { return svcntsw () * 64; }
++
++/*
++** cntw_128:
++**	cntb	(x[0-9]+)
++**	lsl	x0, \1, 5
++**	ret
++*/
++PROTO (cntw_128, uint64_t, ()) { return svcntsw () * 128; }
++
++/*
++** cntw_m1:
++**	cntw	(x[0-9]+)
++**	neg	x0, \1
++**	ret
++*/
++PROTO (cntw_m1, uint64_t, ()) { return -svcntsw (); }
++
++/*
++** cntw_m13:
++**	cntw	(x[0-9]+), all, mul #13
++**	neg	x0, \1
++**	ret
++*/
++PROTO (cntw_m13, uint64_t, ()) { return -svcntsw () * 13; }
++
++/*
++** cntw_m15:
++**	cntw	(x[0-9]+), all, mul #15
++**	neg	x0, \1
++**	ret
++*/
++PROTO (cntw_m15, uint64_t, ()) { return -svcntsw () * 15; }
++
++/*
++** cntw_m16:
++**	rdvl	(x[0-9]+), #-4
++**	ret
++*/
++PROTO (cntw_m16, uint64_t, ()) { return -svcntsw () * 16; }
++
++/* Other sequences would be OK.  */
++/*
++** cntw_m17:
++**	rdvl	(x[0-9]+), #-17
++**	asr	x0, \1, 2
++**	ret
++*/
++PROTO (cntw_m17, uint64_t, ()) { return -svcntsw () * 17; }
++
++/*
++** incw_1:
++**	incw	x0
++**	ret
++*/
++PROTO (incw_1, uint64_t, (uint64_t x0)) { return x0 + svcntsw (); }
++
++/*
++** incw_2:
++**	inch	x0
++**	ret
++*/
++PROTO (incw_2, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 2; }
++
++/*
++** incw_3:
++**	incw	x0, all, mul #3
++**	ret
++*/
++PROTO (incw_3, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 3; }
++
++/*
++** incw_4:
++**	incb	x0
++**	ret
++*/
++PROTO (incw_4, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 4; }
++
++/*
++** incw_7:
++**	incw	x0, all, mul #7
++**	ret
++*/
++PROTO (incw_7, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 7; }
++
++/*
++** incw_8:
++**	incb	x0, all, mul #2
++**	ret
++*/
++PROTO (incw_8, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 8; }
++
++/*
++** incw_9:
++**	incw	x0, all, mul #9
++**	ret
++*/
++PROTO (incw_9, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 9; }
++
++/*
++** incw_15:
++**	incw	x0, all, mul #15
++**	ret
++*/
++PROTO (incw_15, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 15; }
++
++/*
++** incw_16:
++**	incb	x0, all, mul #4
++**	ret
++*/
++PROTO (incw_16, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 16; }
++
++/*
++** incw_18:
++**	inch	x0, all, mul #9
++**	ret
++*/
++PROTO (incw_18, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 18; }
++
++/*
++** incw_30:
++**	inch	x0, all, mul #15
++**	ret
++*/
++PROTO (incw_30, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 30; }
++
++/*
++** decw_1:
++**	decw	x0
++**	ret
++*/
++PROTO (decw_1, uint64_t, (uint64_t x0)) { return x0 - svcntsw (); }
++
++/*
++** decw_2:
++**	dech	x0
++**	ret
++*/
++PROTO (decw_2, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 2; }
++
++/*
++** decw_3:
++**	decw	x0, all, mul #3
++**	ret
++*/
++PROTO (decw_3, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 3; }
++
++/*
++** decw_4:
++**	decb	x0
++**	ret
++*/
++PROTO (decw_4, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 4; }
++
++/*
++** decw_7:
++**	decw	x0, all, mul #7
++**	ret
++*/
++PROTO (decw_7, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 7; }
++
++/*
++** decw_8:
++**	decb	x0, all, mul #2
++**	ret
++*/
++PROTO (decw_8, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 8; }
++
++/*
++** decw_9:
++**	decw	x0, all, mul #9
++**	ret
++*/
++PROTO (decw_9, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 9; }
++
++/*
++** decw_15:
++**	decw	x0, all, mul #15
++**	ret
++*/
++PROTO (decw_15, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 15; }
++
++/*
++** decw_16:
++**	decb	x0, all, mul #4
++**	ret
++*/
++PROTO (decw_16, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 16; }
++
++/*
++** decw_18:
++**	dech	x0, all, mul #9
++**	ret
++*/
++PROTO (decw_18, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 18; }
++
++/*
++** decw_30:
++**	dech	x0, all, mul #15
++**	ret
++*/
++PROTO (decw_30, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 30; }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c
+new file mode 100644
+index 000000000..75ca937c4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c
+@@ -0,0 +1,13 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define STREAMING_COMPATIBLE
++#define NO_SHARED_ZA
++#include "test_sme_acle.h"
++
++/*
++** cntsw:
++**	rdsvl	(x[0-9]+), #1
++**	lsr	x0, \1, #?2
++**	ret
++*/
++PROTO (cntsw, uint64_t, ()) { return svcntsw (); }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c
+new file mode 100644
+index 000000000..fbbeb4f12
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c
+@@ -0,0 +1,77 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** ld1_vnum_za128_0_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	ld1q	{ za0h\.q\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_0_0_0,
++	      svld1_hor_vnum_za128 (0, 0, p0, x1, 0),
++	      svld1_hor_vnum_za128 (0, 0, p0, x1, 0))
++
++/*
++** ld1_vnum_za128_7_1_0:
++**	mov	(w1[2-5]), #?1
++**	ld1q	{ za7h\.q\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_7_1_0,
++	      svld1_hor_vnum_za128 (7, 1, p0, x1, 0),
++	      svld1_hor_vnum_za128 (7, 1, p0, x1, 0))
++
++/*
++** ld1_vnum_za128_11_1_5:
++**	incb	x1, all, mul #5
++**	mov	(w1[2-5]), #?6
++**	ld1q	{ za11h\.q\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_11_1_5,
++	      svld1_hor_vnum_za128 (11, 1, p0, x1, 5),
++	      svld1_hor_vnum_za128 (11, 1, p0, x1, 5))
++
++/*
++** ld1_vnum_za128_3_w0_0:
++**	mov	(w1[2-5]), w0
++**	ld1q	{ za3h\.q\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_3_w0_0,
++	      svld1_hor_vnum_za128 (3, w0, p0, x1, 0),
++	      svld1_hor_vnum_za128 (3, w0, p0, x1, 0))
++
++/*
++** ld1_vnum_za128_5_w0_0:
++**	incb	x1, all, mul #13
++**	add	(w1[2-5]), w0, #?13
++**	ld1q	{ za5h\.q\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_5_w0_0,
++	      svld1_hor_vnum_za128 (5, w0, p0, x1, 13),
++	      svld1_hor_vnum_za128 (5, w0, p0, x1, 13))
++
++/*
++** ld1_vnum_za128_11_w0_0:
++**	cntb	(x[0-9]+)
++**	madd	(x[0-9]+), (?:\1, x2|x2, \1), x1
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++**	ld1q	{ za11h\.q\[\3, 0\] }, p0/z, \[\2\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_11_w0_0,
++	      svld1_hor_vnum_za128 (11, w0, p0, x1, x2),
++	      svld1_hor_vnum_za128 (11, w0, p0, x1, x2))
++
++/*
++** ld1_vnum_za128_15_w0p1_0:
++**	add	(w1[2-5]), w0, #?1
++**	ld1q	{ za15h\.q\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za128_15_w0p1_0,
++	      svld1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0),
++	      svld1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c
+new file mode 100644
+index 000000000..30e7a71ed
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c
+@@ -0,0 +1,123 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** ld1_vnum_za16_1_0_1:
++**	incb	x1
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	ld1h	{ za1h\.h\[\1, 1\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za16_1_0_1,
++	      svld1_hor_vnum_za16 (1, 0, p0, x1, 1),
++	      svld1_hor_vnum_za16 (1, 0, p0, x1, 1))
++
++/*
++** ld1_vnum_za16_1_1_1:
++**	incb	x1
++**	mov	(w1[2-5]), #?1
++**	ld1h	{ za1h\.h\[\1, 1\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za16_1_1_1,
++	      svld1_hor_vnum_za16 (1, 1, p0, x1, 1),
++	      svld1_hor_vnum_za16 (1, 1, p0, x1, 1))
++
++/*
++** ld1_vnum_za16_0_0_8:
++**	incb	x1, all, mul #8
++**	mov	(w1[2-5]), #?8
++**	ld1h	{ za0h\.h\[\1, 0\] }, p0/z, \[x1\]
++**	ret
++*/
++TEST_LOAD_ZA (ld1_vnum_za16_0_0_8,
++	      svld1_hor_vnum_za16 (0, 0, p0, x1, 8),
++	      svld1_hor_vnum_za16 (0, 0, p0, x1, 8))
++
++/*
++** ld1_vnum_za16_0_1_8:
++**	incb	x1, all, mul #8
++**	mov	(w1[2-5]), #?9
++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_1_8, ++ svld1_hor_vnum_za16 (0, 1, p0, x1, 8), ++ svld1_hor_vnum_za16 (0, 1, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_0_w0_0: ++** mov (w12-5), w0 ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_0, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za16_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1h { za0h\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_1, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_w0_7: ++** incb x1, all, mul #7 ++** mov (w12-5), w0 ++** ld1h { za0h\.h\\1, 7\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_7, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 7), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, 7)) ++ ++/* ++** ld1_vnum_za16_1_w0_8: ++** incb x1, all, mul #8 ++** add (w12-5), w0, #?8 ++** ld1h { za1h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_8, ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 8), ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_1_w0_13: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1h { za1h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_13, ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 13), ++ svld1_hor_vnum_za16 (1, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za16_0_w0_x2: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1h { za0h\.h\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_x2, ++ svld1_hor_vnum_za16 (0, w0, p0, x1, x2), ++ svld1_hor_vnum_za16 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za16_1_w0p1_0: ++** mov (w12-5), w0 ++** ld1h { za1h\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0p1_0, ++ svld1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c +new file mode 100644 +index 000000000..49ffaede8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za32_3_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1w { za3h\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_0_1, ++ svld1_hor_vnum_za32 (3, 0, p0, x1, 1), ++ svld1_hor_vnum_za32 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_2_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1w { za2h\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_1, ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 1), ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_0_4: ++** incb x1, all, mul #4 ++** mov (w12-5), #?4 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_0_4, ++ svld1_hor_vnum_za32 (0, 0, p0, x1, 4), ++ svld1_hor_vnum_za32 (0, 0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_2_1_4: ++** incb x1, all, mul #4 ++** mov (w12-5), #?5 ++** ld1w { za2h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_4, ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 4), ++ svld1_hor_vnum_za32 (2, 1, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_0_w0_0: ++** mov (w12-5), w0 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ 
++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_0, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za32_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1w { za0h\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_1, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_w0_3: ++** incb x1, all, mul #3 ++** mov (w12-5), w0 ++** ld1w { za0h\.s\\1, 3\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_3, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 3), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, 3)) ++ ++/* ++** ld1_vnum_za32_1_w0_4: ++** incb x1, all, mul #4 ++** add (w12-5), w0, #?4 ++** ld1w { za1h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0_4, ++ svld1_hor_vnum_za32 (1, w0, p0, x1, 4), ++ svld1_hor_vnum_za32 (1, w0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_3_w0_13: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1w { za3h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_w0_13, ++ svld1_hor_vnum_za32 (3, w0, p0, x1, 13), ++ svld1_hor_vnum_za32 (3, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za32_0_w0_x2: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1w { za0h\.s\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_x2, ++ svld1_hor_vnum_za32 (0, w0, p0, x1, x2), ++ svld1_hor_vnum_za32 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za32_1_w0p1_0: ++** mov (w12-5), w0 ++** ld1w { za1h\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0p1_0, ++ svld1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c +new file mode 100644 +index 000000000..df09b1c81 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za64_3_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1d { za3h\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_3_0_1, ++ svld1_hor_vnum_za64 (3, 0, p0, x1, 1), ++ svld1_hor_vnum_za64 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_7_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1d { za7h\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_7_1_1, ++ svld1_hor_vnum_za64 (7, 1, p0, x1, 1), ++ svld1_hor_vnum_za64 (7, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_0_0_2: ++** incb x1, all, mul #2 ++** mov (w12-5), #?2 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_0_2, ++ svld1_hor_vnum_za64 (0, 0, p0, x1, 2), ++ svld1_hor_vnum_za64 (0, 0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_5_1_2: ++** incb x1, all, mul #2 ++** mov (w12-5), #?3 ++** ld1d { za5h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_5_1_2, ++ svld1_hor_vnum_za64 (5, 1, p0, x1, 2), ++ svld1_hor_vnum_za64 (5, 1, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_0_w0_0: ++** mov (w12-5), w0 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_0, ++ svld1_hor_vnum_za64 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za64 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za64_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1d { za0h\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_1, ++ 
svld1_hor_vnum_za64 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za64 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_6_w0_2: ++** incb x1, all, mul #2 ++** add (w12-5), w0, #?2 ++** ld1d { za6h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_6_w0_2, ++ svld1_hor_vnum_za64 (6, w0, p0, x1, 2), ++ svld1_hor_vnum_za64 (6, w0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_2_w0_13: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1d { za2h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_2_w0_13, ++ svld1_hor_vnum_za64 (2, w0, p0, x1, 13), ++ svld1_hor_vnum_za64 (2, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za64_4_w0_x2: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1d { za4h\.d\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_4_w0_x2, ++ svld1_hor_vnum_za64 (4, w0, p0, x1, x2), ++ svld1_hor_vnum_za64 (4, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za64_1_w0p1_0: ++** mov (w12-5), w0 ++** ld1d { za1h\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_1_w0p1_0, ++ svld1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c +new file mode 100644 +index 000000000..c42931d3e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za8_0_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1b { za0h\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_1, ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 1), ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1b { za0h\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_1, ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 1), ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_0_16: ++** incb x1, all, mul #16 ++** mov (w12-5), #?16 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_16, ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 16), ++ svld1_hor_vnum_za8 (0, 0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_1_16: ++** incb x1, all, mul #16 ++** mov (w12-5), #?17 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_16, ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 16), ++ svld1_hor_vnum_za8 (0, 1, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_0: ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_0, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 0), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za8_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_1, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 1), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_w0_15: ++** incb x1, all, mul #15 ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 15\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_15, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 15), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 15)) ++ ++/* ++** ld1_vnum_za8_0_w0_16: ++** incb x1, all, mul #16 ++** add (w12-5), w0, #?16 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_16, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 
16), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_x2: ++** cntb (x0-9+) ++** mul (x0-9+), (?:\1, x2|x2, \1) ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1b { za0h\.b\\3, 0\ }, p0/z, \x1, \2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_x2, ++ svld1_hor_vnum_za8 (0, w0, p0, x1, x2), ++ svld1_hor_vnum_za8 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za8_0_w0p1_0: ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0p1_0, ++ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0), ++ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c +new file mode 100644 +index 000000000..2c6292217 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za128_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1q { za0h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_0, ++ svld1_hor_za128 (0, 0, p0, x1), ++ svld1_hor_za128 (0, 0, p0, x1)) ++ ++/* ++** ld1_za128_0_1: ++** mov (w12-5), #?1 ++** ld1q { za0h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_1, ++ svld1_hor_za128 (0, 1, p0, x1), ++ svld1_hor_za128 (0, 1, p0, x1)) ++ ++/* ++** ld1_za128_0_w0: ++** mov (w12-5), w0 ++** ld1q { za0h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0, ++ svld1_hor_za128 (0, w0, p0, x1), ++ svld1_hor_za128 (0, w0, p0, x1)) ++ ++/* ++** ld1_za128_0_w0_p1: ++** add (w12-5), w0, #?1 ++** ld1q { za0h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0_p1, ++ svld1_hor_za128 (0, w0 + 1, p0, x1), ++ svld1_hor_za128 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za128_7_w0: ++** mov (w12-5), w0 ++** ld1q { za7h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_7_w0, ++ svld1_hor_za128 (7, w0, p0, x1), ++ svld1_hor_za128 (7, w0, p0, x1)) ++ ++/* ++** ld1_za128_13_w0: ++** mov (w12-5), w0 ++** ld1q { za13h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_13_w0, ++ svld1_hor_za128 (13, w0, p0, x1), ++ svld1_hor_za128 (13, w0, p0, x1)) ++ ++/* ++** ld1_za128_15_w0: ++** mov (w12-5), w0 ++** ld1q { za15h\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_15_w0, ++ svld1_hor_za128 (15, w0, p0, x1), ++ svld1_hor_za128 (15, w0, p0, x1)) ++ ++/* ++** ld1_za128_9_w0_index: ++** mov (w12-5), w0 ++** ld1q { za9h\.q\\1, 0\ }, p0/z, \x1, x2, lsl #?4\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_9_w0_index, ++ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16), ++ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c +new file mode 100644 +index 000000000..3570bea61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za16_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_0, ++ svld1_hor_za16 (0, 0, p0, x1), ++ svld1_hor_za16 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 7. 
*/ ++/* ++** ld1_za16_0_7: ++** mov (w12-5), #?7 ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_7, ++ svld1_hor_za16 (0, 7, p0, x1), ++ svld1_hor_za16 (0, 7, p0, x1)) ++ ++/* ++** ld1_za16_0_8: ++** mov (w12-5), #?8 ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_8, ++ svld1_hor_za16 (0, 8, p0, x1), ++ svld1_hor_za16 (0, 8, p0, x1)) ++ ++/* ++** ld1_za16_0_w0: ++** mov (w12-5), w0 ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0, ++ svld1_hor_za16 (0, w0, p0, x1), ++ svld1_hor_za16 (0, w0, p0, x1)) ++ ++/* ++** ld1_za16_0_w0_p1: ++** mov (w12-5), w0 ++** ld1h { za0h\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_p1, ++ svld1_hor_za16 (0, w0 + 1, p0, x1), ++ svld1_hor_za16 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za16_0_w0_p7: ++** mov (w12-5), w0 ++** ld1h { za0h\.h\\1, 7\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_p7, ++ svld1_hor_za16 (0, w0 + 7, p0, x1), ++ svld1_hor_za16 (0, w0 + 7, p0, x1)) ++ ++/* ++** ld1_za16_1_w0: ++** mov (w12-5), w0 ++** ld1h { za1h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0, ++ svld1_hor_za16 (1, w0, p0, x1), ++ svld1_hor_za16 (1, w0, p0, x1)) ++ ++ ++/* ++** ld1_za16_1_w0_p1: ++** mov (w12-5), w0 ++** ld1h { za1h\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0_p1, ++ svld1_hor_za16 (1, w0 + 1, p0, x1), ++ svld1_hor_za16 (1, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za16_1_w0_p7: ++** mov (w12-5), w0 ++** ld1h { za1h\.h\\1, 7\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0_p7, ++ svld1_hor_za16 (1, w0 + 7, p0, x1), ++ svld1_hor_za16 (1, w0 + 7, p0, x1)) ++ ++/* ++** ld1_za16_1_w0_p5_index: ++** mov (w12-5), w0 ++** ld1h { za1h\.h\\1, 5\ }, p0/z, \x1, x2, lsl #?1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0_p5_index, ++ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2), ++ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2)) ++ ++/* ++** ld1_za16_0_w0_p8: ++** add (w12-5), w0, #?8 ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_p8, ++ svld1_hor_za16 (0, w0 + 8, p0, x1), ++ svld1_hor_za16 (0, w0 + 8, p0, x1)) ++ ++/* ++** ld1_za16_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1h { za0h\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_m1, ++ svld1_hor_za16 (0, w0 - 1, p0, x1), ++ svld1_hor_za16 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c +new file mode 100644 +index 000000000..a8f6606bd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c +@@ -0,0 +1,125 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za32_0_0: ++** mov (w12-5), (?:w0|#?0) ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_0, ++ svld1_hor_za32 (0, 0, p0, x1), ++ svld1_hor_za32 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 3. 
*/ ++/* ++** ld1_za32_0_3: ++** mov (w12-5), #?3 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_3, ++ svld1_hor_za32 (0, 3, p0, x1), ++ svld1_hor_za32 (0, 3, p0, x1)) ++ ++/* ++** ld1_za32_0_4: ++** mov (w12-5), #?4 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_4, ++ svld1_hor_za32 (0, 4, p0, x1), ++ svld1_hor_za32 (0, 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0: ++** mov (w12-5), w0 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0, ++ svld1_hor_za32 (0, w0, p0, x1), ++ svld1_hor_za32 (0, w0, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p1: ++** mov (w12-5), w0 ++** ld1w { za0h\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p1, ++ svld1_hor_za32 (0, w0 + 1, p0, x1), ++ svld1_hor_za32 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p3: ++** mov (w12-5), w0 ++** ld1w { za0h\.s\\1, 3\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p3, ++ svld1_hor_za32 (0, w0 + 3, p0, x1), ++ svld1_hor_za32 (0, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_3_w0: ++** mov (w12-5), w0 ++** ld1w { za3h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0, ++ svld1_hor_za32 (3, w0, p0, x1), ++ svld1_hor_za32 (3, w0, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p1: ++** mov (w12-5), w0 ++** ld1w { za3h\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p1, ++ svld1_hor_za32 (3, w0 + 1, p0, x1), ++ svld1_hor_za32 (3, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p3: ++** mov (w12-5), w0 ++** ld1w { za3h\.s\\1, 3\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p3, ++ svld1_hor_za32 (3, w0 + 3, p0, x1), ++ svld1_hor_za32 (3, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_1_w0_p2_index: ++** mov (w12-5), w0 ++** ld1w { za1h\.s\\1, 2\ }, p0/z, \x1, x2, lsl #?2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_1_w0_p2_index, ++ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4), ++ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4)) ++ ++/* ++** ld1_za32_0_w0_p4: ++** add (w12-5), w0, #?4 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p4, ++ svld1_hor_za32 (0, w0 + 4, p0, x1), ++ svld1_hor_za32 (0, w0 + 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1w { za0h\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_m1, ++ svld1_hor_za32 (0, w0 - 1, p0, x1), ++ svld1_hor_za32 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c +new file mode 100644 +index 000000000..f4573eb71 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c +@@ -0,0 +1,105 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za64_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_0, ++ svld1_hor_za64 (0, 0, p0, x1), ++ svld1_hor_za64 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 1. 
*/ ++/* ++** ld1_za64_0_1: ++** mov (w12-5), #?1 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_1, ++ svld1_hor_za64 (0, 1, p0, x1), ++ svld1_hor_za64 (0, 1, p0, x1)) ++ ++/* ++** ld1_za64_0_2: ++** mov (w12-5), #?2 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_2, ++ svld1_hor_za64 (0, 2, p0, x1), ++ svld1_hor_za64 (0, 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0: ++** mov (w12-5), w0 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0, ++ svld1_hor_za64 (0, w0, p0, x1), ++ svld1_hor_za64 (0, w0, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_p1: ++** mov (w12-5), w0 ++** ld1d { za0h\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p1, ++ svld1_hor_za64 (0, w0 + 1, p0, x1), ++ svld1_hor_za64 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_7_w0: ++** mov (w12-5), w0 ++** ld1d { za7h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0, ++ svld1_hor_za64 (7, w0, p0, x1), ++ svld1_hor_za64 (7, w0, p0, x1)) ++ ++/* ++** ld1_za64_7_w0_p1: ++** mov (w12-5), w0 ++** ld1d { za7h\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0_p1, ++ svld1_hor_za64 (7, w0 + 1, p0, x1), ++ svld1_hor_za64 (7, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_5_w0_p1_index: ++** mov (w12-5), w0 ++** ld1d { za5h\.d\\1, 1\ }, p0/z, \x1, x2, lsl #?3\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_5_w0_p1_index, ++ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8), ++ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8)) ++ ++/* ++** ld1_za64_0_w0_p2: ++** add (w12-5), w0, #?2 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p2, ++ svld1_hor_za64 (0, w0 + 2, p0, x1), ++ svld1_hor_za64 (0, w0 + 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1d { za0h\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_m1, ++ svld1_hor_za64 (0, w0 - 1, p0, x1), ++ svld1_hor_za64 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c +new file mode 100644 +index 000000000..eef0927cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c +@@ -0,0 +1,95 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za8_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_0, ++ svld1_hor_za8 (0, 0, p0, x1), ++ svld1_hor_za8 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 15. 
*/ ++/* ++** ld1_za8_0_15: ++** mov (w12-5), #?15 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_15, ++ svld1_hor_za8 (0, 15, p0, x1), ++ svld1_hor_za8 (0, 15, p0, x1)) ++ ++/* ++** ld1_za8_0_16: ++** mov (w12-5), #?16 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_16, ++ svld1_hor_za8 (0, 16, p0, x1), ++ svld1_hor_za8 (0, 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0: ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0, ++ svld1_hor_za8 (0, w0, p0, x1), ++ svld1_hor_za8 (0, w0, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p1: ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p1, ++ svld1_hor_za8 (0, w0 + 1, p0, x1), ++ svld1_hor_za8 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p15: ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 15\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p15, ++ svld1_hor_za8 (0, w0 + 15, p0, x1), ++ svld1_hor_za8 (0, w0 + 15, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p13_index: ++** mov (w12-5), w0 ++** ld1b { za0h\.b\\1, 15\ }, p0/z, \x1, x2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p13_index, ++ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2), ++ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2)) ++ ++/* ++** ld1_za8_0_w0_p16: ++** add (w12-5), w0, #?16 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p16, ++ svld1_hor_za8 (0, w0 + 16, p0, x1), ++ svld1_hor_za8 (0, w0 + 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1b { za0h\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_m1, ++ svld1_hor_za8 (0, w0 - 1, p0, x1), ++ svld1_hor_za8 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c +new file mode 100644 +index 000000000..e90da4b33 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c +@@ -0,0 +1,77 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za128_0_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1q { za0v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_0_0_0, ++ svld1_ver_vnum_za128 (0, 0, p0, x1, 0), ++ svld1_ver_vnum_za128 (0, 0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_7_1_0: ++** mov (w12-5), #?1 ++** ld1q { za7v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_7_1_0, ++ svld1_ver_vnum_za128 (7, 1, p0, x1, 0), ++ svld1_ver_vnum_za128 (7, 1, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_11_1_5: ++** incb x1, all, mul #5 ++** mov (w12-5), #?6 ++** ld1q { za11v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_11_1_5, ++ svld1_ver_vnum_za128 (11, 1, p0, x1, 5), ++ svld1_ver_vnum_za128 (11, 1, p0, x1, 5)) ++ ++/* ++** ld1_vnum_za128_3_w0_0: ++** mov (w12-5), w0 ++** ld1q { za3v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_3_w0_0, ++ svld1_ver_vnum_za128 (3, w0, p0, x1, 0), ++ svld1_ver_vnum_za128 (3, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za128_5_w0_0: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1q { za5v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_5_w0_0, ++ svld1_ver_vnum_za128 (5, w0, p0, x1, 13), ++ svld1_ver_vnum_za128 (5, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za128_11_w0_0: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1q { 
za11v\.q\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_11_w0_0, ++ svld1_ver_vnum_za128 (11, w0, p0, x1, x2), ++ svld1_ver_vnum_za128 (11, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za128_15_w0p1_0: ++** add (w12-5), w0, #?1 ++** ld1q { za15v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za128_15_w0p1_0, ++ svld1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c +new file mode 100644 +index 000000000..7868cf4ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za16_1_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1h { za1v\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_0_1, ++ svld1_ver_vnum_za16 (1, 0, p0, x1, 1), ++ svld1_ver_vnum_za16 (1, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_1_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1h { za1v\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_1_1, ++ svld1_ver_vnum_za16 (1, 1, p0, x1, 1), ++ svld1_ver_vnum_za16 (1, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_0_8: ++** incb x1, all, mul #8 ++** mov (w12-5), #?8 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_0_8, ++ svld1_ver_vnum_za16 (0, 0, p0, x1, 8), ++ svld1_ver_vnum_za16 (0, 0, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_0_1_8: ++** incb x1, all, mul #8 ++** mov (w12-5), #?9 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_1_8, ++ svld1_ver_vnum_za16 (0, 1, p0, x1, 8), ++ svld1_ver_vnum_za16 (0, 1, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_0_w0_0: ++** mov (w12-5), w0 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_0, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za16_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1h { za0v\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_1, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za16_0_w0_7: ++** incb x1, all, mul #7 ++** mov (w12-5), w0 ++** ld1h { za0v\.h\\1, 7\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_7, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 7), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, 7)) ++ ++/* ++** ld1_vnum_za16_1_w0_8: ++** incb x1, all, mul #8 ++** add (w12-5), w0, #?8 ++** ld1h { za1v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_8, ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 8), ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 8)) ++ ++/* ++** ld1_vnum_za16_1_w0_13: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1h { za1v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0_13, ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 13), ++ svld1_ver_vnum_za16 (1, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za16_0_w0_x2: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1h { za0v\.h\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_0_w0_x2, ++ svld1_ver_vnum_za16 (0, w0, p0, x1, x2), ++ svld1_ver_vnum_za16 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za16_1_w0p1_0: ++** mov (w12-5), w0 ++** ld1h { za1v\.h\\1, 1\ }, p0/z, \x1\ 
++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za16_1_w0p1_0, ++ svld1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c +new file mode 100644 +index 000000000..053b60140 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za32_3_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1w { za3v\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_0_1, ++ svld1_ver_vnum_za32 (3, 0, p0, x1, 1), ++ svld1_ver_vnum_za32 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_2_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1w { za2v\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_1, ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 1), ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_0_4: ++** incb x1, all, mul #4 ++** mov (w12-5), #?4 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_0_4, ++ svld1_ver_vnum_za32 (0, 0, p0, x1, 4), ++ svld1_ver_vnum_za32 (0, 0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_2_1_4: ++** incb x1, all, mul #4 ++** mov (w12-5), #?5 ++** ld1w { za2v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_2_1_4, ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 4), ++ svld1_ver_vnum_za32 (2, 1, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_0_w0_0: ++** mov (w12-5), w0 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_0, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za32_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1w { za0v\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_1, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za32_0_w0_3: ++** incb x1, all, mul #3 ++** mov (w12-5), w0 ++** ld1w { za0v\.s\\1, 3\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_3, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 3), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, 3)) ++ ++/* ++** ld1_vnum_za32_1_w0_4: ++** incb x1, all, mul #4 ++** add (w12-5), w0, #?4 ++** ld1w { za1v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0_4, ++ svld1_ver_vnum_za32 (1, w0, p0, x1, 4), ++ svld1_ver_vnum_za32 (1, w0, p0, x1, 4)) ++ ++/* ++** ld1_vnum_za32_3_w0_13: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1w { za3v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_3_w0_13, ++ svld1_ver_vnum_za32 (3, w0, p0, x1, 13), ++ svld1_ver_vnum_za32 (3, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za32_0_w0_x2: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1w { za0v\.s\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_0_w0_x2, ++ svld1_ver_vnum_za32 (0, w0, p0, x1, x2), ++ svld1_ver_vnum_za32 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za32_1_w0p1_0: ++** mov (w12-5), w0 ++** ld1w { za1v\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za32_1_w0p1_0, ++ svld1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c +new file mode 100644 +index 000000000..d04764979 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za64_3_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1d { za3v\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_3_0_1, ++ svld1_ver_vnum_za64 (3, 0, p0, x1, 1), ++ svld1_ver_vnum_za64 (3, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_7_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1d { za7v\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_7_1_1, ++ svld1_ver_vnum_za64 (7, 1, p0, x1, 1), ++ svld1_ver_vnum_za64 (7, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_0_0_2: ++** incb x1, all, mul #2 ++** mov (w12-5), #?2 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_0_2, ++ svld1_ver_vnum_za64 (0, 0, p0, x1, 2), ++ svld1_ver_vnum_za64 (0, 0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_5_1_2: ++** incb x1, all, mul #2 ++** mov (w12-5), #?3 ++** ld1d { za5v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_5_1_2, ++ svld1_ver_vnum_za64 (5, 1, p0, x1, 2), ++ svld1_ver_vnum_za64 (5, 1, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_0_w0_0: ++** mov (w12-5), w0 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_0, ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za64_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1d { za0v\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_0_w0_1, ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za64 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za64_6_w0_2: ++** incb x1, all, mul #2 ++** add (w12-5), w0, #?2 ++** ld1d { za6v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_6_w0_2, ++ svld1_ver_vnum_za64 (6, w0, p0, x1, 2), ++ svld1_ver_vnum_za64 (6, w0, p0, x1, 2)) ++ ++/* ++** ld1_vnum_za64_2_w0_13: ++** incb x1, all, mul #13 ++** add (w12-5), w0, #?13 ++** ld1d { za2v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_2_w0_13, ++ svld1_ver_vnum_za64 (2, w0, p0, x1, 13), ++ svld1_ver_vnum_za64 (2, w0, p0, x1, 13)) ++ ++/* ++** ld1_vnum_za64_4_w0_x2: ++** cntb (x0-9+) ++** madd (x0-9+), (?:\1, x2|x2, \1), x1 ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1d { za4v\.d\\3, 0\ }, p0/z, \\2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_4_w0_x2, ++ svld1_ver_vnum_za64 (4, w0, p0, x1, x2), ++ svld1_ver_vnum_za64 (4, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za64_1_w0p1_0: ++** mov (w12-5), w0 ++** ld1d { za1v\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za64_1_w0p1_0, ++ svld1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c +new file mode 100644 +index 000000000..e99d95e3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c +@@ -0,0 +1,112 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_vnum_za8_0_0_1: ++** incb x1 ++** mov (w12-5), (?:wzr|#?0) ++** ld1b { za0v\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_1, ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 1), ++ svld1_ver_vnum_za8 
(0, 0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_1_1: ++** incb x1 ++** mov (w12-5), #?1 ++** ld1b { za0v\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_1, ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 1), ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_0_16: ++** incb x1, all, mul #16 ++** mov (w12-5), #?16 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_0_16, ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 16), ++ svld1_ver_vnum_za8 (0, 0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_1_16: ++** incb x1, all, mul #16 ++** mov (w12-5), #?17 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_1_16, ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 16), ++ svld1_ver_vnum_za8 (0, 1, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_0: ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_0, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 0), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 0)) ++ ++/* ++** ld1_vnum_za8_0_w0_1: ++** incb x1 ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_1, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 1), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 1)) ++ ++/* ++** ld1_vnum_za8_0_w0_15: ++** incb x1, all, mul #15 ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 15\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_15, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 15), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 15)) ++ ++/* ++** ld1_vnum_za8_0_w0_16: ++** incb x1, all, mul #16 ++** add (w12-5), w0, #?16 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_16, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 16), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, 16)) ++ ++/* ++** ld1_vnum_za8_0_w0_x2: ++** cntb (x0-9+) ++** mul (x0-9+), (?:\1, x2|x2, \1) ++** add (w12-5), (?:w0, w2|w2, w0) ++** ld1b { za0v\.b\\3, 0\ }, p0/z, \x1, \2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0_x2, ++ svld1_ver_vnum_za8 (0, w0, p0, x1, x2), ++ svld1_ver_vnum_za8 (0, w0, p0, x1, x2)) ++ ++/* ++** ld1_vnum_za8_0_w0p1_0: ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_vnum_za8_0_w0p1_0, ++ svld1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0), ++ svld1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c +new file mode 100644 +index 000000000..e81f40258 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za128_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1q { za0v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_0, ++ svld1_ver_za128 (0, 0, p0, x1), ++ svld1_ver_za128 (0, 0, p0, x1)) ++ ++/* ++** ld1_za128_0_1: ++** mov (w12-5), #?1 ++** ld1q { za0v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_1, ++ svld1_ver_za128 (0, 1, p0, x1), ++ svld1_ver_za128 (0, 1, p0, x1)) ++ ++/* ++** ld1_za128_0_w0: ++** mov (w12-5), w0 ++** ld1q { za0v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0, ++ svld1_ver_za128 (0, w0, p0, x1), ++ svld1_ver_za128 (0, w0, p0, x1)) ++ ++/* ++** ld1_za128_0_w0_p1: ++** add (w12-5), w0, #?1 ++** ld1q { za0v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_0_w0_p1, ++ svld1_ver_za128 (0, w0 + 1, p0, x1), ++ 
svld1_ver_za128 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za128_7_w0: ++** mov (w12-5), w0 ++** ld1q { za7v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_7_w0, ++ svld1_ver_za128 (7, w0, p0, x1), ++ svld1_ver_za128 (7, w0, p0, x1)) ++ ++/* ++** ld1_za128_13_w0: ++** mov (w12-5), w0 ++** ld1q { za13v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_13_w0, ++ svld1_ver_za128 (13, w0, p0, x1), ++ svld1_ver_za128 (13, w0, p0, x1)) ++ ++/* ++** ld1_za128_15_w0: ++** mov (w12-5), w0 ++** ld1q { za15v\.q\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_15_w0, ++ svld1_ver_za128 (15, w0, p0, x1), ++ svld1_ver_za128 (15, w0, p0, x1)) ++ ++/* ++** ld1_za128_9_w0_index: ++** mov (w12-5), w0 ++** ld1q { za9v\.q\\1, 0\ }, p0/z, \x1, x2, lsl #?4\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za128_9_w0_index, ++ svld1_ver_za128 (9, w0, p0, x1 + x2 * 16), ++ svld1_ver_za128 (9, w0, p0, x1 + x2 * 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c +new file mode 100644 +index 000000000..0938b1eba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za16_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_0, ++ svld1_ver_za16 (0, 0, p0, x1), ++ svld1_ver_za16 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 7. */ ++/* ++** ld1_za16_0_7: ++** mov (w12-5), #?7 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_7, ++ svld1_ver_za16 (0, 7, p0, x1), ++ svld1_ver_za16 (0, 7, p0, x1)) ++ ++/* ++** ld1_za16_0_8: ++** mov (w12-5), #?8 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_8, ++ svld1_ver_za16 (0, 8, p0, x1), ++ svld1_ver_za16 (0, 8, p0, x1)) ++ ++/* ++** ld1_za16_0_w0: ++** mov (w12-5), w0 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0, ++ svld1_ver_za16 (0, w0, p0, x1), ++ svld1_ver_za16 (0, w0, p0, x1)) ++ ++/* ++** ld1_za16_0_w0_p1: ++** mov (w12-5), w0 ++** ld1h { za0v\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_p1, ++ svld1_ver_za16 (0, w0 + 1, p0, x1), ++ svld1_ver_za16 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za16_0_w0_p7: ++** mov (w12-5), w0 ++** ld1h { za0v\.h\\1, 7\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_p7, ++ svld1_ver_za16 (0, w0 + 7, p0, x1), ++ svld1_ver_za16 (0, w0 + 7, p0, x1)) ++ ++/* ++** ld1_za16_1_w0: ++** mov (w12-5), w0 ++** ld1h { za1v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0, ++ svld1_ver_za16 (1, w0, p0, x1), ++ svld1_ver_za16 (1, w0, p0, x1)) ++ ++ ++/* ++** ld1_za16_1_w0_p1: ++** mov (w12-5), w0 ++** ld1h { za1v\.h\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0_p1, ++ svld1_ver_za16 (1, w0 + 1, p0, x1), ++ svld1_ver_za16 (1, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za16_1_w0_p7: ++** mov (w12-5), w0 ++** ld1h { za1v\.h\\1, 7\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0_p7, ++ svld1_ver_za16 (1, w0 + 7, p0, x1), ++ svld1_ver_za16 (1, w0 + 7, p0, x1)) ++ ++/* ++** ld1_za16_1_w0_p5_index: ++** mov (w12-5), w0 ++** ld1h { za1v\.h\\1, 5\ }, p0/z, \x1, x2, lsl #?1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_1_w0_p5_index, ++ svld1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2), ++ 
svld1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2)) ++ ++/* ++** ld1_za16_0_w0_p8: ++** add (w12-5), w0, #?8 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_p8, ++ svld1_ver_za16 (0, w0 + 8, p0, x1), ++ svld1_ver_za16 (0, w0 + 8, p0, x1)) ++ ++/* ++** ld1_za16_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1h { za0v\.h\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za16_0_w0_m1, ++ svld1_ver_za16 (0, w0 - 1, p0, x1), ++ svld1_ver_za16 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c +new file mode 100644 +index 000000000..bb9d93184 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c +@@ -0,0 +1,125 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za32_0_0: ++** mov (w12-5), (?:w0|#?0) ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_0, ++ svld1_ver_za32 (0, 0, p0, x1), ++ svld1_ver_za32 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 3. */ ++/* ++** ld1_za32_0_3: ++** mov (w12-5), #?3 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_3, ++ svld1_ver_za32 (0, 3, p0, x1), ++ svld1_ver_za32 (0, 3, p0, x1)) ++ ++/* ++** ld1_za32_0_4: ++** mov (w12-5), #?4 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_4, ++ svld1_ver_za32 (0, 4, p0, x1), ++ svld1_ver_za32 (0, 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0: ++** mov (w12-5), w0 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0, ++ svld1_ver_za32 (0, w0, p0, x1), ++ svld1_ver_za32 (0, w0, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p1: ++** mov (w12-5), w0 ++** ld1w { za0v\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p1, ++ svld1_ver_za32 (0, w0 + 1, p0, x1), ++ svld1_ver_za32 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_p3: ++** mov (w12-5), w0 ++** ld1w { za0v\.s\\1, 3\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p3, ++ svld1_ver_za32 (0, w0 + 3, p0, x1), ++ svld1_ver_za32 (0, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_3_w0: ++** mov (w12-5), w0 ++** ld1w { za3v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0, ++ svld1_ver_za32 (3, w0, p0, x1), ++ svld1_ver_za32 (3, w0, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p1: ++** mov (w12-5), w0 ++** ld1w { za3v\.s\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p1, ++ svld1_ver_za32 (3, w0 + 1, p0, x1), ++ svld1_ver_za32 (3, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za32_3_w0_p3: ++** mov (w12-5), w0 ++** ld1w { za3v\.s\\1, 3\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_3_w0_p3, ++ svld1_ver_za32 (3, w0 + 3, p0, x1), ++ svld1_ver_za32 (3, w0 + 3, p0, x1)) ++ ++/* ++** ld1_za32_1_w0_p2_index: ++** mov (w12-5), w0 ++** ld1w { za1v\.s\\1, 2\ }, p0/z, \x1, x2, lsl #?2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_1_w0_p2_index, ++ svld1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4), ++ svld1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4)) ++ ++/* ++** ld1_za32_0_w0_p4: ++** add (w12-5), w0, #?4 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_p4, ++ svld1_ver_za32 (0, w0 + 4, p0, x1), ++ svld1_ver_za32 (0, w0 + 4, p0, x1)) ++ ++/* ++** ld1_za32_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1w { za0v\.s\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za32_0_w0_m1, ++ svld1_ver_za32 (0, w0 - 1, p0, 
x1), ++ svld1_ver_za32 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c +new file mode 100644 +index 000000000..58d73ad06 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c +@@ -0,0 +1,105 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za64_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_0, ++ svld1_ver_za64 (0, 0, p0, x1), ++ svld1_ver_za64 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 1. */ ++/* ++** ld1_za64_0_1: ++** mov (w12-5), #?1 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_1, ++ svld1_ver_za64 (0, 1, p0, x1), ++ svld1_ver_za64 (0, 1, p0, x1)) ++ ++/* ++** ld1_za64_0_2: ++** mov (w12-5), #?2 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_2, ++ svld1_ver_za64 (0, 2, p0, x1), ++ svld1_ver_za64 (0, 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0: ++** mov (w12-5), w0 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0, ++ svld1_ver_za64 (0, w0, p0, x1), ++ svld1_ver_za64 (0, w0, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_p1: ++** mov (w12-5), w0 ++** ld1d { za0v\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p1, ++ svld1_ver_za64 (0, w0 + 1, p0, x1), ++ svld1_ver_za64 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_7_w0: ++** mov (w12-5), w0 ++** ld1d { za7v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0, ++ svld1_ver_za64 (7, w0, p0, x1), ++ svld1_ver_za64 (7, w0, p0, x1)) ++ ++/* ++** ld1_za64_7_w0_p1: ++** mov (w12-5), w0 ++** ld1d { za7v\.d\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_7_w0_p1, ++ svld1_ver_za64 (7, w0 + 1, p0, x1), ++ svld1_ver_za64 (7, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za64_5_w0_p1_index: ++** mov (w12-5), w0 ++** ld1d { za5v\.d\\1, 1\ }, p0/z, \x1, x2, lsl #?3\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_5_w0_p1_index, ++ svld1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8), ++ svld1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8)) ++ ++/* ++** ld1_za64_0_w0_p2: ++** add (w12-5), w0, #?2 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_p2, ++ svld1_ver_za64 (0, w0 + 2, p0, x1), ++ svld1_ver_za64 (0, w0 + 2, p0, x1)) ++ ++/* ++** ld1_za64_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1d { za0v\.d\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za64_0_w0_m1, ++ svld1_ver_za64 (0, w0 - 1, p0, x1), ++ svld1_ver_za64 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c +new file mode 100644 +index 000000000..38211b211 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c +@@ -0,0 +1,95 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ld1_za8_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_0, ++ svld1_ver_za8 (0, 0, p0, x1), ++ svld1_ver_za8 (0, 0, p0, x1)) ++ ++/* It would also be OK (and perhaps better) to move 0 into a register ++ and use an offset of 15. 
*/ ++/* ++** ld1_za8_0_15: ++** mov (w12-5), #?15 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_15, ++ svld1_ver_za8 (0, 15, p0, x1), ++ svld1_ver_za8 (0, 15, p0, x1)) ++ ++/* ++** ld1_za8_0_16: ++** mov (w12-5), #?16 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_16, ++ svld1_ver_za8 (0, 16, p0, x1), ++ svld1_ver_za8 (0, 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0: ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0, ++ svld1_ver_za8 (0, w0, p0, x1), ++ svld1_ver_za8 (0, w0, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p1: ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 1\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p1, ++ svld1_ver_za8 (0, w0 + 1, p0, x1), ++ svld1_ver_za8 (0, w0 + 1, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p15: ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 15\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p15, ++ svld1_ver_za8 (0, w0 + 15, p0, x1), ++ svld1_ver_za8 (0, w0 + 15, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_p13_index: ++** mov (w12-5), w0 ++** ld1b { za0v\.b\\1, 15\ }, p0/z, \x1, x2\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p13_index, ++ svld1_ver_za8 (0, w0 + 15, p0, x1 + x2), ++ svld1_ver_za8 (0, w0 + 15, p0, x1 + x2)) ++ ++/* ++** ld1_za8_0_w0_p16: ++** add (w12-5), w0, #?16 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_p16, ++ svld1_ver_za8 (0, w0 + 16, p0, x1), ++ svld1_ver_za8 (0, w0 + 16, p0, x1)) ++ ++/* ++** ld1_za8_0_w0_m1: ++** sub (w12-5), w0, #?1 ++** ld1b { za0v\.b\\1, 0\ }, p0/z, \x1\ ++** ret ++*/ ++TEST_LOAD_ZA (ld1_za8_0_w0_m1, ++ svld1_ver_za8 (0, w0 - 1, p0, x1), ++ svld1_ver_za8 (0, w0 - 1, p0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c +new file mode 100644 +index 000000000..90495d080 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c +@@ -0,0 +1,147 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ldr_vnum_za_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_0, ++ svldr_vnum_za (0, x1, 0), ++ svldr_vnum_za (0, x1, 0)) ++ ++/* ++** ldr_vnum_za_0_1: ++** mov (w12-5), (?:wzr|#?0) ++** ldr za\\1, 1\, \x1(?:, #1, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_1, ++ svldr_vnum_za (0, x1, 1), ++ svldr_vnum_za (0, x1, 1)) ++ ++/* ++** ldr_vnum_za_1_0: ++** mov (w12-5), #?1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_0, ++ svldr_vnum_za (1, x1, 0), ++ svldr_vnum_za (1, x1, 0)) ++ ++/* ++** ldr_vnum_za_1_2: ++** mov (w12-5), #?1 ++** ldr za\\1, 2\, \x1(?:, #2, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_2, ++ svldr_vnum_za (1, x1, 2), ++ svldr_vnum_za (1, x1, 2)) ++ ++/* ++** ldr_vnum_za_w0_0: ++** mov (w12-5), w0 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_0, ++ svldr_vnum_za (w0, x1, 0), ++ svldr_vnum_za (w0, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0_1: ++** mov (w12-5), w0 ++** ldr za\\1, 1\, \x1, #1, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_1, ++ svldr_vnum_za (w0, x1, 1), ++ svldr_vnum_za (w0, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0_13: ++** mov (w12-5), w0 ++** ldr za\\1, 13\, \x1, #13, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_13, ++ svldr_vnum_za (w0, x1, 13), ++ svldr_vnum_za (w0, x1, 13)) ++ ++/* ++** 
ldr_vnum_za_w0_15: ++** mov (w12-5), w0 ++** ldr za\\1, 15\, \x1, #15, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_15, ++ svldr_vnum_za (w0, x1, 15), ++ svldr_vnum_za (w0, x1, 15)) ++ ++/* ++** ldr_vnum_za_w0_16: ++** ( ++** add (w12-5), w0, #?16 ++** incb x1, all, mul #16 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** | ++** incb x1, all, mul #16 ++** add (w12-5), w0, #?16 ++** ldr za\\2, 0\, \x1(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_16, ++ svldr_vnum_za (w0, x1, 16), ++ svldr_vnum_za (w0, x1, 16)) ++ ++/* ++** ldr_vnum_za_w0_m1: ++** ( ++** sub (w12-5), w0, #?1 ++** decb x1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** | ++** decb x1 ++** sub (w12-5), w0, #?1 ++** ldr za\\2, 0\, \x1(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_m1, ++ svldr_vnum_za (w0, x1, -1), ++ svldr_vnum_za (w0, x1, -1)) ++ ++/* ++** ldr_vnum_za_w0p1_0: ++** add (w12-5), w0, #?1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, ++ svldr_vnum_za (w0 + 1, x1, 0), ++ svldr_vnum_za (w0 + 1, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0m1_1: ++** sub (w12-5), w0, #?1 ++** ldr za\\1, 1\, \x1(?:, #1, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, ++ svldr_vnum_za (w0 - 1, x1, 1), ++ svldr_vnum_za (w0 - 1, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0p2_3: ++** add (w12-5), w0, #?2 ++** ldr za\\1, 3\, \x1(?:, #3, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, ++ svldr_vnum_za (w0 + 2, x1, 3), ++ svldr_vnum_za (w0 + 2, x1, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c +new file mode 100644 +index 000000000..dfc2d139f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c +@@ -0,0 +1,148 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** ldr_vnum_za_0_0: ++** mov (w12-5), (?:wzr|#?0) ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_0, ++ svldr_vnum_za (0, x1, 0), ++ svldr_vnum_za (0, x1, 0)) ++ ++/* ++** ldr_vnum_za_0_1: ++** mov (w12-5), (?:wzr|#?0) ++** ldr za\\1, 1\, \x1(?:, #1, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_0_1, ++ svldr_vnum_za (0, x1, 1), ++ svldr_vnum_za (0, x1, 1)) ++ ++/* ++** ldr_vnum_za_1_0: ++** mov (w12-5), #?1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_0, ++ svldr_vnum_za (1, x1, 0), ++ svldr_vnum_za (1, x1, 0)) ++ ++/* ++** ldr_vnum_za_1_2: ++** mov (w12-5), #?1 ++** ldr za\\1, 2\, \x1(?:, #2, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_1_2, ++ svldr_vnum_za (1, x1, 2), ++ svldr_vnum_za (1, x1, 2)) ++ ++/* ++** ldr_vnum_za_w0_0: ++** mov (w12-5), w0 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_0, ++ svldr_vnum_za (w0, x1, 0), ++ svldr_vnum_za (w0, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0_1: ++** mov (w12-5), w0 ++** ldr za\\1, 1\, \x1, #1, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_1, ++ svldr_vnum_za (w0, x1, 1), ++ svldr_vnum_za (w0, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0_13: ++** mov (w12-5), w0 ++** ldr za\\1, 13\, \x1, #13, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_13, ++ svldr_vnum_za (w0, x1, 13), ++ svldr_vnum_za (w0, x1, 13)) ++ ++/* ++** ldr_vnum_za_w0_15: ++** mov (w12-5), w0 ++** ldr za\\1, 15\, \x1, #15, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_15, ++ svldr_vnum_za (w0, x1, 15), ++ 
svldr_vnum_za (w0, x1, 15)) ++ ++/* ++** ldr_vnum_za_w0_16: ++** ( ++** add (w12-5), w0, #?16 ++** addsvl (x0-9+), x1, #16 ++** ldr za\\1, 0\, \\2(?:, #0, mul vl)?\ ++** | ++** addsvl (x0-9+), x1, #16 ++** add (w12-5), w0, #?16 ++** ldr za\\4, 0\, \\3(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_16, ++ svldr_vnum_za (w0, x1, 16), ++ svldr_vnum_za (w0, x1, 16)) ++ ++/* ++** ldr_vnum_za_w0_m1: ++** ( ++** sub (w12-5), w0, #?1 ++** addsvl (x0-9+), x1, #-1 ++** ldr za\\1, 0\, \\2(?:, #0, mul vl)?\ ++** | ++** addsvl (x0-9+), x1, #-1 ++** sub (w12-5), w0, #?1 ++** ldr za\\4, 0\, \\3(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0_m1, ++ svldr_vnum_za (w0, x1, -1), ++ svldr_vnum_za (w0, x1, -1)) ++ ++/* ++** ldr_vnum_za_w0p1_0: ++** add (w12-5), w0, #?1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, ++ svldr_vnum_za (w0 + 1, x1, 0), ++ svldr_vnum_za (w0 + 1, x1, 0)) ++ ++/* ++** ldr_vnum_za_w0m1_1: ++** sub (w12-5), w0, #?1 ++** ldr za\\1, 1\, \x1(?:, #1, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, ++ svldr_vnum_za (w0 - 1, x1, 1), ++ svldr_vnum_za (w0 - 1, x1, 1)) ++ ++/* ++** ldr_vnum_za_w0p2_3: ++** add (w12-5), w0, #?2 ++** ldr za\\1, 3\, \x1(?:, #3, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, ++ svldr_vnum_za (w0 + 2, x1, 3), ++ svldr_vnum_za (w0 + 2, x1, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c +new file mode 100644 +index 000000000..313b3239a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c +@@ -0,0 +1,124 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** ldr_za_0: ++** mov (w12-5), (?:wzr|#?0) ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_0, ++ svldr_za (0, x1), ++ svldr_za (0, x1)) ++ ++/* ++** ldr_za_1: ++** mov (w12-5), #?1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_1, ++ svldr_za (1, x1), ++ svldr_za (1, x1)) ++ ++/* ++** ldr_za_w0: ++** mov (w12-5), w0 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0, ++ svldr_za (w0, x1), ++ svldr_za (w0, x1)) ++ ++/* ++** ldr_za_w0_1_vnum: ++** mov (w12-5), w0 ++** ldr za\\1, 1\, \x1, #1, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_1_vnum, ++ svldr_za (w0 + 1, x1 + svcntsb ()), ++ svldr_za (w0 + 1, x1 + svcntsb ())) ++ ++/* ++** ldr_za_w0_13_vnum: ++** mov (w12-5), w0 ++** ldr za\\1, 13\, \x1, #13, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_13_vnum, ++ svldr_za (w0 + 13, x1 + svcntsb () * 13), ++ svldr_za (w0 + 13, x1 + svcntsb () * 13)) ++ ++/* ++** ldr_za_w0_15_vnum: ++** mov (w12-5), w0 ++** ldr za\\1, 15\, \x1, #15, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_15_vnum, ++ svldr_za (w0 + 15, x1 + svcntsb () * 15), ++ svldr_za (w0 + 15, x1 + svcntsb () * 15)) ++ ++/* ++** ldr_za_w0_16_vnum: ++** ( ++** add (w12-5), w0, #?16 ++** incb x1, all, mul #16 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** | ++** incb x1, all, mul #16 ++** add (w12-5), w0, #?16 ++** ldr za\\2, 0\, \x1(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_16_vnum, ++ svldr_za (w0 + 16, x1 + svcntsb () * 16), ++ svldr_za (w0 + 16, x1 + svcntsb () * 16)) ++ ++/* ++** ldr_za_w0_m1_vnum: ++** ( ++** sub (w12-5), w0, #?1 ++** decb x1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** | ++** decb x1 ++** sub (w12-5), w0, #?1 ++** ldr za\\2, 0\, \x1(?:, 
#0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_m1_vnum, ++ svldr_za (w0 - 1, x1 - svcntsb ()), ++ svldr_za (w0 - 1, x1 - svcntsb ())) ++ ++/* ++** ldr_za_w0p2: ++** add (w12-5), w0, #?2 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0p2, ++ svldr_za (w0 + 2, x1), ++ svldr_za (w0 + 2, x1)) ++ ++/* ++** ldr_za_offset: ++** ( ++** mov (w12-5), w0 ++** add (x0-9+), x1, #?1 ++** ldr za\\1, 0\, \\2(?:, #0, mul vl)?\ ++** | ++** add (x0-9+), x1, #?1 ++** mov (w12-5), w0 ++** ldr za\\4, 0\, \\3(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_offset, ++ svldr_za (w0, x1 + 1), ++ svldr_za (w0, x1 + 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c +new file mode 100644 +index 000000000..a27be7671 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** ldr_za_0: ++** mov (w12-5), (?:wzr|#?0) ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_0, ++ svldr_za (0, x1), ++ svldr_za (0, x1)) ++ ++/* ++** ldr_za_1: ++** mov (w12-5), #?1 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_1, ++ svldr_za (1, x1), ++ svldr_za (1, x1)) ++ ++/* ++** ldr_za_w0: ++** mov (w12-5), w0 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0, ++ svldr_za (w0, x1), ++ svldr_za (w0, x1)) ++ ++/* ++** ldr_za_w0_1_vnum: ++** mov (w12-5), w0 ++** ldr za\\1, 1\, \x1, #1, mul vl\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0_1_vnum, ++ svldr_za (w0 + 1, x1 + svcntsb ()), ++ svldr_za (w0 + 1, x1 + svcntsb ())) ++ ++/* ++** ldr_za_w0p2: ++** add (w12-5), w0, #?2 ++** ldr za\\1, 0\, \x1(?:, #0, mul vl)?\ ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_w0p2, ++ svldr_za (w0 + 2, x1), ++ svldr_za (w0 + 2, x1)) ++ ++/* ++** ldr_za_offset: ++** ( ++** mov (w12-5), w0 ++** add (x0-9+), x1, #?1 ++** ldr za\\1, 0\, \\2(?:, #0, mul vl)?\ ++** | ++** add (x0-9+), x1, #?1 ++** mov (w12-5), w0 ++** ldr za\\4, 0\, \\3(?:, #0, mul vl)?\ ++** ) ++** ret ++*/ ++TEST_LOAD_ZA (ldr_za_offset, ++ svldr_za (w0, x1 + 1), ++ svldr_za (w0, x1 + 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c +new file mode 100644 +index 000000000..480de2c7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c +@@ -0,0 +1,102 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** mopa_za32_s8_0_p0_p1_z0_z1: ++** smopa za0\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_s8_0_p0_p1_z0_z1, svint8_t, ++ svmopa_za32_s8_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_s8_0_p1_p0_z1_z0: ++** smopa za0\.s, p1/m, p0/m, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_s8_0_p1_p0_z1_z0, svint8_t, ++ svmopa_za32_s8_m (0, p1, p0, z1, z0), ++ svmopa_za32_m (0, p1, p0, z1, z0)) ++ ++/* ++** mopa_za32_s8_3_p0_p1_z0_z1: ++** smopa za3\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_s8_3_p0_p1_z0_z1, svint8_t, ++ svmopa_za32_s8_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_u8_0_p0_p1_z0_z1: ++** umopa za0\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_u8_0_p0_p1_z0_z1, 
svuint8_t, ++ svmopa_za32_u8_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_u8_3_p0_p1_z0_z1: ++** umopa za3\.s, p0/m, p1/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_u8_3_p0_p1_z0_z1, svuint8_t, ++ svmopa_za32_u8_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_bf16_0_p0_p1_z0_z1: ++** bfmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t, ++ svmopa_za32_bf16_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_bf16_3_p0_p1_z0_z1: ++** bfmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t, ++ svmopa_za32_bf16_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f16_0_p0_p1_z0_z1: ++** fmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f16_0_p0_p1_z0_z1, svfloat16_t, ++ svmopa_za32_f16_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f16_3_p0_p1_z0_z1: ++** fmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f16_3_p0_p1_z0_z1, svfloat16_t, ++ svmopa_za32_f16_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f32_0_p0_p1_z0_z1: ++** fmopa za0\.s, p0/m, p1/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f32_0_p0_p1_z0_z1, svfloat32_t, ++ svmopa_za32_f32_m (0, p0, p1, z0, z1), ++ svmopa_za32_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za32_f32_3_p0_p1_z0_z1: ++** fmopa za3\.s, p0/m, p1/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za32_f32_3_p0_p1_z0_z1, svfloat32_t, ++ svmopa_za32_f32_m (3, p0, p1, z0, z1), ++ svmopa_za32_m (3, p0, p1, z0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c +new file mode 100644 +index 000000000..f523b9605 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c +@@ -0,0 +1,70 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++#pragma GCC target "+sme-i16i64" ++ ++/* ++** mopa_za64_s16_0_p0_p1_z0_z1: ++** smopa za0\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_s16_0_p0_p1_z0_z1, svint16_t, ++ svmopa_za64_s16_m (0, p0, p1, z0, z1), ++ svmopa_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za64_s16_0_p1_p0_z1_z0: ++** smopa za0\.d, p1/m, p0/m, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_s16_0_p1_p0_z1_z0, svint16_t, ++ svmopa_za64_s16_m (0, p1, p0, z1, z0), ++ svmopa_za64_m (0, p1, p0, z1, z0)) ++ ++/* ++** mopa_za64_s16_7_p0_p1_z0_z1: ++** smopa za7\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_s16_7_p0_p1_z0_z1, svint16_t, ++ svmopa_za64_s16_m (7, p0, p1, z0, z1), ++ svmopa_za64_m (7, p0, p1, z0, z1)) ++ ++/* ++** mopa_za64_u16_0_p0_p1_z0_z1: ++** umopa za0\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_u16_0_p0_p1_z0_z1, svuint16_t, ++ svmopa_za64_u16_m (0, p0, p1, z0, z1), ++ svmopa_za64_m (0, p0, p1, z0, z1)) ++ ++/* ++** mopa_za64_u16_7_p0_p1_z0_z1: ++** umopa za7\.d, p0/m, p1/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZA (mopa_za64_u16_7_p0_p1_z0_z1, svuint16_t, ++ svmopa_za64_u16_m (7, p0, p1, z0, z1), ++ svmopa_za64_m (7, p0, p1, z0, z1)) ++ ++#pragma GCC target "+nosme-i16i64+sme-f64f64" ++ ++/* ++** mopa_za64_f64_0_p0_p1_z0_z1: ++** fmopa za0\.d, p0/m, p1/m, z0\.d, z1\.d ++** ret ++*/ 
++TEST_UNIFORM_ZA (mopa_za64_f64_0_p0_p1_z0_z1, svfloat64_t,
++		 svmopa_za64_f64_m (0, p0, p1, z0, z1),
++		 svmopa_za64_m (0, p0, p1, z0, z1))
++
++/*
++** mopa_za64_f64_7_p0_p1_z0_z1:
++**	fmopa	za7\.d, p0/m, p1/m, z0\.d, z1\.d
++**	ret
++*/
++TEST_UNIFORM_ZA (mopa_za64_f64_7_p0_p1_z0_z1, svfloat64_t,
++		 svmopa_za64_f64_m (7, p0, p1, z0, z1),
++		 svmopa_za64_m (7, p0, p1, z0, z1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c
+new file mode 100644
+index 000000000..63c2b80fd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c
+@@ -0,0 +1,102 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** mops_za32_s8_0_p0_p1_z0_z1:
++**	smops	za0\.s, p0/m, p1/m, z0\.b, z1\.b
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_s8_0_p0_p1_z0_z1, svint8_t,
++		 svmops_za32_s8_m (0, p0, p1, z0, z1),
++		 svmops_za32_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za32_s8_0_p1_p0_z1_z0:
++**	smops	za0\.s, p1/m, p0/m, z1\.b, z0\.b
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_s8_0_p1_p0_z1_z0, svint8_t,
++		 svmops_za32_s8_m (0, p1, p0, z1, z0),
++		 svmops_za32_m (0, p1, p0, z1, z0))
++
++/*
++** mops_za32_s8_3_p0_p1_z0_z1:
++**	smops	za3\.s, p0/m, p1/m, z0\.b, z1\.b
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_s8_3_p0_p1_z0_z1, svint8_t,
++		 svmops_za32_s8_m (3, p0, p1, z0, z1),
++		 svmops_za32_m (3, p0, p1, z0, z1))
++
++/*
++** mops_za32_u8_0_p0_p1_z0_z1:
++**	umops	za0\.s, p0/m, p1/m, z0\.b, z1\.b
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_u8_0_p0_p1_z0_z1, svuint8_t,
++		 svmops_za32_u8_m (0, p0, p1, z0, z1),
++		 svmops_za32_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za32_u8_3_p0_p1_z0_z1:
++**	umops	za3\.s, p0/m, p1/m, z0\.b, z1\.b
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_u8_3_p0_p1_z0_z1, svuint8_t,
++		 svmops_za32_u8_m (3, p0, p1, z0, z1),
++		 svmops_za32_m (3, p0, p1, z0, z1))
++
++/*
++** mops_za32_bf16_0_p0_p1_z0_z1:
++**	bfmops	za0\.s, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t,
++		 svmops_za32_bf16_m (0, p0, p1, z0, z1),
++		 svmops_za32_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za32_bf16_3_p0_p1_z0_z1:
++**	bfmops	za3\.s, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t,
++		 svmops_za32_bf16_m (3, p0, p1, z0, z1),
++		 svmops_za32_m (3, p0, p1, z0, z1))
++
++/*
++** mops_za32_f16_0_p0_p1_z0_z1:
++**	fmops	za0\.s, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_f16_0_p0_p1_z0_z1, svfloat16_t,
++		 svmops_za32_f16_m (0, p0, p1, z0, z1),
++		 svmops_za32_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za32_f16_3_p0_p1_z0_z1:
++**	fmops	za3\.s, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_f16_3_p0_p1_z0_z1, svfloat16_t,
++		 svmops_za32_f16_m (3, p0, p1, z0, z1),
++		 svmops_za32_m (3, p0, p1, z0, z1))
++
++/*
++** mops_za32_f32_0_p0_p1_z0_z1:
++**	fmops	za0\.s, p0/m, p1/m, z0\.s, z1\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_f32_0_p0_p1_z0_z1, svfloat32_t,
++		 svmops_za32_f32_m (0, p0, p1, z0, z1),
++		 svmops_za32_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za32_f32_3_p0_p1_z0_z1:
++**	fmops	za3\.s, p0/m, p1/m, z0\.s, z1\.s
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za32_f32_3_p0_p1_z0_z1, svfloat32_t,
++		 svmops_za32_f32_m (3, p0, p1, z0, z1),
++		 svmops_za32_m (3, p0, p1, z0, z1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c
+new file mode 100644
+index 000000000..bc04c3cf7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c
+@@ -0,0 +1,70 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++#pragma GCC target "+sme-i16i64"
++
++/*
++** mops_za64_s16_0_p0_p1_z0_z1:
++**	smops	za0\.d, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_s16_0_p0_p1_z0_z1, svint16_t,
++		 svmops_za64_s16_m (0, p0, p1, z0, z1),
++		 svmops_za64_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za64_s16_0_p1_p0_z1_z0:
++**	smops	za0\.d, p1/m, p0/m, z1\.h, z0\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_s16_0_p1_p0_z1_z0, svint16_t,
++		 svmops_za64_s16_m (0, p1, p0, z1, z0),
++		 svmops_za64_m (0, p1, p0, z1, z0))
++
++/*
++** mops_za64_s16_7_p0_p1_z0_z1:
++**	smops	za7\.d, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_s16_7_p0_p1_z0_z1, svint16_t,
++		 svmops_za64_s16_m (7, p0, p1, z0, z1),
++		 svmops_za64_m (7, p0, p1, z0, z1))
++
++/*
++** mops_za64_u16_0_p0_p1_z0_z1:
++**	umops	za0\.d, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_u16_0_p0_p1_z0_z1, svuint16_t,
++		 svmops_za64_u16_m (0, p0, p1, z0, z1),
++		 svmops_za64_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za64_u16_7_p0_p1_z0_z1:
++**	umops	za7\.d, p0/m, p1/m, z0\.h, z1\.h
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_u16_7_p0_p1_z0_z1, svuint16_t,
++		 svmops_za64_u16_m (7, p0, p1, z0, z1),
++		 svmops_za64_m (7, p0, p1, z0, z1))
++
++#pragma GCC target "+nosme-i16i64+sme-f64f64"
++
++/*
++** mops_za64_f64_0_p0_p1_z0_z1:
++**	fmops	za0\.d, p0/m, p1/m, z0\.d, z1\.d
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_f64_0_p0_p1_z0_z1, svfloat64_t,
++		 svmops_za64_f64_m (0, p0, p1, z0, z1),
++		 svmops_za64_m (0, p0, p1, z0, z1))
++
++/*
++** mops_za64_f64_7_p0_p1_z0_z1:
++**	fmops	za7\.d, p0/m, p1/m, z0\.d, z1\.d
++**	ret
++*/
++TEST_UNIFORM_ZA (mops_za64_f64_7_p0_p1_z0_z1, svfloat64_t,
++		 svmops_za64_f64_m (7, p0, p1, z0, z1),
++		 svmops_za64_m (7, p0, p1, z0, z1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c
+new file mode 100644
+index 000000000..c8eef3b16
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c
+@@ -0,0 +1,435 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** read_za128_s8_0_0_tied:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 0, 0),
++	      z0 = svread_hor_za128_m (z0, p0, 0, 0))
++
++/*
++** read_za128_s8_0_1_tied:
++**	mov	(w1[2-5]), #?1
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 0, 1),
++	      z0 = svread_hor_za128_m (z0, p0, 0, 1))
++
++/*
++** read_za128_s8_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_0_w0_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_s8_0_w0p1_tied:
++**	add	(w1[2-5]), w0, #?1
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_0_w0p1_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 + 1),
++	      z0 = svread_hor_za128_m (z0, p0, 0, w0 + 1))
++
++/*
++** read_za128_s8_0_w0m1_tied:
++**	sub	(w1[2-5]), w0, #?1
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_0_w0m1_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 - 1),
++	      z0 = svread_hor_za128_m (z0, p0, 0, w0 - 1))
++
++/*
++** read_za128_s8_1_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za1h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_1_w0_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 1, w0),
++	      z0 = svread_hor_za128_m (z0, p0, 1, w0))
++
++/*
++** read_za128_s8_15_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za15h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_15_w0_tied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z0, p0, 15, w0),
++	      z0 = svread_hor_za128_m (z0, p0, 15, w0))
++
++/*
++** read_za128_s8_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0h\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_s8_0_w0_untied, svint8_t,
++	      z0 = svread_hor_za128_s8_m (z1, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_u8_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t,
++	      z0 = svread_hor_za128_u8_m (z0, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_u8_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0h\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t,
++	      z0 = svread_hor_za128_u8_m (z1, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_s16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s16_0_w0_tied, svint16_t,
++	      z0 = svread_hor_za128_s16_m (z0, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_s16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0h\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_s16_0_w0_untied, svint16_t,
++	      z0 = svread_hor_za128_s16_m (z1, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_u16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_u16_0_w0_tied, svuint16_t,
++	      z0 = svread_hor_za128_u16_m (z0, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_u16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0h\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_u16_0_w0_untied, svuint16_t,
++	      z0 = svread_hor_za128_u16_m (z1, p0, 0, w0),
++	      z0 = svread_hor_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_f16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0h\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_f16_0_w0_tied, svfloat16_t,
++	      z0 = svread_hor_za128_f16_m (z0, p0, 0, w0),
++	      z0 = 
svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f16_0_w0_untied, svfloat16_t, ++ z0 = svread_hor_za128_f16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_bf16_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_bf16_0_w0_tied, svbfloat16_t, ++ z0 = svread_hor_za128_bf16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_bf16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_bf16_0_w0_untied, svbfloat16_t, ++ z0 = svread_hor_za128_bf16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s32_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s32_0_w0_tied, svint32_t, ++ z0 = svread_hor_za128_s32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s32_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s32_0_w0_untied, svint32_t, ++ z0 = svread_hor_za128_s32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u32_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_u32_0_w0_tied, svuint32_t, ++ z0 = svread_hor_za128_u32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u32_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u32_0_w0_untied, svuint32_t, ++ z0 = svread_hor_za128_u32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f32_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_f32_0_w0_tied, svfloat32_t, ++ z0 = svread_hor_za128_f32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f32_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f32_0_w0_untied, svfloat32_t, ++ z0 = svread_hor_za128_f32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** 
ret ++*/ ++TEST_READ_ZA (read_za128_s64_0_w0_tied, svint64_t, ++ z0 = svread_hor_za128_s64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s64_0_w0_untied, svint64_t, ++ z0 = svread_hor_za128_s64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_u64_0_w0_tied, svuint64_t, ++ z0 = svread_hor_za128_u64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u64_0_w0_untied, svuint64_t, ++ z0 = svread_hor_za128_u64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_f64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_f64_0_w0_tied, svfloat64_t, ++ z0 = svread_hor_za128_f64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_f64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0h\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0h\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0h\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_f64_0_w0_untied, svfloat64_t, ++ z0 = svread_hor_za128_f64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za128_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c +new file mode 100644 +index 000000000..2e0a96591 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za16_s16_0_0_tied: ++** mov (w12-5), (?:wzr|#?0) ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, 0), ++ z0 = svread_hor_za16_m (z0, p0, 0, 0)) ++ ++/* ++** read_za16_s16_0_1_tied: ++** mov (w12-5), #?1 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, 1), ++ z0 = svread_hor_za16_m (z0, p0, 0, 1)) ++ ++/* ++** read_za16_s16_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_s16_0_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p1_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za16_s16_0_w0p7_tied: 
++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\1, 7\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p7_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 7), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 7)) ++ ++/* ++** read_za16_s16_0_w0p8_tied: ++** add (w12-5), w0, #?8 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0p8_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 8), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 8)) ++ ++/* ++** read_za16_s16_0_w0m1_tied: ++** sub (w12-5), w0, #?1 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0m1_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za16_s16_1_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za1h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_1_w0_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0), ++ z0 = svread_hor_za16_m (z0, p0, 1, w0)) ++ ++/* ++** read_za16_s16_1_w0p7_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za1h\.h\\1, 7\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_1_w0p7_tied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0 + 7), ++ z0 = svread_hor_za16_m (z0, p0, 1, w0 + 7)) ++ ++/* ++** read_za16_s16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.h, p0/m, za0h\.h\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_s16_0_w0_untied, svint16_t, ++ z0 = svread_hor_za16_s16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_u16_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_u16_0_w0_tied, svuint16_t, ++ z0 = svread_hor_za16_u16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_u16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.h, p0/m, za0h\.h\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_u16_0_w0_untied, svuint16_t, ++ z0 = svread_hor_za16_u16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_f16_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_f16_0_w0_tied, svfloat16_t, ++ z0 = svread_hor_za16_f16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_f16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.h, p0/m, za0h\.h\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_f16_0_w0_untied, svfloat16_t, ++ z0 = svread_hor_za16_f16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) ++ ++/* ++** read_za16_bf16_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za16_bf16_0_w0_tied, svbfloat16_t, ++ z0 = svread_hor_za16_bf16_m (z0, p0, 0, w0), ++ z0 = svread_hor_za16_m (z0, p0, 0, w0)) ++ ++/* ++** read_za16_bf16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d 
++** mova z0\.h, p0/m, za0h\.h\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.h, p0/m, za0h\.h\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.h, p0/m, za0h\.h\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za16_bf16_0_w0_untied, svbfloat16_t, ++ z0 = svread_hor_za16_bf16_m (z1, p0, 0, w0), ++ z0 = svread_hor_za16_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c +new file mode 100644 +index 000000000..d111b60a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c +@@ -0,0 +1,196 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za32_s32_0_0_tied: ++** mov (w12-5), (?:wzr|#?0) ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, 0), ++ z0 = svread_hor_za32_m (z0, p0, 0, 0)) ++ ++/* ++** read_za32_s32_0_1_tied: ++** mov (w12-5), #?1 ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, 1), ++ z0 = svread_hor_za32_m (z0, p0, 0, 1)) ++ ++/* ++** read_za32_s32_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_s32_0_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p1_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za32_s32_0_w0p3_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\1, 3\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p3_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 3), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 3)) ++ ++/* ++** read_za32_s32_0_w0p4_tied: ++** add (w12-5), w0, #?4 ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0p4_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 4), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 4)) ++ ++/* ++** read_za32_s32_0_w0m1_tied: ++** sub (w12-5), w0, #?1 ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0m1_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za32_s32_1_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za1h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_1_w0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0), ++ z0 = svread_hor_za32_m (z0, p0, 1, w0)) ++ ++/* ++** read_za32_s32_1_w0p3_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za1h\.s\\1, 3\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_1_w0p3_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0 + 3), ++ z0 = svread_hor_za32_m (z0, p0, 1, w0 + 3)) ++ ++/* ++** read_za32_s32_3_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za3h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_3_w0_tied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0), ++ z0 = svread_hor_za32_m (z0, p0, 3, w0)) ++ ++/* ++** read_za32_s32_3_w0p3_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za3h\.s\\1, 3\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_3_w0p3_tied, svint32_t, ++ z0 = 
svread_hor_za32_s32_m (z0, p0, 3, w0 + 3), ++ z0 = svread_hor_za32_m (z0, p0, 3, w0 + 3)) ++ ++/* ++** read_za32_s32_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.s, p0/m, za0h\.s\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_s32_0_w0_untied, svint32_t, ++ z0 = svread_hor_za32_s32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za32_m (z1, p0, 0, w0)) ++ ++/* ++** read_za32_u32_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_u32_0_w0_tied, svuint32_t, ++ z0 = svread_hor_za32_u32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_u32_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.s, p0/m, za0h\.s\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_u32_0_w0_untied, svuint32_t, ++ z0 = svread_hor_za32_u32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za32_m (z1, p0, 0, w0)) ++ ++/* ++** read_za32_f32_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za32_f32_0_w0_tied, svfloat32_t, ++ z0 = svread_hor_za32_f32_m (z0, p0, 0, w0), ++ z0 = svread_hor_za32_m (z0, p0, 0, w0)) ++ ++/* ++** read_za32_f32_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.s, p0/m, za0h\.s\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.s, p0/m, za0h\.s\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.s, p0/m, za0h\.s\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za32_f32_0_w0_untied, svfloat32_t, ++ z0 = svread_hor_za32_f32_m (z1, p0, 0, w0), ++ z0 = svread_hor_za32_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c +new file mode 100644 +index 000000000..b75c531a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c +@@ -0,0 +1,186 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za64_s64_0_0_tied: ++** mov (w12-5), (?:wzr|#?0) ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, 0), ++ z0 = svread_hor_za64_m (z0, p0, 0, 0)) ++ ++/* ++** read_za64_s64_0_1_tied: ++** mov (w12-5), #?1 ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, 1), ++ z0 = svread_hor_za64_m (z0, p0, 0, 1)) ++ ++/* ++** read_za64_s64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_s64_0_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0p2_tied: ++** add (w12-5), w0, #?2 ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ 
++TEST_READ_ZA (read_za64_s64_0_w0p2_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 2), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 2)) ++ ++/* ++** read_za64_s64_0_w0m1_tied: ++** sub (w12-5), w0, #?1 ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0m1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za64_s64_1_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za1h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0), ++ z0 = svread_hor_za64_m (z0, p0, 1, w0)) ++ ++/* ++** read_za64_s64_1_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za1h\.d\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0p1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0 + 1), ++ z0 = svread_hor_za64_m (z0, p0, 1, w0 + 1)) ++ ++/* ++** read_za64_s64_7_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za7h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0), ++ z0 = svread_hor_za64_m (z0, p0, 7, w0)) ++ ++/* ++** read_za64_s64_7_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za7h\.d\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0p1_tied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0 + 1), ++ z0 = svread_hor_za64_m (z0, p0, 7, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.d, p0/m, za0h\.d\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_untied, svint64_t, ++ z0 = svread_hor_za64_s64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_tied, svuint64_t, ++ z0 = svread_hor_za64_u64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.d, p0/m, za0h\.d\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_untied, svuint64_t, ++ z0 = svread_hor_za64_u64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_f64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_f64_0_w0_tied, svfloat64_t, ++ z0 = svread_hor_za64_f64_m (z0, p0, 0, w0), ++ z0 = svread_hor_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_f64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0h\.d\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0h\.d\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.d, p0/m, za0h\.d\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_f64_0_w0_untied, svfloat64_t, ++ z0 = svread_hor_za64_f64_m (z1, p0, 0, w0), ++ z0 = svread_hor_za64_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c +new file mode 100644 +index 
000000000..0ad5a953f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c +@@ -0,0 +1,125 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** read_za8_s8_0_0_tied: ++** mov (w12-5), (?:wzr|#?0) ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, 0), ++ z0 = svread_hor_za8_m (z0, p0, 0, 0)) ++ ++/* ++** read_za8_s8_0_1_tied: ++** mov (w12-5), #?1 ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, 1), ++ z0 = svread_hor_za8_m (z0, p0, 0, 1)) ++ ++/* ++** read_za8_s8_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0)) ++ ++/* ++** read_za8_s8_0_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.b, p0/m, za0h\.b\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p1_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 1), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za8_s8_0_w0p15_tied: ++** mov (w12-5), w0 ++** mova z0\.b, p0/m, za0h\.b\\1, 15\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p15_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 15), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 15)) ++ ++/* ++** read_za8_s8_0_w0p16_tied: ++** add (w12-5), w0, #?16 ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0p16_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 16), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 16)) ++ ++/* ++** read_za8_s8_0_w0m1_tied: ++** sub (w12-5), w0, #?1 ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0m1_tied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 - 1), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za8_s8_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.b, p0/m, za0h\.b\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.b, p0/m, za0h\.b\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za8_s8_0_w0_untied, svint8_t, ++ z0 = svread_hor_za8_s8_m (z1, p0, 0, w0), ++ z0 = svread_hor_za8_m (z1, p0, 0, w0)) ++ ++/* ++** read_za8_u8_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, ++ z0 = svread_hor_za8_u8_m (z0, p0, 0, w0), ++ z0 = svread_hor_za8_m (z0, p0, 0, w0)) ++ ++/* ++** read_za8_u8_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.b, p0/m, za0h\.b\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.b, p0/m, za0h\.b\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.b, p0/m, za0h\.b\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, ++ z0 = svread_hor_za8_u8_m (z1, p0, 0, w0), ++ z0 = svread_hor_za8_m (z1, p0, 0, w0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c +new file mode 100644 +index 000000000..93d5d60ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c +@@ -0,0 +1,435 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ 
++#include "test_sme_acle.h" ++ ++/* ++** read_za128_s8_0_0_tied: ++** mov (w12-5), (?:wzr|#?0) ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, 0), ++ z0 = svread_ver_za128_m (z0, p0, 0, 0)) ++ ++/* ++** read_za128_s8_0_1_tied: ++** mov (w12-5), #?1 ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, 1), ++ z0 = svread_ver_za128_m (z0, p0, 0, 1)) ++ ++/* ++** read_za128_s8_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s8_0_w0p1_tied: ++** add (w12-5), w0, #?1 ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0p1_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za128_s8_0_w0m1_tied: ++** sub (w12-5), w0, #?1 ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0m1_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za128_s8_1_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za1v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_1_w0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 1, w0), ++ z0 = svread_ver_za128_m (z0, p0, 1, w0)) ++ ++/* ++** read_za128_s8_15_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za15v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_15_w0_tied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z0, p0, 15, w0), ++ z0 = svread_ver_za128_m (z0, p0, 15, w0)) ++ ++/* ++** read_za128_s8_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0v\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0v\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_s8_0_w0_untied, svint8_t, ++ z0 = svread_ver_za128_s8_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_u8_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, ++ z0 = svread_ver_za128_u8_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_u8_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0v\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, p0/m, za0v\.q\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t, ++ z0 = svread_ver_za128_u8_m (z1, p0, 0, w0), ++ z0 = svread_ver_za128_m (z1, p0, 0, w0)) ++ ++/* ++** read_za128_s16_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za128_s16_0_w0_tied, svint16_t, ++ z0 = svread_ver_za128_s16_m (z0, p0, 0, w0), ++ z0 = svread_ver_za128_m (z0, p0, 0, w0)) ++ ++/* ++** read_za128_s16_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.q, p0/m, za0v\.q\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.q, p0/m, za0v\.q\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.q, 
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_s16_0_w0_untied, svint16_t,
++	      z0 = svread_ver_za128_s16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_u16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_u16_0_w0_tied, svuint16_t,
++	      z0 = svread_ver_za128_u16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_u16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_u16_0_w0_untied, svuint16_t,
++	      z0 = svread_ver_za128_u16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_f16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_f16_0_w0_tied, svfloat16_t,
++	      z0 = svread_ver_za128_f16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_f16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_f16_0_w0_untied, svfloat16_t,
++	      z0 = svread_ver_za128_f16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_bf16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_bf16_0_w0_tied, svbfloat16_t,
++	      z0 = svread_ver_za128_bf16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_bf16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_bf16_0_w0_untied, svbfloat16_t,
++	      z0 = svread_ver_za128_bf16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_s32_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s32_0_w0_tied, svint32_t,
++	      z0 = svread_ver_za128_s32_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_s32_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_s32_0_w0_untied, svint32_t,
++	      z0 = svread_ver_za128_s32_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_u32_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_u32_0_w0_tied, svuint32_t,
++	      z0 = svread_ver_za128_u32_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_u32_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_u32_0_w0_untied, svuint32_t,
++	      z0 = svread_ver_za128_u32_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_f32_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_f32_0_w0_tied, svfloat32_t,
++	      z0 = svread_ver_za128_f32_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_f32_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_f32_0_w0_untied, svfloat32_t,
++	      z0 = svread_ver_za128_f32_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_s64_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_s64_0_w0_tied, svint64_t,
++	      z0 = svread_ver_za128_s64_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_s64_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_s64_0_w0_untied, svint64_t,
++	      z0 = svread_ver_za128_s64_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_u64_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_u64_0_w0_tied, svuint64_t,
++	      z0 = svread_ver_za128_u64_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_u64_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_u64_0_w0_untied, svuint64_t,
++	      z0 = svread_ver_za128_u64_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
++
++/*
++** read_za128_f64_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za128_f64_0_w0_tied, svfloat64_t,
++	      z0 = svread_ver_za128_f64_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z0, p0, 0, w0))
++
++/*
++** read_za128_f64_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.q, p0/m, za0v\.q\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.q, p0/m, za0v\.q\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.q, p0/m, za0v\.q\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za128_f64_0_w0_untied, svfloat64_t,
++	      z0 = svread_ver_za128_f64_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za128_m (z1, p0, 0, w0))
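
The za128 (.q) tests above differ from the other element sizes: a 128-bit tile slice has no immediate offset field in MOVA, so even an index of w0 + 1 is materialised with an explicit ADD, and the tile number ranges over za0-za15. Illustrative sketch, under the same <arm_sme.h>/attribute assumptions as before:

#include <arm_sme.h>

/* A variable slice index such as i + 1 needs an ADD first for .q
   slices, exactly as the patterns above expect.  */
svuint64_t read_q_slice (svuint64_t init, svbool_t pg, uint32_t i)
  __arm_streaming __arm_inout("za")
{
  return svread_ver_za128_u64_m (init, pg, 0, i + 1);
}
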
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c
+new file mode 100644
+index 000000000..d0353dce6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c
+@@ -0,0 +1,207 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** read_za16_s16_0_0_tied:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, 0),
++	      z0 = svread_ver_za16_m (z0, p0, 0, 0))
++
++/*
++** read_za16_s16_0_1_tied:
++**	mov	(w1[2-5]), #?1
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, 1),
++	      z0 = svread_ver_za16_m (z0, p0, 0, 1))
++
++/*
++** read_za16_s16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_w0_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0))
++
++/*
++** read_za16_s16_0_w0p1_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 1\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_w0p1_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 1),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0 + 1))
++
++/*
++** read_za16_s16_0_w0p7_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 7\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_w0p7_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 7),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0 + 7))
++
++/*
++** read_za16_s16_0_w0p8_tied:
++**	add	(w1[2-5]), w0, #?8
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_w0p8_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 8),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0 + 8))
++
++/*
++** read_za16_s16_0_w0m1_tied:
++**	sub	(w1[2-5]), w0, #?1
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_w0m1_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 - 1),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0 - 1))
++
++/*
++** read_za16_s16_1_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za1v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_1_w0_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 1, w0),
++	      z0 = svread_ver_za16_m (z0, p0, 1, w0))
++
++/*
++** read_za16_s16_1_w0p7_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za1v\.h\[\1, 7\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_1_w0p7_tied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z0, p0, 1, w0 + 7),
++	      z0 = svread_ver_za16_m (z0, p0, 1, w0 + 7))
++
++/*
++** read_za16_s16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.h, p0/m, za0v\.h\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za16_s16_0_w0_untied, svint16_t,
++	      z0 = svread_ver_za16_s16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z1, p0, 0, w0))
++
++/*
++** read_za16_u16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_u16_0_w0_tied, svuint16_t,
++	      z0 = svread_ver_za16_u16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0))
++
++/*
++** read_za16_u16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.h, p0/m, za0v\.h\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za16_u16_0_w0_untied, svuint16_t,
++	      z0 = svread_ver_za16_u16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z1, p0, 0, w0))
++
++/*
++** read_za16_f16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_f16_0_w0_tied, svfloat16_t,
++	      z0 = svread_ver_za16_f16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0))
++
++/*
++** read_za16_f16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.h, p0/m, za0v\.h\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za16_f16_0_w0_untied, svfloat16_t,
++	      z0 = svread_ver_za16_f16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z1, p0, 0, w0))
++
++/*
++** read_za16_bf16_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za16_bf16_0_w0_tied, svbfloat16_t,
++	      z0 = svread_ver_za16_bf16_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z0, p0, 0, w0))
++
++/*
++** read_za16_bf16_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.h, p0/m, za0v\.h\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.h, p0/m, za0v\.h\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.h, p0/m, za0v\.h\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za16_bf16_0_w0_untied, svbfloat16_t,
++	      z0 = svread_ver_za16_bf16_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za16_m (z1, p0, 0, w0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c
+new file mode 100644
+index 000000000..362e818ee
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c
+@@ -0,0 +1,196 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** read_za32_s32_0_0_tied:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, 0),
++	      z0 = svread_ver_za32_m (z0, p0, 0, 0))
++
++/*
++** read_za32_s32_0_1_tied:
++**	mov	(w1[2-5]), #?1
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, 1),
++	      z0 = svread_ver_za32_m (z0, p0, 0, 1))
++
++/*
++** read_za32_s32_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_w0_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0))
++
++/*
++** read_za32_s32_0_w0p1_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 1\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_w0p1_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 1),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0 + 1))
++
++/*
++** read_za32_s32_0_w0p3_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 3\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_w0p3_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 3),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0 + 3))
++
++/*
++** read_za32_s32_0_w0p4_tied:
++**	add	(w1[2-5]), w0, #?4
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_w0p4_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 4),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0 + 4))
++
++/*
++** read_za32_s32_0_w0m1_tied:
++**	sub	(w1[2-5]), w0, #?1
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_w0m1_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 - 1),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0 - 1))
++
++/*
++** read_za32_s32_1_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za1v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_1_w0_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 1, w0),
++	      z0 = svread_ver_za32_m (z0, p0, 1, w0))
++
++/*
++** read_za32_s32_1_w0p3_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za1v\.s\[\1, 3\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_1_w0p3_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 1, w0 + 3),
++	      z0 = svread_ver_za32_m (z0, p0, 1, w0 + 3))
++
++/*
++** read_za32_s32_3_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za3v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_3_w0_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 3, w0),
++	      z0 = svread_ver_za32_m (z0, p0, 3, w0))
++
++/*
++** read_za32_s32_3_w0p3_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za3v\.s\[\1, 3\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_3_w0p3_tied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z0, p0, 3, w0 + 3),
++	      z0 = svread_ver_za32_m (z0, p0, 3, w0 + 3))
++
++/*
++** read_za32_s32_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.s, p0/m, za0v\.s\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za32_s32_0_w0_untied, svint32_t,
++	      z0 = svread_ver_za32_s32_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za32_m (z1, p0, 0, w0))
++
++/*
++** read_za32_u32_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_u32_0_w0_tied, svuint32_t,
++	      z0 = svread_ver_za32_u32_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0))
++
++/*
++** read_za32_u32_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.s, p0/m, za0v\.s\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za32_u32_0_w0_untied, svuint32_t,
++	      z0 = svread_ver_za32_u32_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za32_m (z1, p0, 0, w0))
++
++/*
++** read_za32_f32_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za32_f32_0_w0_tied, svfloat32_t,
++	      z0 = svread_ver_za32_f32_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za32_m (z0, p0, 0, w0))
++
++/*
++** read_za32_f32_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.s, p0/m, za0v\.s\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.s, p0/m, za0v\.s\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.s, p0/m, za0v\.s\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za32_f32_0_w0_untied, svfloat32_t,
++	      z0 = svread_ver_za32_f32_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za32_m (z1, p0, 0, w0))
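
These 32-bit tests pin down the slice-offset folding rule: MOVA on .s slices encodes an immediate offset of 0-3, so w0 + 3 folds into the instruction while w0 + 4 forces an ADD (the .d tests below allow only 0-1, .h allows 0-7 and .b 0-15). A sketch under the same assumptions as the earlier examples:

#include <arm_sme.h>

/* i and i + 3 can share one MOV of the slice register, since the +3
   fits the immediate offset field; i + 3 replaced by i + 4 would not.  */
svint32_t read_two (svint32_t acc, svbool_t pg, uint32_t i)
  __arm_streaming __arm_inout("za")
{
  svint32_t a = svread_ver_za32_s32_m (acc, pg, 3, i);
  svint32_t b = svread_ver_za32_s32_m (acc, pg, 3, i + 3);
  return svadd_s32_x (pg, a, b);
}
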
"test_sme_acle.h" ++ ++/* ++** read_za64_s64_0_0_tied: ++** mov (w12-5), (?:wzr|#?0) ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, 0), ++ z0 = svread_ver_za64_m (z0, p0, 0, 0)) ++ ++/* ++** read_za64_s64_0_1_tied: ++** mov (w12-5), #?1 ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, 1), ++ z0 = svread_ver_za64_m (z0, p0, 0, 1)) ++ ++/* ++** read_za64_s64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_s64_0_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0v\.d\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 1), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0p2_tied: ++** add (w12-5), w0, #?2 ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0p2_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 2), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 2)) ++ ++/* ++** read_za64_s64_0_w0m1_tied: ++** sub (w12-5), w0, #?1 ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0m1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 - 1), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0 - 1)) ++ ++/* ++** read_za64_s64_1_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za1v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0), ++ z0 = svread_ver_za64_m (z0, p0, 1, w0)) ++ ++/* ++** read_za64_s64_1_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za1v\.d\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_1_w0p1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0 + 1), ++ z0 = svread_ver_za64_m (z0, p0, 1, w0 + 1)) ++ ++/* ++** read_za64_s64_7_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za7v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0), ++ z0 = svread_ver_za64_m (z0, p0, 7, w0)) ++ ++/* ++** read_za64_s64_7_w0p1_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za7v\.d\\1, 1\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_7_w0p1_tied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0 + 1), ++ z0 = svread_ver_za64_m (z0, p0, 7, w0 + 1)) ++ ++/* ++** read_za64_s64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0v\.d\\2, 0\ ++** | ++** mov (w12-5), w0 ++** mova z1\.d, p0/m, za0v\.d\\3, 0\ ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_READ_ZA (read_za64_s64_0_w0_untied, svint64_t, ++ z0 = svread_ver_za64_s64_m (z1, p0, 0, w0), ++ z0 = svread_ver_za64_m (z1, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_tied: ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** ret ++*/ ++TEST_READ_ZA (read_za64_u64_0_w0_tied, svuint64_t, ++ z0 = svread_ver_za64_u64_m (z0, p0, 0, w0), ++ z0 = svread_ver_za64_m (z0, p0, 0, w0)) ++ ++/* ++** read_za64_u64_0_w0_untied: ++** ( ++** mov (w12-5), w0 ++** mov z0\.d, z1\.d ++** mova z0\.d, p0/m, za0v\.d\\1, 0\ ++** | ++** mov z0\.d, z1\.d ++** mov (w12-5), w0 ++** mova z0\.d, p0/m, 
++**	mova	z0\.d, p0/m, za0v\.d\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.d, p0/m, za0v\.d\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za64_u64_0_w0_untied, svuint64_t,
++	      z0 = svread_ver_za64_u64_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za64_m (z1, p0, 0, w0))
++
++/*
++** read_za64_f64_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.d, p0/m, za0v\.d\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za64_f64_0_w0_tied, svfloat64_t,
++	      z0 = svread_ver_za64_f64_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za64_m (z0, p0, 0, w0))
++
++/*
++** read_za64_f64_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.d, p0/m, za0v\.d\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.d, p0/m, za0v\.d\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.d, p0/m, za0v\.d\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za64_f64_0_w0_untied, svfloat64_t,
++	      z0 = svread_ver_za64_f64_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za64_m (z1, p0, 0, w0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c
+new file mode 100644
+index 000000000..87564d1fa
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** read_za8_s8_0_0_tied:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, 0),
++	      z0 = svread_ver_za8_m (z0, p0, 0, 0))
++
++/*
++** read_za8_s8_0_1_tied:
++**	mov	(w1[2-5]), #?1
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, 1),
++	      z0 = svread_ver_za8_m (z0, p0, 0, 1))
++
++/*
++** read_za8_s8_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_w0_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za8_m (z0, p0, 0, w0))
++
++/*
++** read_za8_s8_0_w0p1_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 1\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_w0p1_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 1),
++	      z0 = svread_ver_za8_m (z0, p0, 0, w0 + 1))
++
++/*
++** read_za8_s8_0_w0p15_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 15\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_w0p15_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 15),
++	      z0 = svread_ver_za8_m (z0, p0, 0, w0 + 15))
++
++/*
++** read_za8_s8_0_w0p16_tied:
++**	add	(w1[2-5]), w0, #?16
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_w0p16_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 16),
++	      z0 = svread_ver_za8_m (z0, p0, 0, w0 + 16))
++
++/*
++** read_za8_s8_0_w0m1_tied:
++**	sub	(w1[2-5]), w0, #?1
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_w0m1_tied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 - 1),
++	      z0 = svread_ver_za8_m (z0, p0, 0, w0 - 1))
++
++/*
++** read_za8_s8_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.b, p0/m, za0v\.b\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.b, p0/m, za0v\.b\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za8_s8_0_w0_untied, svint8_t,
++	      z0 = svread_ver_za8_s8_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za8_m (z1, p0, 0, w0))
++
++/*
++** read_za8_u8_0_w0_tied:
++**	mov	(w1[2-5]), w0
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++**	ret
++*/
++TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t,
++	      z0 = svread_ver_za8_u8_m (z0, p0, 0, w0),
++	      z0 = svread_ver_za8_m (z0, p0, 0, w0))
++
++/*
++** read_za8_u8_0_w0_untied:
++** (
++**	mov	(w1[2-5]), w0
++**	mov	z0\.d, z1\.d
++**	mova	z0\.b, p0/m, za0v\.b\[\1, 0\]
++** |
++**	mov	z0\.d, z1\.d
++**	mov	(w1[2-5]), w0
++**	mova	z0\.b, p0/m, za0v\.b\[\2, 0\]
++** |
++**	mov	(w1[2-5]), w0
++**	mova	z1\.b, p0/m, za0v\.b\[\3, 0\]
++**	mov	z0\.d, z1\.d
++** )
++**	ret
++*/
++TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t,
++	      z0 = svread_ver_za8_u8_m (z1, p0, 0, w0),
++	      z0 = svread_ver_za8_m (z1, p0, 0, w0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c
+new file mode 100644
+index 000000000..057b6f21e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c
+@@ -0,0 +1,77 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za128_0_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1q	{ za0h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_0_0_0,
++	       svst1_hor_vnum_za128 (0, 0, p0, x1, 0),
++	       svst1_hor_vnum_za128 (0, 0, p0, x1, 0))
++
++/*
++** st1_vnum_za128_7_1_0:
++**	mov	(w1[2-5]), #?1
++**	st1q	{ za7h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_7_1_0,
++	       svst1_hor_vnum_za128 (7, 1, p0, x1, 0),
++	       svst1_hor_vnum_za128 (7, 1, p0, x1, 0))
++
++/*
++** st1_vnum_za128_11_1_5:
++**	incb	x1, all, mul #5
++**	mov	(w1[2-5]), #?6
++**	st1q	{ za11h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_11_1_5,
++	       svst1_hor_vnum_za128 (11, 1, p0, x1, 5),
++	       svst1_hor_vnum_za128 (11, 1, p0, x1, 5))
++
++/*
++** st1_vnum_za128_3_w0_0:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za3h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_3_w0_0,
++	       svst1_hor_vnum_za128 (3, w0, p0, x1, 0),
++	       svst1_hor_vnum_za128 (3, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za128_5_w0_0:
++**	incb	x1, all, mul #13
++**	add	(w1[2-5]), w0, #?13
++**	st1q	{ za5h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_5_w0_0,
++	       svst1_hor_vnum_za128 (5, w0, p0, x1, 13),
++	       svst1_hor_vnum_za128 (5, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za128_11_w0_0:
++**	cntb	(x[0-9]+)
++**	madd	(x[0-9]+), (?:\1, x2|x2, \1), x1
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++**	st1q	{ za11h\.q\[\3, 0\] }, p0, \[\2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_11_w0_0,
++	       svst1_hor_vnum_za128 (11, w0, p0, x1, x2),
++	       svst1_hor_vnum_za128 (11, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za128_15_w0p1_0:
++**	add	(w1[2-5]), w0, #?1
++**	st1q	{ za15h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_15_w0p1_0,
++	       svst1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0),
++	       svst1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0))
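
These _vnum stores offset both operands at once: the memory address advances by vnum SVE vectors of bytes (the INCB/CNTB/MADD sequences in the patterns) and the slice index advances by vnum as well. Illustrative sketch under the same assumptions; store_rows is a made-up name:

#include <arm_sme.h>

/* Store four consecutive rows of byte tile ZA0 to four consecutive
   vector-sized blocks of memory; vnum is added to both the slice
   index and the address, so the loop body needs no explicit offsets.  */
void store_rows (uint8_t *base, svbool_t pg, uint32_t slice)
  __arm_streaming __arm_in("za")
{
  for (int64_t v = 0; v < 4; ++v)
    svst1_hor_vnum_za8 (0, slice, pg, base, v);
}
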
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c
+new file mode 100644
+index 000000000..0b57dda0a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c
+@@ -0,0 +1,123 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za16_1_0_1:
++**	incb	x1
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1h	{ za1h\.h\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_0_1,
++	       svst1_hor_vnum_za16 (1, 0, p0, x1, 1),
++	       svst1_hor_vnum_za16 (1, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za16_1_1_1:
++**	incb	x1
++**	mov	(w1[2-5]), #?1
++**	st1h	{ za1h\.h\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_1_1,
++	       svst1_hor_vnum_za16 (1, 1, p0, x1, 1),
++	       svst1_hor_vnum_za16 (1, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za16_0_0_8:
++**	incb	x1, all, mul #8
++**	mov	(w1[2-5]), #?8
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_0_8,
++	       svst1_hor_vnum_za16 (0, 0, p0, x1, 8),
++	       svst1_hor_vnum_za16 (0, 0, p0, x1, 8))
++
++/*
++** st1_vnum_za16_0_1_8:
++**	incb	x1, all, mul #8
++**	mov	(w1[2-5]), #?9
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_1_8,
++	       svst1_hor_vnum_za16 (0, 1, p0, x1, 8),
++	       svst1_hor_vnum_za16 (0, 1, p0, x1, 8))
++
++/*
++** st1_vnum_za16_0_w0_0:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_0,
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, 0),
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za16_0_w0_1:
++**	incb	x1
++**	mov	(w1[2-5]), w0
++**	st1h	{ za0h\.h\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_1,
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, 1),
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za16_0_w0_7:
++**	incb	x1, all, mul #7
++**	mov	(w1[2-5]), w0
++**	st1h	{ za0h\.h\[\1, 7\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_7,
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, 7),
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, 7))
++
++/*
++** st1_vnum_za16_1_w0_8:
++**	incb	x1, all, mul #8
++**	add	(w1[2-5]), w0, #?8
++**	st1h	{ za1h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_w0_8,
++	       svst1_hor_vnum_za16 (1, w0, p0, x1, 8),
++	       svst1_hor_vnum_za16 (1, w0, p0, x1, 8))
++
++/*
++** st1_vnum_za16_1_w0_13:
++**	incb	x1, all, mul #13
++**	add	(w1[2-5]), w0, #?13
++**	st1h	{ za1h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_w0_13,
++	       svst1_hor_vnum_za16 (1, w0, p0, x1, 13),
++	       svst1_hor_vnum_za16 (1, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za16_0_w0_x2:
++**	cntb	(x[0-9]+)
++**	madd	(x[0-9]+), (?:\1, x2|x2, \1), x1
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++**	st1h	{ za0h\.h\[\3, 0\] }, p0, \[\2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_x2,
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, x2),
++	       svst1_hor_vnum_za16 (0, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za16_1_w0p1_0:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za1h\.h\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_w0p1_0,
++	       svst1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0),
++	       svst1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c
+new file mode 100644
+index 000000000..d4381182f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c
+@@ -0,0 +1,123 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za32_3_0_1:
++**	incb	x1
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1w	{ za3h\.s\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_3_0_1,
++	       svst1_hor_vnum_za32 (3, 0, p0, x1, 1),
++	       svst1_hor_vnum_za32 (3, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za32_2_1_1:
++**	incb	x1
++**	mov	(w1[2-5]), #?1
++**	st1w	{ za2h\.s\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_2_1_1,
++	       svst1_hor_vnum_za32 (2, 1, p0, x1, 1),
++	       svst1_hor_vnum_za32 (2, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za32_0_0_4:
++**	incb	x1, all, mul #4
++**	mov	(w1[2-5]), #?4
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_0_4,
++	       svst1_hor_vnum_za32 (0, 0, p0, x1, 4),
++	       svst1_hor_vnum_za32 (0, 0, p0, x1, 4))
++
++/*
++** st1_vnum_za32_2_1_4:
++**	incb	x1, all, mul #4
++**	mov	(w1[2-5]), #?5
++**	st1w	{ za2h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_2_1_4,
++	       svst1_hor_vnum_za32 (2, 1, p0, x1, 4),
++	       svst1_hor_vnum_za32 (2, 1, p0, x1, 4))
++
++/*
++** st1_vnum_za32_0_w0_0:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_0,
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, 0),
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za32_0_w0_1:
++**	incb	x1
++**	mov	(w1[2-5]), w0
++**	st1w	{ za0h\.s\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_1,
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, 1),
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za32_0_w0_3:
++**	incb	x1, all, mul #3
++**	mov	(w1[2-5]), w0
++**	st1w	{ za0h\.s\[\1, 3\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_3,
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, 3),
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, 3))
++
++/*
++** st1_vnum_za32_1_w0_4:
++**	incb	x1, all, mul #4
++**	add	(w1[2-5]), w0, #?4
++**	st1w	{ za1h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_1_w0_4,
++	       svst1_hor_vnum_za32 (1, w0, p0, x1, 4),
++	       svst1_hor_vnum_za32 (1, w0, p0, x1, 4))
++
++/*
++** st1_vnum_za32_3_w0_13:
++**	incb	x1, all, mul #13
++**	add	(w1[2-5]), w0, #?13
++**	st1w	{ za3h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_3_w0_13,
++	       svst1_hor_vnum_za32 (3, w0, p0, x1, 13),
++	       svst1_hor_vnum_za32 (3, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za32_0_w0_x2:
++**	cntb	(x[0-9]+)
++**	madd	(x[0-9]+), (?:\1, x2|x2, \1), x1
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++**	st1w	{ za0h\.s\[\3, 0\] }, p0, \[\2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_x2,
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, x2),
++	       svst1_hor_vnum_za32 (0, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za32_1_w0p1_0:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za1h\.s\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_1_w0p1_0,
++	       svst1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0),
++	       svst1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c
+new file mode 100644
+index 000000000..be6063712
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c
+@@ -0,0 +1,112 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za64_3_0_1:
++**	incb	x1
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1d	{ za3h\.d\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_3_0_1,
++	       svst1_hor_vnum_za64 (3, 0, p0, x1, 1),
++	       svst1_hor_vnum_za64 (3, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za64_7_1_1:
++**	incb	x1
++**	mov	(w1[2-5]), #?1
++**	st1d	{ za7h\.d\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_7_1_1,
++	       svst1_hor_vnum_za64 (7, 1, p0, x1, 1),
++	       svst1_hor_vnum_za64 (7, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za64_0_0_2:
++**	incb	x1, all, mul #2
++**	mov	(w1[2-5]), #?2
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_0_0_2,
++	       svst1_hor_vnum_za64 (0, 0, p0, x1, 2),
++	       svst1_hor_vnum_za64 (0, 0, p0, x1, 2))
++
++/*
++** st1_vnum_za64_5_1_2:
++**	incb	x1, all, mul #2
++**	mov	(w1[2-5]), #?3
++**	st1d	{ za5h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_5_1_2,
++	       svst1_hor_vnum_za64 (5, 1, p0, x1, 2),
++	       svst1_hor_vnum_za64 (5, 1, p0, x1, 2))
++
++/*
++** st1_vnum_za64_0_w0_0:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_0_w0_0,
++	       svst1_hor_vnum_za64 (0, w0, p0, x1, 0),
++	       svst1_hor_vnum_za64 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za64_0_w0_1:
++**	incb	x1
++**	mov	(w1[2-5]), w0
++**	st1d	{ za0h\.d\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_0_w0_1,
++	       svst1_hor_vnum_za64 (0, w0, p0, x1, 1),
++	       svst1_hor_vnum_za64 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za64_6_w0_2:
++**	incb	x1, all, mul #2
++**	add	(w1[2-5]), w0, #?2
++**	st1d	{ za6h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_6_w0_2,
++	       svst1_hor_vnum_za64 (6, w0, p0, x1, 2),
++	       svst1_hor_vnum_za64 (6, w0, p0, x1, 2))
++
++/*
++** st1_vnum_za64_2_w0_13:
++**	incb	x1, all, mul #13
++**	add	(w1[2-5]), w0, #?13
++**	st1d	{ za2h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_2_w0_13,
++	       svst1_hor_vnum_za64 (2, w0, p0, x1, 13),
++	       svst1_hor_vnum_za64 (2, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za64_4_w0_x2:
++**	cntb	(x[0-9]+)
++**	madd	(x[0-9]+), (?:\1, x2|x2, \1), x1
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++**	st1d	{ za4h\.d\[\3, 0\] }, p0, \[\2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_4_w0_x2,
++	       svst1_hor_vnum_za64 (4, w0, p0, x1, x2),
++	       svst1_hor_vnum_za64 (4, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za64_1_w0p1_0:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za1h\.d\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_1_w0p1_0,
++	       svst1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0),
++	       svst1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c
+new file mode 100644
+index 000000000..eed41d25e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c
+@@ -0,0 +1,112 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za8_0_0_1:
++**	incb	x1
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1b	{ za0h\.b\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_0_1,
++	       svst1_hor_vnum_za8 (0, 0, p0, x1, 1),
++	       svst1_hor_vnum_za8 (0, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za8_0_1_1:
++**	incb	x1
++**	mov	(w1[2-5]), #?1
++**	st1b	{ za0h\.b\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_1_1,
++	       svst1_hor_vnum_za8 (0, 1, p0, x1, 1),
++	       svst1_hor_vnum_za8 (0, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za8_0_0_16:
++**	incb	x1, all, mul #16
++**	mov	(w1[2-5]), #?16
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_0_16,
++	       svst1_hor_vnum_za8 (0, 0, p0, x1, 16),
++	       svst1_hor_vnum_za8 (0, 0, p0, x1, 16))
++
++/*
++** st1_vnum_za8_0_1_16:
++**	incb	x1, all, mul #16
++**	mov	(w1[2-5]), #?17
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_1_16,
++	       svst1_hor_vnum_za8 (0, 1, p0, x1, 16),
++	       svst1_hor_vnum_za8 (0, 1, p0, x1, 16))
++
++/*
++** st1_vnum_za8_0_w0_0:
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_0,
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 0),
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za8_0_w0_1:
++**	incb	x1
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_1,
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 1),
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za8_0_w0_15:
++**	incb	x1, all, mul #15
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 15\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_15,
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 15),
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 15))
++
++/*
++** st1_vnum_za8_0_w0_16:
++**	incb	x1, all, mul #16
++**	add	(w1[2-5]), w0, #?16
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_16,
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 16),
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, 16))
++
++/*
++** st1_vnum_za8_0_w0_x2:
++**	cntb	(x[0-9]+)
++**	mul	(x[0-9]+), (?:\1, x2|x2, \1)
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++**	st1b	{ za0h\.b\[\3, 0\] }, p0, \[x1, \2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_x2,
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, x2),
++	       svst1_hor_vnum_za8 (0, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za8_0_w0p1_0:
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0p1_0,
++	       svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0),
++	       svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c
+new file mode 100644
+index 000000000..5f3d613d5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c
+@@ -0,0 +1,83 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za128_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1q	{ za0h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_0_0,
++	       svst1_hor_za128 (0, 0, p0, x1),
++	       svst1_hor_za128 (0, 0, p0, x1))
++
++/*
++** st1_za128_0_1:
++**	mov	(w1[2-5]), #?1
++**	st1q	{ za0h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_0_1,
++	       svst1_hor_za128 (0, 1, p0, x1),
++	       svst1_hor_za128 (0, 1, p0, x1))
++
++/*
++** st1_za128_0_w0:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za0h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_0_w0,
++	       svst1_hor_za128 (0, w0, p0, x1),
++	       svst1_hor_za128 (0, w0, p0, x1))
++
++/*
++** st1_za128_0_w0_p1:
++**	add	(w1[2-5]), w0, #?1
++**	st1q	{ za0h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_0_w0_p1,
++	       svst1_hor_za128 (0, w0 + 1, p0, x1),
++	       svst1_hor_za128 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za128_7_w0:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za7h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_7_w0,
++	       svst1_hor_za128 (7, w0, p0, x1),
++	       svst1_hor_za128 (7, w0, p0, x1))
++
++/*
++** st1_za128_13_w0:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za13h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_13_w0,
++	       svst1_hor_za128 (13, w0, p0, x1),
++	       svst1_hor_za128 (13, w0, p0, x1))
++
++/*
++** st1_za128_15_w0:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za15h\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_15_w0,
++	       svst1_hor_za128 (15, w0, p0, x1),
++	       svst1_hor_za128 (15, w0, p0, x1))
++
++/*
++** st1_za128_9_w0_index:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za9h\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za128_9_w0_index,
++	       svst1_hor_za128 (9, w0, p0, x1 + x2 * 16),
++	       svst1_hor_za128 (9, w0, p0, x1 + x2 * 16))
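
The plain st1_hor_za* tests that follow check single-slice stores and that pointer arithmetic folds into the scaled base+index addressing modes (for example, x1 + x2 * 16 becomes [x1, x2, lsl #4] in the ST1Q patterns above). A sketch under the same assumptions as the earlier examples:

#include <arm_sme.h>

/* The i * 2 byte scaling implied by uint16_t pointer arithmetic can
   fold into ST1H's [base, index, lsl #1] addressing mode.  */
void store_row (uint16_t *base, int64_t i, svbool_t pg, uint32_t slice)
  __arm_streaming __arm_in("za")
{
  svst1_hor_za16 (1, slice, pg, base + i);
}
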
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c
+new file mode 100644
+index 000000000..206306b23
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c
+@@ -0,0 +1,126 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za16_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_0,
++	       svst1_hor_za16 (0, 0, p0, x1),
++	       svst1_hor_za16 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++   and use an offset of 7.  */
++/*
++** st1_za16_0_7:
++**	mov	(w1[2-5]), #?7
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_7,
++	       svst1_hor_za16 (0, 7, p0, x1),
++	       svst1_hor_za16 (0, 7, p0, x1))
++
++/*
++** st1_za16_0_8:
++**	mov	(w1[2-5]), #?8
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_8,
++	       svst1_hor_za16 (0, 8, p0, x1),
++	       svst1_hor_za16 (0, 8, p0, x1))
++
++/*
++** st1_za16_0_w0:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0,
++	       svst1_hor_za16 (0, w0, p0, x1),
++	       svst1_hor_za16 (0, w0, p0, x1))
++
++/*
++** st1_za16_0_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za0h\.h\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p1,
++	       svst1_hor_za16 (0, w0 + 1, p0, x1),
++	       svst1_hor_za16 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za16_0_w0_p7:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za0h\.h\[\1, 7\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p7,
++	       svst1_hor_za16 (0, w0 + 7, p0, x1),
++	       svst1_hor_za16 (0, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za1h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0,
++	       svst1_hor_za16 (1, w0, p0, x1),
++	       svst1_hor_za16 (1, w0, p0, x1))
++
++
++/*
++** st1_za16_1_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za1h\.h\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p1,
++	       svst1_hor_za16 (1, w0 + 1, p0, x1),
++	       svst1_hor_za16 (1, w0 + 1, p0, x1))
++
++/*
++** st1_za16_1_w0_p7:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za1h\.h\[\1, 7\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p7,
++	       svst1_hor_za16 (1, w0 + 7, p0, x1),
++	       svst1_hor_za16 (1, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0_p5_index:
++**	mov	(w1[2-5]), w0
++**	st1h	{ za1h\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p5_index,
++	       svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2),
++	       svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2))
++
++/*
++** st1_za16_0_w0_p8:
++**	add	(w1[2-5]), w0, #?8
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p8,
++	       svst1_hor_za16 (0, w0 + 8, p0, x1),
++	       svst1_hor_za16 (0, w0 + 8, p0, x1))
++
++/*
++** st1_za16_0_w0_m1:
++**	sub	(w1[2-5]), w0, #?1
++**	st1h	{ za0h\.h\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_m1,
++	       svst1_hor_za16 (0, w0 - 1, p0, x1),
++	       svst1_hor_za16 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c
+new file mode 100644
+index 000000000..ed9b2b2e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za32_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_0,
++	       svst1_hor_za32 (0, 0, p0, x1),
++	       svst1_hor_za32 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++   and use an offset of 3.  */
++/*
++** st1_za32_0_3:
++**	mov	(w1[2-5]), #?3
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_3,
++	       svst1_hor_za32 (0, 3, p0, x1),
++	       svst1_hor_za32 (0, 3, p0, x1))
++
++/*
++** st1_za32_0_4:
++**	mov	(w1[2-5]), #?4
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_4,
++	       svst1_hor_za32 (0, 4, p0, x1),
++	       svst1_hor_za32 (0, 4, p0, x1))
++
++/*
++** st1_za32_0_w0:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0,
++	       svst1_hor_za32 (0, w0, p0, x1),
++	       svst1_hor_za32 (0, w0, p0, x1))
++
++/*
++** st1_za32_0_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za0h\.s\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_p1,
++	       svst1_hor_za32 (0, w0 + 1, p0, x1),
++	       svst1_hor_za32 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za32_0_w0_p3:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za0h\.s\[\1, 3\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_p3,
++	       svst1_hor_za32 (0, w0 + 3, p0, x1),
++	       svst1_hor_za32 (0, w0 + 3, p0, x1))
++
++/*
++** st1_za32_3_w0:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za3h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_3_w0,
++	       svst1_hor_za32 (3, w0, p0, x1),
++	       svst1_hor_za32 (3, w0, p0, x1))
++
++/*
++** st1_za32_3_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za3h\.s\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_3_w0_p1,
++	       svst1_hor_za32 (3, w0 + 1, p0, x1),
++	       svst1_hor_za32 (3, w0 + 1, p0, x1))
++
++/*
++** st1_za32_3_w0_p3:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za3h\.s\[\1, 3\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_3_w0_p3,
++	       svst1_hor_za32 (3, w0 + 3, p0, x1),
++	       svst1_hor_za32 (3, w0 + 3, p0, x1))
++
++/*
++** st1_za32_1_w0_p2_index:
++**	mov	(w1[2-5]), w0
++**	st1w	{ za1h\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_1_w0_p2_index,
++	       svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4),
++	       svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4))
++
++/*
++** st1_za32_0_w0_p4:
++**	add	(w1[2-5]), w0, #?4
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_p4,
++	       svst1_hor_za32 (0, w0 + 4, p0, x1),
++	       svst1_hor_za32 (0, w0 + 4, p0, x1))
++
++/*
++** st1_za32_0_w0_m1:
++**	sub	(w1[2-5]), w0, #?1
++**	st1w	{ za0h\.s\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_m1,
++	       svst1_hor_za32 (0, w0 - 1, p0, x1),
++	       svst1_hor_za32 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c
+new file mode 100644
+index 000000000..3600f5b8f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c
+@@ -0,0 +1,105 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za64_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_0,
++	       svst1_hor_za64 (0, 0, p0, x1),
++	       svst1_hor_za64 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++   and use an offset of 1.  */
++/*
++** st1_za64_0_1:
++**	mov	(w1[2-5]), #?1
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_1,
++	       svst1_hor_za64 (0, 1, p0, x1),
++	       svst1_hor_za64 (0, 1, p0, x1))
++
++/*
++** st1_za64_0_2:
++**	mov	(w1[2-5]), #?2
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_2,
++	       svst1_hor_za64 (0, 2, p0, x1),
++	       svst1_hor_za64 (0, 2, p0, x1))
++
++/*
++** st1_za64_0_w0:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0,
++	       svst1_hor_za64 (0, w0, p0, x1),
++	       svst1_hor_za64 (0, w0, p0, x1))
++
++/*
++** st1_za64_0_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za0h\.d\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0_p1,
++	       svst1_hor_za64 (0, w0 + 1, p0, x1),
++	       svst1_hor_za64 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za64_7_w0:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za7h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_7_w0,
++	       svst1_hor_za64 (7, w0, p0, x1),
++	       svst1_hor_za64 (7, w0, p0, x1))
++
++/*
++** st1_za64_7_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za7h\.d\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_7_w0_p1,
++	       svst1_hor_za64 (7, w0 + 1, p0, x1),
++	       svst1_hor_za64 (7, w0 + 1, p0, x1))
++
++/*
++** st1_za64_5_w0_p1_index:
++**	mov	(w1[2-5]), w0
++**	st1d	{ za5h\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_5_w0_p1_index,
++	       svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8),
++	       svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8))
++
++/*
++** st1_za64_0_w0_p2:
++**	add	(w1[2-5]), w0, #?2
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0_p2,
++	       svst1_hor_za64 (0, w0 + 2, p0, x1),
++	       svst1_hor_za64 (0, w0 + 2, p0, x1))
++
++/*
++** st1_za64_0_w0_m1:
++**	sub	(w1[2-5]), w0, #?1
++**	st1d	{ za0h\.d\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0_m1,
++	       svst1_hor_za64 (0, w0 - 1, p0, x1),
++	       svst1_hor_za64 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c
+new file mode 100644
+index 000000000..9026fae9e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c
+@@ -0,0 +1,95 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za8_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_0,
++	       svst1_hor_za8 (0, 0, p0, x1),
++	       svst1_hor_za8 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++   and use an offset of 15.  */
++/*
++** st1_za8_0_15:
++**	mov	(w1[2-5]), #?15
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_15,
++	       svst1_hor_za8 (0, 15, p0, x1),
++	       svst1_hor_za8 (0, 15, p0, x1))
++
++/*
++** st1_za8_0_16:
++**	mov	(w1[2-5]), #?16
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_16,
++	       svst1_hor_za8 (0, 16, p0, x1),
++	       svst1_hor_za8 (0, 16, p0, x1))
++
++/*
++** st1_za8_0_w0:
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0,
++	       svst1_hor_za8 (0, w0, p0, x1),
++	       svst1_hor_za8 (0, w0, p0, x1))
++
++/*
++** st1_za8_0_w0_p1:
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 1\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p1,
++	       svst1_hor_za8 (0, w0 + 1, p0, x1),
++	       svst1_hor_za8 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za8_0_w0_p15:
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 15\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p15,
++	       svst1_hor_za8 (0, w0 + 15, p0, x1),
++	       svst1_hor_za8 (0, w0 + 15, p0, x1))
++
++/*
++** st1_za8_0_w0_p13_index:
++**	mov	(w1[2-5]), w0
++**	st1b	{ za0h\.b\[\1, 15\] }, p0, \[x1, x2\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p13_index,
++	       svst1_hor_za8 (0, w0 + 15, p0, x1 + x2),
++	       svst1_hor_za8 (0, w0 + 15, p0, x1 + x2))
++
++/*
++** st1_za8_0_w0_p16:
++**	add	(w1[2-5]), w0, #?16
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p16,
++	       svst1_hor_za8 (0, w0 + 16, p0, x1),
++	       svst1_hor_za8 (0, w0 + 16, p0, x1))
++
++/*
++** st1_za8_0_w0_m1:
++**	sub	(w1[2-5]), w0, #?1
++**	st1b	{ za0h\.b\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_m1,
++	       svst1_hor_za8 (0, w0 - 1, p0, x1),
++	       svst1_hor_za8 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c
+new file mode 100644
+index 000000000..210687a48
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c
+@@ -0,0 +1,77 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za128_0_0_0:
++**	mov	(w1[2-5]), (?:wzr|#?0)
++**	st1q	{ za0v\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_0_0_0,
++	       svst1_ver_vnum_za128 (0, 0, p0, x1, 0),
++	       svst1_ver_vnum_za128 (0, 0, p0, x1, 0))
++
++/*
++** st1_vnum_za128_7_1_0:
++**	mov	(w1[2-5]), #?1
++**	st1q	{ za7v\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_7_1_0,
++	       svst1_ver_vnum_za128 (7, 1, p0, x1, 0),
++	       svst1_ver_vnum_za128 (7, 1, p0, x1, 0))
++
++/*
++** st1_vnum_za128_11_1_5:
++**	incb	x1, all, mul #5
++**	mov	(w1[2-5]), #?6
++**	st1q	{ za11v\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_11_1_5,
++	       svst1_ver_vnum_za128 (11, 1, p0, x1, 5),
++	       svst1_ver_vnum_za128 (11, 1, p0, x1, 5))
++
++/*
++** st1_vnum_za128_3_w0_0:
++**	mov	(w1[2-5]), w0
++**	st1q	{ za3v\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_3_w0_0,
++	       svst1_ver_vnum_za128 (3, w0, p0, x1, 0),
++	       svst1_ver_vnum_za128 (3, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za128_5_w0_0:
++**	incb	x1, all, mul #13
++**	add	(w1[2-5]), w0, #?13
++**	st1q	{ za5v\.q\[\1, 0\] }, p0, \[x1\]
++**	ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_5_w0_0,
++	       svst1_ver_vnum_za128 (5, w0, p0, x1, 13),
++	       svst1_ver_vnum_za128 (5, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za128_11_w0_0:
++**	cntb	(x[0-9]+)
++**	madd	(x[0-9]+), (?:\1, x2|x2, \1), x1
++**	add	(w1[2-5]), (?:w0, w2|w2, w0)
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_11_w0_0,
++ svst1_ver_vnum_za128 (11, w0, p0, x1, x2),
++ svst1_ver_vnum_za128 (11, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za128_15_w0p1_0:
++** add (w1[2-5]), w0, #?1
++** st1q { za15v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za128_15_w0p1_0,
++ svst1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0),
++ svst1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c
+new file mode 100644
+index 000000000..f75a22402
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c
+@@ -0,0 +1,123 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za16_1_0_1:
++** incb x1
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_0_1,
++ svst1_ver_vnum_za16 (1, 0, p0, x1, 1),
++ svst1_ver_vnum_za16 (1, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za16_1_1_1:
++** incb x1
++** mov (w1[2-5]), #?1
++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_1_1,
++ svst1_ver_vnum_za16 (1, 1, p0, x1, 1),
++ svst1_ver_vnum_za16 (1, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za16_0_0_8:
++** incb x1, all, mul #8
++** mov (w1[2-5]), #?8
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_0_8,
++ svst1_ver_vnum_za16 (0, 0, p0, x1, 8),
++ svst1_ver_vnum_za16 (0, 0, p0, x1, 8))
++
++/*
++** st1_vnum_za16_0_1_8:
++** incb x1, all, mul #8
++** mov (w1[2-5]), #?9
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_1_8,
++ svst1_ver_vnum_za16 (0, 1, p0, x1, 8),
++ svst1_ver_vnum_za16 (0, 1, p0, x1, 8))
++
++/*
++** st1_vnum_za16_0_w0_0:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_0,
++ svst1_ver_vnum_za16 (0, w0, p0, x1, 0),
++ svst1_ver_vnum_za16 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za16_0_w0_1:
++** incb x1
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_1,
++ svst1_ver_vnum_za16 (0, w0, p0, x1, 1),
++ svst1_ver_vnum_za16 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za16_0_w0_7:
++** incb x1, all, mul #7
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_7,
++ svst1_ver_vnum_za16 (0, w0, p0, x1, 7),
++ svst1_ver_vnum_za16 (0, w0, p0, x1, 7))
++
++/*
++** st1_vnum_za16_1_w0_8:
++** incb x1, all, mul #8
++** add (w1[2-5]), w0, #?8
++** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_w0_8,
++ svst1_ver_vnum_za16 (1, w0, p0, x1, 8),
++ svst1_ver_vnum_za16 (1, w0, p0, x1, 8))
++
++/*
++** st1_vnum_za16_1_w0_13:
++** incb x1, all, mul #13
++** add (w1[2-5]), w0, #?13
++** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_w0_13,
++ svst1_ver_vnum_za16 (1, w0, p0, x1, 13),
++ svst1_ver_vnum_za16 (1, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za16_0_w0_x2:
++** cntb (x[0-9]+)
++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
++** add (w1[2-5]), (?:w0, w2|w2, w0)
++** st1h { za0v\.h\[\3, 0\] }, p0, \[\2\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_0_w0_x2,
++ svst1_ver_vnum_za16 (0, w0, p0, x1, x2),
++ svst1_ver_vnum_za16 (0, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za16_1_w0p1_0:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za16_1_w0p1_0,
++ svst1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0),
++ svst1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c
+new file mode 100644
+index 000000000..45db67a9f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c
+@@ -0,0 +1,123 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za32_3_0_1:
++** incb x1
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1w { za3v\.s\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_3_0_1,
++ svst1_ver_vnum_za32 (3, 0, p0, x1, 1),
++ svst1_ver_vnum_za32 (3, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za32_2_1_1:
++** incb x1
++** mov (w1[2-5]), #?1
++** st1w { za2v\.s\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_2_1_1,
++ svst1_ver_vnum_za32 (2, 1, p0, x1, 1),
++ svst1_ver_vnum_za32 (2, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za32_0_0_4:
++** incb x1, all, mul #4
++** mov (w1[2-5]), #?4
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_0_4,
++ svst1_ver_vnum_za32 (0, 0, p0, x1, 4),
++ svst1_ver_vnum_za32 (0, 0, p0, x1, 4))
++
++/*
++** st1_vnum_za32_2_1_4:
++** incb x1, all, mul #4
++** mov (w1[2-5]), #?5
++** st1w { za2v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_2_1_4,
++ svst1_ver_vnum_za32 (2, 1, p0, x1, 4),
++ svst1_ver_vnum_za32 (2, 1, p0, x1, 4))
++
++/*
++** st1_vnum_za32_0_w0_0:
++** mov (w1[2-5]), w0
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_0,
++ svst1_ver_vnum_za32 (0, w0, p0, x1, 0),
++ svst1_ver_vnum_za32 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za32_0_w0_1:
++** incb x1
++** mov (w1[2-5]), w0
++** st1w { za0v\.s\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_1,
++ svst1_ver_vnum_za32 (0, w0, p0, x1, 1),
++ svst1_ver_vnum_za32 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za32_0_w0_3:
++** incb x1, all, mul #3
++** mov (w1[2-5]), w0
++** st1w { za0v\.s\[\1, 3\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_3,
++ svst1_ver_vnum_za32 (0, w0, p0, x1, 3),
++ svst1_ver_vnum_za32 (0, w0, p0, x1, 3))
++
++/*
++** st1_vnum_za32_1_w0_4:
++** incb x1, all, mul #4
++** add (w1[2-5]), w0, #?4
++** st1w { za1v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_1_w0_4,
++ svst1_ver_vnum_za32 (1, w0, p0, x1, 4),
++ svst1_ver_vnum_za32 (1, w0, p0, x1, 4))
++
++/*
++** st1_vnum_za32_3_w0_13:
++** incb x1, all, mul #13
++** add (w1[2-5]), w0, #?13
++** st1w { za3v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_3_w0_13,
++ svst1_ver_vnum_za32 (3, w0, p0, x1, 13),
++ svst1_ver_vnum_za32 (3, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za32_0_w0_x2:
++** cntb (x[0-9]+)
++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
++** add (w1[2-5]), (?:w0, w2|w2, w0)
++** st1w { za0v\.s\[\3, 0\] }, p0, \[\2\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_0_w0_x2,
++ svst1_ver_vnum_za32 (0, w0, p0, x1, x2),
++ svst1_ver_vnum_za32 (0, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za32_1_w0p1_0:
++** mov (w1[2-5]), w0
++** st1w { za1v\.s\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za32_1_w0p1_0,
++ svst1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0),
++ svst1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c
+new file mode 100644
+index 000000000..bd061fc61
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c
+@@ -0,0 +1,112 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za64_3_0_1:
++** incb x1
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1d { za3v\.d\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_3_0_1,
++ svst1_ver_vnum_za64 (3, 0, p0, x1, 1),
++ svst1_ver_vnum_za64 (3, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za64_7_1_1:
++** incb x1
++** mov (w1[2-5]), #?1
++** st1d { za7v\.d\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_7_1_1,
++ svst1_ver_vnum_za64 (7, 1, p0, x1, 1),
++ svst1_ver_vnum_za64 (7, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za64_0_0_2:
++** incb x1, all, mul #2
++** mov (w1[2-5]), #?2
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_0_0_2,
++ svst1_ver_vnum_za64 (0, 0, p0, x1, 2),
++ svst1_ver_vnum_za64 (0, 0, p0, x1, 2))
++
++/*
++** st1_vnum_za64_5_1_2:
++** incb x1, all, mul #2
++** mov (w1[2-5]), #?3
++** st1d { za5v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_5_1_2,
++ svst1_ver_vnum_za64 (5, 1, p0, x1, 2),
++ svst1_ver_vnum_za64 (5, 1, p0, x1, 2))
++
++/*
++** st1_vnum_za64_0_w0_0:
++** mov (w1[2-5]), w0
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_0_w0_0,
++ svst1_ver_vnum_za64 (0, w0, p0, x1, 0),
++ svst1_ver_vnum_za64 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za64_0_w0_1:
++** incb x1
++** mov (w1[2-5]), w0
++** st1d { za0v\.d\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_0_w0_1,
++ svst1_ver_vnum_za64 (0, w0, p0, x1, 1),
++ svst1_ver_vnum_za64 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za64_6_w0_2:
++** incb x1, all, mul #2
++** add (w1[2-5]), w0, #?2
++** st1d { za6v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_6_w0_2,
++ svst1_ver_vnum_za64 (6, w0, p0, x1, 2),
++ svst1_ver_vnum_za64 (6, w0, p0, x1, 2))
++
++/*
++** st1_vnum_za64_2_w0_13:
++** incb x1, all, mul #13
++** add (w1[2-5]), w0, #?13
++** st1d { za2v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_2_w0_13,
++ svst1_ver_vnum_za64 (2, w0, p0, x1, 13),
++ svst1_ver_vnum_za64 (2, w0, p0, x1, 13))
++
++/*
++** st1_vnum_za64_4_w0_x2:
++** cntb (x[0-9]+)
++** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
++** add (w1[2-5]), (?:w0, w2|w2, w0)
++** st1d { za4v\.d\[\3, 0\] }, p0, \[\2\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_4_w0_x2,
++ svst1_ver_vnum_za64 (4, w0, p0, x1, x2),
++ svst1_ver_vnum_za64 (4, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za64_1_w0p1_0:
++** mov (w1[2-5]), w0
++** st1d { za1v\.d\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za64_1_w0p1_0,
++ svst1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0),
++ svst1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c
+new file mode 100644
+index 000000000..b15a7eb08
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c
+@@ -0,0 +1,112 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_vnum_za8_0_0_1:
++** incb x1
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_0_1,
++ svst1_ver_vnum_za8 (0, 0, p0, x1, 1),
++ svst1_ver_vnum_za8 (0, 0, p0, x1, 1))
++
++/*
++** st1_vnum_za8_0_1_1:
++** incb x1
++** mov (w1[2-5]), #?1
++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_1_1,
++ svst1_ver_vnum_za8 (0, 1, p0, x1, 1),
++ svst1_ver_vnum_za8 (0, 1, p0, x1, 1))
++
++/*
++** st1_vnum_za8_0_0_16:
++** incb x1, all, mul #16
++** mov (w1[2-5]), #?16
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_0_16,
++ svst1_ver_vnum_za8 (0, 0, p0, x1, 16),
++ svst1_ver_vnum_za8 (0, 0, p0, x1, 16))
++
++/*
++** st1_vnum_za8_0_1_16:
++** incb x1, all, mul #16
++** mov (w1[2-5]), #?17
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_1_16,
++ svst1_ver_vnum_za8 (0, 1, p0, x1, 16),
++ svst1_ver_vnum_za8 (0, 1, p0, x1, 16))
++
++/*
++** st1_vnum_za8_0_w0_0:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_0,
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 0),
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 0))
++
++/*
++** st1_vnum_za8_0_w0_1:
++** incb x1
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_1,
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 1),
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 1))
++
++/*
++** st1_vnum_za8_0_w0_15:
++** incb x1, all, mul #15
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 15\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_15,
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 15),
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 15))
++
++/*
++** st1_vnum_za8_0_w0_16:
++** incb x1, all, mul #16
++** add (w1[2-5]), w0, #?16
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_16,
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 16),
++ svst1_ver_vnum_za8 (0, w0, p0, x1, 16))
++
++/*
++** st1_vnum_za8_0_w0_x2:
++** cntb (x[0-9]+)
++** mul (x[0-9]+), (?:\1, x2|x2, \1)
++** add (w1[2-5]), (?:w0, w2|w2, w0)
++** st1b { za0v\.b\[\3, 0\] }, p0, \[x1, \2\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0_x2,
++ svst1_ver_vnum_za8 (0, w0, p0, x1, x2),
++ svst1_ver_vnum_za8 (0, w0, p0, x1, x2))
++
++/*
++** st1_vnum_za8_0_w0p1_0:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_vnum_za8_0_w0p1_0,
++ svst1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0),
++ svst1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c
+new file mode 100644
+index 000000000..7be6d5a5f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c
+@@ -0,0 +1,83 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za128_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_0_0,
++ svst1_ver_za128 (0, 0, p0, x1),
++ svst1_ver_za128 (0, 0, p0, x1))
++
++/*
++** st1_za128_0_1:
++** mov (w1[2-5]), #?1
++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_0_1,
++ svst1_ver_za128 (0, 1, p0, x1),
++ svst1_ver_za128 (0, 1, p0, x1))
++
++/*
++** st1_za128_0_w0:
++** mov (w1[2-5]), w0
++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_0_w0,
++ svst1_ver_za128 (0, w0, p0, x1),
++ svst1_ver_za128 (0, w0, p0, x1))
++
++/*
++** st1_za128_0_w0_p1:
++** add (w1[2-5]), w0, #?1
++** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_0_w0_p1,
++ svst1_ver_za128 (0, w0 + 1, p0, x1),
++ svst1_ver_za128 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za128_7_w0:
++** mov (w1[2-5]), w0
++** st1q { za7v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_7_w0,
++ svst1_ver_za128 (7, w0, p0, x1),
++ svst1_ver_za128 (7, w0, p0, x1))
++
++/*
++** st1_za128_13_w0:
++** mov (w1[2-5]), w0
++** st1q { za13v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_13_w0,
++ svst1_ver_za128 (13, w0, p0, x1),
++ svst1_ver_za128 (13, w0, p0, x1))
++
++/*
++** st1_za128_15_w0:
++** mov (w1[2-5]), w0
++** st1q { za15v\.q\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_15_w0,
++ svst1_ver_za128 (15, w0, p0, x1),
++ svst1_ver_za128 (15, w0, p0, x1))
++
++/*
++** st1_za128_9_w0_index:
++** mov (w1[2-5]), w0
++** st1q { za9v\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\]
++** ret
++*/
++TEST_STORE_ZA (st1_za128_9_w0_index,
++ svst1_ver_za128 (9, w0, p0, x1 + x2 * 16),
++ svst1_ver_za128 (9, w0, p0, x1 + x2 * 16))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c
+new file mode 100644
+index 000000000..1bbf12a14
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c
+@@ -0,0 +1,126 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za16_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_0,
++ svst1_ver_za16 (0, 0, p0, x1),
++ svst1_ver_za16 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 7. */
++/*
++** st1_za16_0_7:
++** mov (w1[2-5]), #?7
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_7,
++ svst1_ver_za16 (0, 7, p0, x1),
++ svst1_ver_za16 (0, 7, p0, x1))
++
++/*
++** st1_za16_0_8:
++** mov (w1[2-5]), #?8
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_8,
++ svst1_ver_za16 (0, 8, p0, x1),
++ svst1_ver_za16 (0, 8, p0, x1))
++
++/*
++** st1_za16_0_w0:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0,
++ svst1_ver_za16 (0, w0, p0, x1),
++ svst1_ver_za16 (0, w0, p0, x1))
++
++/*
++** st1_za16_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p1,
++ svst1_ver_za16 (0, w0 + 1, p0, x1),
++ svst1_ver_za16 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za16_0_w0_p7:
++** mov (w1[2-5]), w0
++** st1h { za0v\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p7,
++ svst1_ver_za16 (0, w0 + 7, p0, x1),
++ svst1_ver_za16 (0, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0,
++ svst1_ver_za16 (1, w0, p0, x1),
++ svst1_ver_za16 (1, w0, p0, x1))
++
++
++/*
++** st1_za16_1_w0_p1:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p1,
++ svst1_ver_za16 (1, w0 + 1, p0, x1),
++ svst1_ver_za16 (1, w0 + 1, p0, x1))
++
++/*
++** st1_za16_1_w0_p7:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 7\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p7,
++ svst1_ver_za16 (1, w0 + 7, p0, x1),
++ svst1_ver_za16 (1, w0 + 7, p0, x1))
++
++/*
++** st1_za16_1_w0_p5_index:
++** mov (w1[2-5]), w0
++** st1h { za1v\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_1_w0_p5_index,
++ svst1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2),
++ svst1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2))
++
++/*
++** st1_za16_0_w0_p8:
++** add (w1[2-5]), w0, #?8
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_p8,
++ svst1_ver_za16 (0, w0 + 8, p0, x1),
++ svst1_ver_za16 (0, w0 + 8, p0, x1))
++
++/*
++** st1_za16_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za16_0_w0_m1,
++ svst1_ver_za16 (0, w0 - 1, p0, x1),
++ svst1_ver_za16 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c
+new file mode 100644
+index 000000000..9809e9708
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c
+@@ -0,0 +1,125 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za32_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_0,
++ svst1_ver_za32 (0, 0, p0, x1),
++ svst1_ver_za32 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 3. */
++/*
++** st1_za32_0_3:
++** mov (w1[2-5]), #?3
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_3,
++ svst1_ver_za32 (0, 3, p0, x1),
++ svst1_ver_za32 (0, 3, p0, x1))
++
++/*
++** st1_za32_0_4:
++** mov (w1[2-5]), #?4
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_4,
++ svst1_ver_za32 (0, 4, p0, x1),
++ svst1_ver_za32 (0, 4, p0, x1))
++
++/*
++** st1_za32_0_w0:
++** mov (w1[2-5]), w0
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0,
++ svst1_ver_za32 (0, w0, p0, x1),
++ svst1_ver_za32 (0, w0, p0, x1))
++
++/*
++** st1_za32_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1w { za0v\.s\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_p1,
++ svst1_ver_za32 (0, w0 + 1, p0, x1),
++ svst1_ver_za32 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za32_0_w0_p3:
++** mov (w1[2-5]), w0
++** st1w { za0v\.s\[\1, 3\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_p3,
++ svst1_ver_za32 (0, w0 + 3, p0, x1),
++ svst1_ver_za32 (0, w0 + 3, p0, x1))
++
++/*
++** st1_za32_3_w0:
++** mov (w1[2-5]), w0
++** st1w { za3v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_3_w0,
++ svst1_ver_za32 (3, w0, p0, x1),
++ svst1_ver_za32 (3, w0, p0, x1))
++
++/*
++** st1_za32_3_w0_p1:
++** mov (w1[2-5]), w0
++** st1w { za3v\.s\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_3_w0_p1,
++ svst1_ver_za32 (3, w0 + 1, p0, x1),
++ svst1_ver_za32 (3, w0 + 1, p0, x1))
++
++/*
++** st1_za32_3_w0_p3:
++** mov (w1[2-5]), w0
++** st1w { za3v\.s\[\1, 3\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_3_w0_p3,
++ svst1_ver_za32 (3, w0 + 3, p0, x1),
++ svst1_ver_za32 (3, w0 + 3, p0, x1))
++
++/*
++** st1_za32_1_w0_p2_index:
++** mov (w1[2-5]), w0
++** st1w { za1v\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_1_w0_p2_index,
++ svst1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4),
++ svst1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4))
++
++/*
++** st1_za32_0_w0_p4:
++** add (w1[2-5]), w0, #?4
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_p4,
++ svst1_ver_za32 (0, w0 + 4, p0, x1),
++ svst1_ver_za32 (0, w0 + 4, p0, x1))
++
++/*
++** st1_za32_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za32_0_w0_m1,
++ svst1_ver_za32 (0, w0 - 1, p0, x1),
++ svst1_ver_za32 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c
+new file mode 100644
+index 000000000..0e93f4da3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c
+@@ -0,0 +1,105 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za64_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_0,
++ svst1_ver_za64 (0, 0, p0, x1),
++ svst1_ver_za64 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 1. */
++/*
++** st1_za64_0_1:
++** mov (w1[2-5]), #?1
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_1,
++ svst1_ver_za64 (0, 1, p0, x1),
++ svst1_ver_za64 (0, 1, p0, x1))
++
++/*
++** st1_za64_0_2:
++** mov (w1[2-5]), #?2
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_2,
++ svst1_ver_za64 (0, 2, p0, x1),
++ svst1_ver_za64 (0, 2, p0, x1))
++
++/*
++** st1_za64_0_w0:
++** mov (w1[2-5]), w0
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0,
++ svst1_ver_za64 (0, w0, p0, x1),
++ svst1_ver_za64 (0, w0, p0, x1))
++
++/*
++** st1_za64_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1d { za0v\.d\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0_p1,
++ svst1_ver_za64 (0, w0 + 1, p0, x1),
++ svst1_ver_za64 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za64_7_w0:
++** mov (w1[2-5]), w0
++** st1d { za7v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_7_w0,
++ svst1_ver_za64 (7, w0, p0, x1),
++ svst1_ver_za64 (7, w0, p0, x1))
++
++/*
++** st1_za64_7_w0_p1:
++** mov (w1[2-5]), w0
++** st1d { za7v\.d\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_7_w0_p1,
++ svst1_ver_za64 (7, w0 + 1, p0, x1),
++ svst1_ver_za64 (7, w0 + 1, p0, x1))
++
++/*
++** st1_za64_5_w0_p1_index:
++** mov (w1[2-5]), w0
++** st1d { za5v\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_5_w0_p1_index,
++ svst1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8),
++ svst1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8))
++
++/*
++** st1_za64_0_w0_p2:
++** add (w1[2-5]), w0, #?2
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0_p2,
++ svst1_ver_za64 (0, w0 + 2, p0, x1),
++ svst1_ver_za64 (0, w0 + 2, p0, x1))
++
++/*
++** st1_za64_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za64_0_w0_m1,
++ svst1_ver_za64 (0, w0 - 1, p0, x1),
++ svst1_ver_za64 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c
+new file mode 100644
+index 000000000..c76b5c28b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c
+@@ -0,0 +1,95 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** st1_za8_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_0,
++ svst1_ver_za8 (0, 0, p0, x1),
++ svst1_ver_za8 (0, 0, p0, x1))
++
++/* It would also be OK (and perhaps better) to move 0 into a register
++ and use an offset of 15. */
++/*
++** st1_za8_0_15:
++** mov (w1[2-5]), #?15
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_15,
++ svst1_ver_za8 (0, 15, p0, x1),
++ svst1_ver_za8 (0, 15, p0, x1))
++
++/*
++** st1_za8_0_16:
++** mov (w1[2-5]), #?16
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_16,
++ svst1_ver_za8 (0, 16, p0, x1),
++ svst1_ver_za8 (0, 16, p0, x1))
++
++/*
++** st1_za8_0_w0:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0,
++ svst1_ver_za8 (0, w0, p0, x1),
++ svst1_ver_za8 (0, w0, p0, x1))
++
++/*
++** st1_za8_0_w0_p1:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p1,
++ svst1_ver_za8 (0, w0 + 1, p0, x1),
++ svst1_ver_za8 (0, w0 + 1, p0, x1))
++
++/*
++** st1_za8_0_w0_p15:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 15\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p15,
++ svst1_ver_za8 (0, w0 + 15, p0, x1),
++ svst1_ver_za8 (0, w0 + 15, p0, x1))
++
++/*
++** st1_za8_0_w0_p13_index:
++** mov (w1[2-5]), w0
++** st1b { za0v\.b\[\1, 15\] }, p0, \[x1, x2\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p13_index,
++ svst1_ver_za8 (0, w0 + 15, p0, x1 + x2),
++ svst1_ver_za8 (0, w0 + 15, p0, x1 + x2))
++
++/*
++** st1_za8_0_w0_p16:
++** add (w1[2-5]), w0, #?16
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_p16,
++ svst1_ver_za8 (0, w0 + 16, p0, x1),
++ svst1_ver_za8 (0, w0 + 16, p0, x1))
++
++/*
++** st1_za8_0_w0_m1:
++** sub (w1[2-5]), w0, #?1
++** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\]
++** ret
++*/
++TEST_STORE_ZA (st1_za8_0_w0_m1,
++ svst1_ver_za8 (0, w0 - 1, p0, x1),
++ svst1_ver_za8 (0, w0 - 1, p0, x1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c
+new file mode 100644
+index 000000000..3ef7e0c09
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c
+@@ -0,0 +1,147 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** str_vnum_za_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_0_0,
++ svstr_vnum_za (0, x1, 0),
++ svstr_vnum_za (0, x1, 0))
++
++/*
++** str_vnum_za_0_1:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_0_1,
++ svstr_vnum_za (0, x1, 1),
++ svstr_vnum_za (0, x1, 1))
++
++/*
++** str_vnum_za_1_0:
++** mov (w1[2-5]), #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_1_0,
++ svstr_vnum_za (1, x1, 0),
++ svstr_vnum_za (1, x1, 0))
++
++/*
++** str_vnum_za_1_2:
++** mov (w1[2-5]), #?1
++** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_1_2,
++ svstr_vnum_za (1, x1, 2),
++ svstr_vnum_za (1, x1, 2))
++
++/*
++** str_vnum_za_w0_0:
++** mov (w1[2-5]), w0
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_0,
++ svstr_vnum_za (w0, x1, 0),
++ svstr_vnum_za (w0, x1, 0))
++
++/*
++** str_vnum_za_w0_1:
++** mov (w1[2-5]), w0
++** str za\[\1, 1\], \[x1, #1, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_1,
++ svstr_vnum_za (w0, x1, 1),
++ svstr_vnum_za (w0, x1, 1))
++
++/*
++** str_vnum_za_w0_13:
++** mov (w1[2-5]), w0
++** str za\[\1, 13\], \[x1, #13, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_13,
++ svstr_vnum_za (w0, x1, 13),
++ svstr_vnum_za (w0, x1, 13))
++
++/*
++** str_vnum_za_w0_15:
++** mov (w1[2-5]), w0
++** str za\[\1, 15\], \[x1, #15, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_15,
++ svstr_vnum_za (w0, x1, 15),
++ svstr_vnum_za (w0, x1, 15))
++
++/*
++** str_vnum_za_w0_16:
++** (
++** add (w1[2-5]), w0, #?16
++** incb x1, all, mul #16
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** |
++** incb x1, all, mul #16
++** add (w1[2-5]), w0, #?16
++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_16,
++ svstr_vnum_za (w0, x1, 16),
++ svstr_vnum_za (w0, x1, 16))
++
++/*
++** str_vnum_za_w0_m1:
++** (
++** sub (w1[2-5]), w0, #?1
++** decb x1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** |
++** decb x1
++** sub (w1[2-5]), w0, #?1
++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_m1,
++ svstr_vnum_za (w0, x1, -1),
++ svstr_vnum_za (w0, x1, -1))
++
++/*
++** str_vnum_za_w0p1_0:
++** add (w1[2-5]), w0, #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0p1_0,
++ svstr_vnum_za (w0 + 1, x1, 0),
++ svstr_vnum_za (w0 + 1, x1, 0))
++
++/*
++** str_vnum_za_w0m1_1:
++** sub (w1[2-5]), w0, #?1
++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0m1_1,
++ svstr_vnum_za (w0 - 1, x1, 1),
++ svstr_vnum_za (w0 - 1, x1, 1))
++
++/*
++** str_vnum_za_w0p2_3:
++** add (w1[2-5]), w0, #?2
++** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0p2_3,
++ svstr_vnum_za (w0 + 2, x1, 3),
++ svstr_vnum_za (w0 + 2, x1, 3))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c
+new file mode 100644
+index 000000000..7cd09e67c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c
+@@ -0,0 +1,148 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define STREAMING_COMPATIBLE
++#include "test_sme_acle.h"
++
++/*
++** str_vnum_za_0_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_0_0,
++ svstr_vnum_za (0, x1, 0),
++ svstr_vnum_za (0, x1, 0))
++
++/*
++** str_vnum_za_0_1:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_0_1,
++ svstr_vnum_za (0, x1, 1),
++ svstr_vnum_za (0, x1, 1))
++
++/*
++** str_vnum_za_1_0:
++** mov (w1[2-5]), #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_1_0,
++ svstr_vnum_za (1, x1, 0),
++ svstr_vnum_za (1, x1, 0))
++
++/*
++** str_vnum_za_1_2:
++** mov (w1[2-5]), #?1
++** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_1_2,
++ svstr_vnum_za (1, x1, 2),
++ svstr_vnum_za (1, x1, 2))
++
++/*
++** str_vnum_za_w0_0:
++** mov (w1[2-5]), w0
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_0,
++ svstr_vnum_za (w0, x1, 0),
++ svstr_vnum_za (w0, x1, 0))
++
++/*
++** str_vnum_za_w0_1:
++** mov (w1[2-5]), w0
++** str za\[\1, 1\], \[x1, #1, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_1,
++ svstr_vnum_za (w0, x1, 1),
++ svstr_vnum_za (w0, x1, 1))
++
++/*
++** str_vnum_za_w0_13:
++** mov (w1[2-5]), w0
++** str za\[\1, 13\], \[x1, #13, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_13,
++ svstr_vnum_za (w0, x1, 13),
++ svstr_vnum_za (w0, x1, 13))
++
++/*
++** str_vnum_za_w0_15:
++** mov (w1[2-5]), w0
++** str za\[\1, 15\], \[x1, #15, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_15,
++ svstr_vnum_za (w0, x1, 15),
++ svstr_vnum_za (w0, x1, 15))
++
++/*
++** str_vnum_za_w0_16:
++** (
++** add (w1[2-5]), w0, #?16
++** addsvl (x[0-9]+), x1, #16
++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
++** |
++** addsvl (x[0-9]+), x1, #16
++** add (w1[2-5]), w0, #?16
++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_16,
++ svstr_vnum_za (w0, x1, 16),
++ svstr_vnum_za (w0, x1, 16))
++
++/*
++** str_vnum_za_w0_m1:
++** (
++** sub (w1[2-5]), w0, #?1
++** addsvl (x[0-9]+), x1, #-1
++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
++** |
++** addsvl (x[0-9]+), x1, #-1
++** sub (w1[2-5]), w0, #?1
++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0_m1,
++ svstr_vnum_za (w0, x1, -1),
++ svstr_vnum_za (w0, x1, -1))
++
++/*
++** str_vnum_za_w0p1_0:
++** add (w1[2-5]), w0, #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0p1_0,
++ svstr_vnum_za (w0 + 1, x1, 0),
++ svstr_vnum_za (w0 + 1, x1, 0))
++
++/*
++** str_vnum_za_w0m1_1:
++** sub (w1[2-5]), w0, #?1
++** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0m1_1,
++ svstr_vnum_za (w0 - 1, x1, 1),
++ svstr_vnum_za (w0 - 1, x1, 1))
++
++/*
++** str_vnum_za_w0p2_3:
++** add (w1[2-5]), w0, #?2
++** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_vnum_za_w0p2_3,
++ svstr_vnum_za (w0 + 2, x1, 3),
++ svstr_vnum_za (w0 + 2, x1, 3))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c
+new file mode 100644
+index 000000000..4d953c596
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c
+@@ -0,0 +1,124 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** str_za_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_0,
++ svstr_za (0, x1),
++ svstr_za (0, x1))
++
++/*
++** str_za_1:
++** mov (w1[2-5]), #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_1,
++ svstr_za (1, x1),
++ svstr_za (1, x1))
++
++/*
++** str_za_w0:
++** mov (w1[2-5]), w0
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0,
++ svstr_za (w0, x1),
++ svstr_za (w0, x1))
++
++/*
++** str_za_w0_1_vnum:
++** mov (w1[2-5]), w0
++** str za\[\1, 1\], \[x1, #1, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0_1_vnum,
++ svstr_za (w0 + 1, x1 + svcntsb ()),
++ svstr_za (w0 + 1, x1 + svcntsb ()))
++
++/*
++** str_za_w0_13_vnum:
++** mov (w1[2-5]), w0
++** str za\[\1, 13\], \[x1, #13, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0_13_vnum,
++ svstr_za (w0 + 13, x1 + svcntsb () * 13),
++ svstr_za (w0 + 13, x1 + svcntsb () * 13))
++
++/*
++** str_za_w0_15_vnum:
++** mov (w1[2-5]), w0
++** str za\[\1, 15\], \[x1, #15, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0_15_vnum,
++ svstr_za (w0 + 15, x1 + svcntsb () * 15),
++ svstr_za (w0 + 15, x1 + svcntsb () * 15))
++
++/*
++** str_za_w0_16_vnum:
++** (
++** add (w1[2-5]), w0, #?16
++** incb x1, all, mul #16
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** |
++** incb x1, all, mul #16
++** add (w1[2-5]), w0, #?16
++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_za_w0_16_vnum,
++ svstr_za (w0 + 16, x1 + svcntsb () * 16),
++ svstr_za (w0 + 16, x1 + svcntsb () * 16))
++
++/*
++** str_za_w0_m1_vnum:
++** (
++** sub (w1[2-5]), w0, #?1
++** decb x1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** |
++** decb x1
++** sub (w1[2-5]), w0, #?1
++** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_za_w0_m1_vnum,
++ svstr_za (w0 - 1, x1 - svcntsb ()),
++ svstr_za (w0 - 1, x1 - svcntsb ()))
++
++/*
++** str_za_w0p2:
++** add (w1[2-5]), w0, #?2
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0p2,
++ svstr_za (w0 + 2, x1),
++ svstr_za (w0 + 2, x1))
++
++/*
++** str_za_offset:
++** (
++** mov (w1[2-5]), w0
++** add (x[0-9]+), x1, #?1
++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
++** |
++** add (x[0-9]+), x1, #?1
++** mov (w1[2-5]), w0
++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_za_offset,
++ svstr_za (w0, x1 + 1),
++ svstr_za (w0, x1 + 1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c
+new file mode 100644
+index 000000000..3406055e7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c
+@@ -0,0 +1,71 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define STREAMING_COMPATIBLE
++#include "test_sme_acle.h"
++
++/*
++** str_za_0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_0,
++ svstr_za (0, x1),
++ svstr_za (0, x1))
++
++/*
++** str_za_1:
++** mov (w1[2-5]), #?1
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_1,
++ svstr_za (1, x1),
++ svstr_za (1, x1))
++
++/*
++** str_za_w0:
++** mov (w1[2-5]), w0
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0,
++ svstr_za (w0, x1),
++ svstr_za (w0, x1))
++
++/*
++** str_za_w0_1_vnum:
++** mov (w1[2-5]), w0
++** str za\[\1, 1\], \[x1, #1, mul vl\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0_1_vnum,
++ svstr_za (w0 + 1, x1 + svcntsb ()),
++ svstr_za (w0 + 1, x1 + svcntsb ()))
++
++/*
++** str_za_w0p2:
++** add (w1[2-5]), w0, #?2
++** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
++** ret
++*/
++TEST_STORE_ZA (str_za_w0p2,
++ svstr_za (w0 + 2, x1),
++ svstr_za (w0 + 2, x1))
++
++/*
++** str_za_offset:
++** (
++** mov (w1[2-5]), w0
++** add (x[0-9]+), x1, #?1
++** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
++** |
++** add (x[0-9]+), x1, #?1
++** mov (w1[2-5]), w0
++** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
++** )
++** ret
++*/
++TEST_STORE_ZA (str_za_offset,
++ svstr_za (w0, x1 + 1),
++ svstr_za (w0, x1 + 1))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c
+new file mode 100644
+index 000000000..9dd66f722
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c
+@@ -0,0 +1,30 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** sumopa_za32_s8_0_p0_p1_z0_z4:
++** sumopa za0\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (sumopa_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t,
++ svsumopa_za32_s8_m (0, p0, p1, z0, z4),
++ svsumopa_za32_m (0, p0, p1, z0, z4))
++
++/*
++** sumopa_za32_s8_0_p1_p0_z4_z0:
++** sumopa za0\.s, p1/m, p0/m, z4\.b, z0\.b
++** ret
++*/
++TEST_DUAL_ZA (sumopa_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t,
++ svsumopa_za32_s8_m (0, p1, p0, z4, z0),
++ svsumopa_za32_m (0, p1, p0, z4, z0))
++
++/*
++** sumopa_za32_s8_3_p0_p1_z0_z4:
++** sumopa za3\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (sumopa_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t,
++ svsumopa_za32_s8_m (3, p0, p1, z0, z4),
++ svsumopa_za32_m (3, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c
+new file mode 100644
+index 000000000..2a78ab85d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c
+@@ -0,0 +1,32 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++#pragma GCC target "+sme-i16i64"
++
++/*
++** sumopa_za64_s16_0_p0_p1_z0_z4:
++** sumopa za0\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (sumopa_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t,
++ svsumopa_za64_s16_m (0, p0, p1, z0, z4),
++ svsumopa_za64_m (0, p0, p1, z0, z4))
++
++/*
++** sumopa_za64_s16_0_p1_p0_z4_z0:
++** sumopa za0\.d, p1/m, p0/m, z4\.h, z0\.h
++** ret
++*/
++TEST_DUAL_ZA (sumopa_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t,
++ svsumopa_za64_s16_m (0, p1, p0, z4, z0),
++ svsumopa_za64_m (0, p1, p0, z4, z0))
++
++/*
++** sumopa_za64_s16_7_p0_p1_z0_z4:
++** sumopa za7\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (sumopa_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t,
++ svsumopa_za64_s16_m (7, p0, p1, z0, z4),
++ svsumopa_za64_m (7, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c
+new file mode 100644
+index 000000000..55cb92d1b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c
+@@ -0,0 +1,30 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** sumops_za32_s8_0_p0_p1_z0_z4:
++** sumops za0\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (sumops_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t,
++ svsumops_za32_s8_m (0, p0, p1, z0, z4),
++ svsumops_za32_m (0, p0, p1, z0, z4))
++
++/*
++** sumops_za32_s8_0_p1_p0_z4_z0:
++** sumops za0\.s, p1/m, p0/m, z4\.b, z0\.b
++** ret
++*/
++TEST_DUAL_ZA (sumops_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t,
++ svsumops_za32_s8_m (0, p1, p0, z4, z0),
++ svsumops_za32_m (0, p1, p0, z4, z0))
++
++/*
++** sumops_za32_s8_3_p0_p1_z0_z4:
++** sumops za3\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (sumops_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t,
++ svsumops_za32_s8_m (3, p0, p1, z0, z4),
++ svsumops_za32_m (3, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c
+new file mode 100644
+index 000000000..910a45b29
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c
+@@ -0,0 +1,32 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++#pragma GCC target "+sme-i16i64"
++
++/*
++** sumops_za64_s16_0_p0_p1_z0_z4:
++** sumops za0\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (sumops_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t,
++ svsumops_za64_s16_m (0, p0, p1, z0, z4),
++ svsumops_za64_m (0, p0, p1, z0, z4))
++
++/*
++** sumops_za64_s16_0_p1_p0_z4_z0:
++** sumops za0\.d, p1/m, p0/m, z4\.h, z0\.h
++** ret
++*/
++TEST_DUAL_ZA (sumops_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t,
++ svsumops_za64_s16_m (0, p1, p0, z4, z0),
++ svsumops_za64_m (0, p1, p0, z4, z0))
++
++/*
++** sumops_za64_s16_7_p0_p1_z0_z4:
++** sumops za7\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (sumops_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t,
++ svsumops_za64_s16_m (7, p0, p1, z0, z4),
++ svsumops_za64_m (7, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h
+new file mode 100644
+index 000000000..aaadab2f7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h
+@@ -0,0 +1,62 @@
++#ifndef TEST_SME_ACLE_H
++#define TEST_SME_ACLE_H 1
++
++#if (!defined(STREAMING_COMPATIBLE) \
++ && !defined(NON_STREAMING) \
++ && !defined(STREAMING))
++#define STREAMING
++#endif
++
++#if !defined(NO_SHARED_ZA)
++#define SHARED_ZA
++#endif
++
++#include "../../sve/acle/asm/test_sve_acle.h"
++
++#include <arm_sme.h>
++
++#define TEST_LOAD_ZA(NAME, CODE1, CODE2) \
++ PROTO (NAME, void, (svbool_t p0, int32_t w0, const char *x1, \
++ uint64_t x2)) \
++ { \
++ INVOKE (CODE1, CODE2); \
++ }
++
++#define TEST_STORE_ZA(NAME, CODE1, CODE2) \
++ PROTO (NAME, void, (svbool_t p0, int32_t w0, char *x1, \
++ uint64_t x2)) \
++ { \
++ INVOKE (CODE1, CODE2); \
++ }
++
++#define TEST_READ_ZA(NAME, TYPE, CODE1, CODE2) \
++ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, svbool_t p0, \
++ int32_t w0)) \
++ { \
++ INVOKE (CODE1, CODE2); \
++ return z0; \
++ }
++
++#define TEST_WRITE_ZA(NAME, TYPE, CODE1, CODE2) \
++ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \
++ int32_t w0)) \
++ { \
++ INVOKE (CODE1, CODE2); \
++ }
++
++#define TEST_UNIFORM_ZA(NAME, TYPE, CODE1, CODE2) \
++ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \
++ svbool_t p1)) \
++ { \
++ INVOKE (CODE1, CODE2); \
++ }
++
++#define TEST_DUAL_ZA(NAME, TYPE1, TYPE2, CODE1, CODE2) \
++ PROTO (NAME, void, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3, \
++ TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7, \
++ svbool_t p0, svbool_t p1)) \
++ { \
++ INVOKE (CODE1, CODE2); \
++ }
++
++#endif
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c
+new file mode 100644
+index 000000000..5474328fb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c
+@@ -0,0 +1,33 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#define STREAMING_COMPATIBLE
++#include "test_sme_acle.h"
++
++/*
++** undef_za_1:
++** ret
++*/
++PROTO (undef_za_1, void, ()) { svundef_za (); }
++
++/*
++** undef_za_2:
++** ret
++*/
++PROTO (undef_za_2, void, ())
++{
++ svzero_za ();
++ svundef_za ();
++}
++
++/*
++** undef_za_3:
++** mov (w1[2-5]), (?:wzr|#?0)
++** str za\[\1, 0\], \[x0(?:, #0, mul vl)\]
++** ret
++*/
++PROTO (undef_za_3, void, (void *ptr))
++{
++ svzero_za ();
++ svundef_za ();
++ svstr_za (0, ptr);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c
+new file mode 100644
+index 000000000..bbc0b6c11
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c
+@@ -0,0 +1,30 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** usmopa_za32_u8_0_p0_p1_z0_z4:
++** usmopa za0\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (usmopa_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t,
++ svusmopa_za32_u8_m (0, p0, p1, z0, z4),
++ svusmopa_za32_m (0, p0, p1, z0, z4))
++
++/*
++** usmopa_za32_u8_0_p1_p0_z4_z0:
++** usmopa za0\.s, p1/m, p0/m, z4\.b, z0\.b
++** ret
++*/
++TEST_DUAL_ZA (usmopa_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t,
++ svusmopa_za32_u8_m (0, p1, p0, z4, z0),
++ svusmopa_za32_m (0, p1, p0, z4, z0))
++
++/*
++** usmopa_za32_u8_3_p0_p1_z0_z4:
++** usmopa za3\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (usmopa_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t,
++ svusmopa_za32_u8_m (3, p0, p1, z0, z4),
++ svusmopa_za32_m (3, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c
+new file mode 100644
+index 000000000..64ee25bc7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c
+@@ -0,0 +1,32 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++#pragma GCC target "+sme-i16i64"
++
++/*
++** usmopa_za64_u16_0_p0_p1_z0_z4:
++** usmopa za0\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (usmopa_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t,
++ svusmopa_za64_u16_m (0, p0, p1, z0, z4),
++ svusmopa_za64_m (0, p0, p1, z0, z4))
++
++/*
++** usmopa_za64_u16_0_p1_p0_z4_z0:
++** usmopa za0\.d, p1/m, p0/m, z4\.h, z0\.h
++** ret
++*/
++TEST_DUAL_ZA (usmopa_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t,
++ svusmopa_za64_u16_m (0, p1, p0, z4, z0),
++ svusmopa_za64_m (0, p1, p0, z4, z0))
++
++/*
++** usmopa_za64_u16_7_p0_p1_z0_z4:
++** usmopa za7\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (usmopa_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t,
++ svusmopa_za64_u16_m (7, p0, p1, z0, z4),
++ svusmopa_za64_m (7, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c
+new file mode 100644
+index 000000000..98fd33157
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c
+@@ -0,0 +1,30 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** usmops_za32_u8_0_p0_p1_z0_z4:
++** usmops za0\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (usmops_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t,
++ svusmops_za32_u8_m (0, p0, p1, z0, z4),
++ svusmops_za32_m (0, p0, p1, z0, z4))
++
++/*
++** usmops_za32_u8_0_p1_p0_z4_z0:
++** usmops za0\.s, p1/m, p0/m, z4\.b, z0\.b
++** ret
++*/
++TEST_DUAL_ZA (usmops_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t,
++ svusmops_za32_u8_m (0, p1, p0, z4, z0),
++ svusmops_za32_m (0, p1, p0, z4, z0))
++
++/*
++** usmops_za32_u8_3_p0_p1_z0_z4:
++** usmops za3\.s, p0/m, p1/m, z0\.b, z4\.b
++** ret
++*/
++TEST_DUAL_ZA (usmops_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t,
++ svusmops_za32_u8_m (3, p0, p1, z0, z4),
++ svusmops_za32_m (3, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c
+new file mode 100644
+index 000000000..e20cdab41
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c
+@@ -0,0 +1,32 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++#pragma GCC target "+sme-i16i64"
++
++/*
++** usmops_za64_u16_0_p0_p1_z0_z4:
++** usmops za0\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (usmops_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t,
++ svusmops_za64_u16_m (0, p0, p1, z0, z4),
++ svusmops_za64_m (0, p0, p1, z0, z4))
++
++/*
++** usmops_za64_u16_0_p1_p0_z4_z0:
++** usmops za0\.d, p1/m, p0/m, z4\.h, z0\.h
++** ret
++*/
++TEST_DUAL_ZA (usmops_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t,
++ svusmops_za64_u16_m (0, p1, p0, z4, z0),
++ svusmops_za64_m (0, p1, p0, z4, z0))
++
++/*
++** usmops_za64_u16_7_p0_p1_z0_z4:
++** usmops za7\.d, p0/m, p1/m, z0\.h, z4\.h
++** ret
++*/
++TEST_DUAL_ZA (usmops_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t,
++ svusmops_za64_u16_m (7, p0, p1, z0, z4),
++ svusmops_za64_m (7, p0, p1, z0, z4))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c
+new file mode 100644
+index 000000000..119a2535e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c
+@@ -0,0 +1,193 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** write_za128_s8_0_0_z0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t,
++ svwrite_hor_za128_s8_m (0, 0, p0, z0),
++ svwrite_hor_za128_m (0, 0, p0, z0))
++
++/*
++** write_za128_s8_0_1_z0:
++** mov (w1[2-5]), #?1
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t,
++ svwrite_hor_za128_s8_m (0, 1, p0, z0),
++ svwrite_hor_za128_m (0, 1, p0, z0))
++
++/*
++** write_za128_s8_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_0_w0_z0, svint8_t,
++ svwrite_hor_za128_s8_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_s8_0_w0p1_z0:
++** add (w1[2-5]), w0, #?1
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_0_w0p1_z0, svint8_t,
++ svwrite_hor_za128_s8_m (0, w0 + 1, p0, z0),
++ svwrite_hor_za128_m (0, w0 + 1, p0, z0))
++
++/*
++** write_za128_s8_0_w0m1_z0:
++** sub (w1[2-5]), w0, #?1
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_0_w0m1_z0, svint8_t,
++ svwrite_hor_za128_s8_m (0, w0 - 1, p0, z0),
++ svwrite_hor_za128_m (0, w0 - 1, p0, z0))
++
++/*
++** write_za128_s8_1_w0_z0:
++** mov (w1[2-5]), w0
++** mova za1h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_1_w0_z0, svint8_t,
++ svwrite_hor_za128_s8_m (1, w0, p0, z0),
++ svwrite_hor_za128_m (1, w0, p0, z0))
++
++/*
++** write_za128_s8_15_w0_z0:
++** mov (w1[2-5]), w0
++** mova za15h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_15_w0_z0, svint8_t,
++ svwrite_hor_za128_s8_m (15, w0, p0, z0),
++ svwrite_hor_za128_m (15, w0, p0, z0))
++
++/*
++** write_za128_s8_0_w0_z1:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z1\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s8_0_w0_z1, svint8_t,
++ svwrite_hor_za128_s8_m (0, w0, p0, z1),
++ svwrite_hor_za128_m (0, w0, p0, z1))
++
++/*
++** write_za128_u8_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t,
++ svwrite_hor_za128_u8_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_s16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s16_0_w0_z0, svint16_t,
++ svwrite_hor_za128_s16_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_u16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_u16_0_w0_z0, svuint16_t,
++ svwrite_hor_za128_u16_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_f16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_f16_0_w0_z0, svfloat16_t,
++ svwrite_hor_za128_f16_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_bf16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_bf16_0_w0_z0, svbfloat16_t,
++ svwrite_hor_za128_bf16_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_s32_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s32_0_w0_z0, svint32_t,
++ svwrite_hor_za128_s32_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_u32_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_u32_0_w0_z0, svuint32_t,
++ svwrite_hor_za128_u32_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_f32_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_f32_0_w0_z0, svfloat32_t,
++ svwrite_hor_za128_f32_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_s64_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_s64_0_w0_z0, svint64_t,
++ svwrite_hor_za128_s64_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_u64_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_u64_0_w0_z0, svuint64_t,
++ svwrite_hor_za128_u64_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
++
++/*
++** write_za128_f64_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.q\[\1, 0\], p0/m, z0\.q
++** ret
++*/
++TEST_WRITE_ZA (write_za128_f64_0_w0_z0, svfloat64_t,
++ svwrite_hor_za128_f64_m (0, w0, p0, z0),
++ svwrite_hor_za128_m (0, w0, p0, z0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c
+new file mode 100644
+index 000000000..c8f13f7bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c
+@@ -0,0 +1,133 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** write_za16_s16_0_0_z0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, 0, p0, z0),
++ svwrite_hor_za16_m (0, 0, p0, z0))
++
++/*
++** write_za16_s16_0_1_z0:
++** mov (w1[2-5]), #?1
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, 1, p0, z0),
++ svwrite_hor_za16_m (0, 1, p0, z0))
++
++/*
++** write_za16_s16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_w0_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, w0, p0, z0),
++ svwrite_hor_za16_m (0, w0, p0, z0))
++
++/*
++** write_za16_s16_0_w0p1_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 1\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_w0p1_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, w0 + 1, p0, z0),
++ svwrite_hor_za16_m (0, w0 + 1, p0, z0))
++
++/*
++** write_za16_s16_0_w0p7_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 7\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_w0p7_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, w0 + 7, p0, z0),
++ svwrite_hor_za16_m (0, w0 + 7, p0, z0))
++
++/*
++** write_za16_s16_0_w0p8_z0:
++** add (w1[2-5]), w0, #?8
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_w0p8_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, w0 + 8, p0, z0),
++ svwrite_hor_za16_m (0, w0 + 8, p0, z0))
++
++/*
++** write_za16_s16_0_w0m1_z0:
++** sub (w1[2-5]), w0, #?1
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_w0m1_z0, svint16_t,
++ svwrite_hor_za16_s16_m (0, w0 - 1, p0, z0),
++ svwrite_hor_za16_m (0, w0 - 1, p0, z0))
++
++/*
++** write_za16_s16_1_w0_z0:
++** mov (w1[2-5]), w0
++** mova za1h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_1_w0_z0, svint16_t,
++ svwrite_hor_za16_s16_m (1, w0, p0, z0),
++ svwrite_hor_za16_m (1, w0, p0, z0))
++
++/*
++** write_za16_s16_1_w0p7_z0:
++** mov (w1[2-5]), w0
++** mova za1h\.h\[\1, 7\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_1_w0p7_z0, svint16_t,
++ svwrite_hor_za16_s16_m (1, w0 + 7, p0, z0),
++ svwrite_hor_za16_m (1, w0 + 7, p0, z0))
++
++/*
++** write_za16_s16_0_w0_z1:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 0\], p0/m, z1\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_s16_0_w0_z1, svint16_t,
++ svwrite_hor_za16_s16_m (0, w0, p0, z1),
++ svwrite_hor_za16_m (0, w0, p0, z1))
++
++/*
++** write_za16_u16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_u16_0_w0_z0, svuint16_t,
++ svwrite_hor_za16_u16_m (0, w0, p0, z0),
++ svwrite_hor_za16_m (0, w0, p0, z0))
++
++/*
++** write_za16_f16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_f16_0_w0_z0, svfloat16_t,
++ svwrite_hor_za16_f16_m (0, w0, p0, z0),
++ svwrite_hor_za16_m (0, w0, p0, z0))
++
++/*
++** write_za16_bf16_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.h\[\1, 0\], p0/m, z0\.h
++** ret
++*/
++TEST_WRITE_ZA (write_za16_bf16_0_w0_z0, svbfloat16_t,
++ svwrite_hor_za16_bf16_m (0, w0, p0, z0),
++ svwrite_hor_za16_m (0, w0, p0, z0))
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c
+new file mode 100644
+index 000000000..ea2f5ae89
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c
+@@ -0,0 +1,143 @@
++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
++
++#include "test_sme_acle.h"
++
++/*
++** write_za32_s32_0_0_z0:
++** mov (w1[2-5]), (?:wzr|#?0)
++** mova za0h\.s\[\1, 0\], p0/m, z0\.s
++** ret
++*/
++TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t,
++ svwrite_hor_za32_s32_m (0, 0, p0, z0),
++ svwrite_hor_za32_m (0, 0, p0, z0))
++
++/*
++** write_za32_s32_0_1_z0:
++** mov (w1[2-5]), #?1
++** mova za0h\.s\[\1, 0\], p0/m, z0\.s
++** ret
++*/
++TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t,
++ svwrite_hor_za32_s32_m (0, 1, p0, z0),
++ svwrite_hor_za32_m (0, 1, p0, z0))
++
++/*
++** write_za32_s32_0_w0_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.s\[\1, 0\], p0/m, z0\.s
++** ret
++*/
++TEST_WRITE_ZA (write_za32_s32_0_w0_z0, svint32_t,
++ svwrite_hor_za32_s32_m (0, w0, p0, z0),
++ svwrite_hor_za32_m (0, w0, p0, z0))
++
++/*
++** write_za32_s32_0_w0p1_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.s\[\1, 1\], p0/m, z0\.s
++** ret
++*/
++TEST_WRITE_ZA (write_za32_s32_0_w0p1_z0, svint32_t,
++ svwrite_hor_za32_s32_m (0, w0 + 1, p0, z0),
++ svwrite_hor_za32_m (0, w0 + 1, p0, z0))
++
++/*
++** write_za32_s32_0_w0p3_z0:
++** mov (w1[2-5]), w0
++** mova za0h\.s\[\1, 3\], p0/m, z0\.s
++** ret
++*/
++TEST_WRITE_ZA (write_za32_s32_0_w0p3_z0, svint32_t,
++ svwrite_hor_za32_s32_m (0, w0 + 3, p0, z0),
++ svwrite_hor_za32_m (0, w0 + 3, p0, z0))
++
++/*
++** write_za32_s32_0_w0p4_z0:
++** add (w1[2-5]), w0, #?4
za0h\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p4_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0 + 4, p0, z0), ++ svwrite_hor_za32_m (0, w0 + 4, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0h\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0m1_z0, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za32_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0_z0: ++** mov (w12-5), w0 ++** mova za1h\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0_z0, svint32_t, ++ svwrite_hor_za32_s32_m (1, w0, p0, z0), ++ svwrite_hor_za32_m (1, w0, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0p3_z0: ++** mov (w12-5), w0 ++** mova za1h\.s\\1, 3\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0p3_z0, svint32_t, ++ svwrite_hor_za32_s32_m (1, w0 + 3, p0, z0), ++ svwrite_hor_za32_m (1, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0_z0: ++** mov (w12-5), w0 ++** mova za3h\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0_z0, svint32_t, ++ svwrite_hor_za32_s32_m (3, w0, p0, z0), ++ svwrite_hor_za32_m (3, w0, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0p3_z0: ++** mov (w12-5), w0 ++** mova za3h\.s\\1, 3\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0p3_z0, svint32_t, ++ svwrite_hor_za32_s32_m (3, w0 + 3, p0, z0), ++ svwrite_hor_za32_m (3, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0h\.s\\1, 0\, p0/m, z1\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z1, svint32_t, ++ svwrite_hor_za32_s32_m (0, w0, p0, z1), ++ svwrite_hor_za32_m (0, w0, p0, z1)) ++ ++/* ++** write_za32_u32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_u32_0_w0_z0, svuint32_t, ++ svwrite_hor_za32_u32_m (0, w0, p0, z0), ++ svwrite_hor_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_f32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_f32_0_w0_z0, svfloat32_t, ++ svwrite_hor_za32_f32_m (0, w0, p0, z0), ++ svwrite_hor_za32_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c +new file mode 100644 +index 000000000..2b0a157d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za64_s64_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, 0, p0, z0), ++ svwrite_hor_za64_m (0, 0, p0, z0)) ++ ++/* ++** write_za64_s64_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, 1, p0, z0), ++ svwrite_hor_za64_m (0, 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0, p0, z0), ++ svwrite_hor_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p1_z0: ++** mov (w12-5), w0 ++** mova za0h\.d\\1, 1\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za64_m (0, w0 
+ 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p2_z0: ++** add (w12-5), w0, #?2 ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p2_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0 + 2, p0, z0), ++ svwrite_hor_za64_m (0, w0 + 2, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0m1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za64_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0_z0: ++** mov (w12-5), w0 ++** mova za1h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (1, w0, p0, z0), ++ svwrite_hor_za64_m (1, w0, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0p1_z0: ++** mov (w12-5), w0 ++** mova za1h\.d\\1, 1\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0p1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (1, w0 + 1, p0, z0), ++ svwrite_hor_za64_m (1, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0_z0: ++** mov (w12-5), w0 ++** mova za7h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0_z0, svint64_t, ++ svwrite_hor_za64_s64_m (7, w0, p0, z0), ++ svwrite_hor_za64_m (7, w0, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0p1_z0: ++** mov (w12-5), w0 ++** mova za7h\.d\\1, 1\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0p1_z0, svint64_t, ++ svwrite_hor_za64_s64_m (7, w0 + 1, p0, z0), ++ svwrite_hor_za64_m (7, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0h\.d\\1, 0\, p0/m, z1\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z1, svint64_t, ++ svwrite_hor_za64_s64_m (0, w0, p0, z1), ++ svwrite_hor_za64_m (0, w0, p0, z1)) ++ ++/* ++** write_za64_u64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_u64_0_w0_z0, svuint64_t, ++ svwrite_hor_za64_u64_m (0, w0, p0, z0), ++ svwrite_hor_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_f64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_f64_0_w0_z0, svfloat64_t, ++ svwrite_hor_za64_f64_m (0, w0, p0, z0), ++ svwrite_hor_za64_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c +new file mode 100644 +index 000000000..683e1a64a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c +@@ -0,0 +1,93 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za8_s8_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0h\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, 0, p0, z0), ++ svwrite_hor_za8_m (0, 0, p0, z0)) ++ ++/* ++** write_za8_s8_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0h\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, 1, p0, z0), ++ svwrite_hor_za8_m (0, 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0, p0, z0), ++ svwrite_hor_za8_m (0, w0, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p1_z0: ++** mov (w12-5), w0 ++** mova za0h\.b\\1, 1\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p1_z0, svint8_t, ++ 
svwrite_hor_za8_s8_m (0, w0 + 1, p0, z0), ++ svwrite_hor_za8_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p15_z0: ++** mov (w12-5), w0 ++** mova za0h\.b\\1, 15\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p15_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 + 15, p0, z0), ++ svwrite_hor_za8_m (0, w0 + 15, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p16_z0: ++** add (w12-5), w0, #?16 ++** mova za0h\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p16_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 + 16, p0, z0), ++ svwrite_hor_za8_m (0, w0 + 16, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0h\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0m1_z0, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0 - 1, p0, z0), ++ svwrite_hor_za8_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0h\.b\\1, 0\, p0/m, z1\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, ++ svwrite_hor_za8_s8_m (0, w0, p0, z1), ++ svwrite_hor_za8_m (0, w0, p0, z1)) ++ ++/* ++** write_za8_u8_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0h\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, ++ svwrite_hor_za8_u8_m (0, w0, p0, z0), ++ svwrite_hor_za8_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c +new file mode 100644 +index 000000000..9622e99dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c +@@ -0,0 +1,193 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za128_s8_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, 0, p0, z0), ++ svwrite_ver_za128_m (0, 0, p0, z0)) ++ ++/* ++** write_za128_s8_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, 1, p0, z0), ++ svwrite_ver_za128_m (0, 1, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0p1_z0: ++** add (w12-5), w0, #?1 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0p1_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za128_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0m1_z0, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za128_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za128_s8_1_w0_z0: ++** mov (w12-5), w0 ++** mova za1v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_1_w0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (1, w0, p0, z0), ++ svwrite_ver_za128_m (1, w0, p0, z0)) ++ ++/* ++** write_za128_s8_15_w0_z0: ++** mov (w12-5), w0 ++** mova za15v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_15_w0_z0, svint8_t, ++ svwrite_ver_za128_s8_m (15, w0, p0, z0), ++ svwrite_ver_za128_m (15, w0, p0, z0)) ++ ++/* ++** write_za128_s8_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, 
p0/m, z1\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s8_0_w0_z1, svint8_t, ++ svwrite_ver_za128_s8_m (0, w0, p0, z1), ++ svwrite_ver_za128_m (0, w0, p0, z1)) ++ ++/* ++** write_za128_u8_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, ++ svwrite_ver_za128_u8_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s16_0_w0_z0, svint16_t, ++ svwrite_ver_za128_s16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u16_0_w0_z0, svuint16_t, ++ svwrite_ver_za128_u16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f16_0_w0_z0, svfloat16_t, ++ svwrite_ver_za128_f16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_bf16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_bf16_0_w0_z0, svbfloat16_t, ++ svwrite_ver_za128_bf16_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s32_0_w0_z0, svint32_t, ++ svwrite_ver_za128_s32_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u32_0_w0_z0, svuint32_t, ++ svwrite_ver_za128_u32_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f32_0_w0_z0, svfloat32_t, ++ svwrite_ver_za128_f32_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_s64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_s64_0_w0_z0, svint64_t, ++ svwrite_ver_za128_s64_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_u64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_u64_0_w0_z0, svuint64_t, ++ svwrite_ver_za128_u64_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) ++ ++/* ++** write_za128_f64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.q\\1, 0\, p0/m, z0\.q ++** ret ++*/ ++TEST_WRITE_ZA (write_za128_f64_0_w0_z0, svfloat64_t, ++ svwrite_ver_za128_f64_m (0, w0, p0, z0), ++ svwrite_ver_za128_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c +new file mode 100644 +index 000000000..5430f2307 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za16_s16_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, 0, p0, z0), ++ svwrite_ver_za16_m (0, 0, p0, z0)) ++ ++/* 
++** write_za16_s16_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, 1, p0, z0), ++ svwrite_ver_za16_m (0, 1, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p1_z0: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 1\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p1_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za16_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p7_z0: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 7\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p7_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 + 7, p0, z0), ++ svwrite_ver_za16_m (0, w0 + 7, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0p8_z0: ++** add (w12-5), w0, #?8 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0p8_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 + 8, p0, z0), ++ svwrite_ver_za16_m (0, w0 + 8, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0m1_z0, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za16_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za16_s16_1_w0_z0: ++** mov (w12-5), w0 ++** mova za1v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_1_w0_z0, svint16_t, ++ svwrite_ver_za16_s16_m (1, w0, p0, z0), ++ svwrite_ver_za16_m (1, w0, p0, z0)) ++ ++/* ++** write_za16_s16_1_w0p7_z0: ++** mov (w12-5), w0 ++** mova za1v\.h\\1, 7\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_1_w0p7_z0, svint16_t, ++ svwrite_ver_za16_s16_m (1, w0 + 7, p0, z0), ++ svwrite_ver_za16_m (1, w0 + 7, p0, z0)) ++ ++/* ++** write_za16_s16_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 0\, p0/m, z1\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_s16_0_w0_z1, svint16_t, ++ svwrite_ver_za16_s16_m (0, w0, p0, z1), ++ svwrite_ver_za16_m (0, w0, p0, z1)) ++ ++/* ++** write_za16_u16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_u16_0_w0_z0, svuint16_t, ++ svwrite_ver_za16_u16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_f16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_f16_0_w0_z0, svfloat16_t, ++ svwrite_ver_za16_f16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) ++ ++/* ++** write_za16_bf16_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.h\\1, 0\, p0/m, z0\.h ++** ret ++*/ ++TEST_WRITE_ZA (write_za16_bf16_0_w0_z0, svbfloat16_t, ++ svwrite_ver_za16_bf16_m (0, w0, p0, z0), ++ svwrite_ver_za16_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c +new file mode 100644 +index 000000000..960ce163d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c +@@ -0,0 +1,143 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za32_s32_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_0_z0, 
svint32_t, ++ svwrite_ver_za32_s32_m (0, 0, p0, z0), ++ svwrite_ver_za32_m (0, 0, p0, z0)) ++ ++/* ++** write_za32_s32_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, 1, p0, z0), ++ svwrite_ver_za32_m (0, 1, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0, p0, z0), ++ svwrite_ver_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p1_z0: ++** mov (w12-5), w0 ++** mova za0v\.s\\1, 1\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p1_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za32_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p3_z0: ++** mov (w12-5), w0 ++** mova za0v\.s\\1, 3\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p3_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 + 3, p0, z0), ++ svwrite_ver_za32_m (0, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0p4_z0: ++** add (w12-5), w0, #?4 ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0p4_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 + 4, p0, z0), ++ svwrite_ver_za32_m (0, w0 + 4, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0m1_z0, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za32_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0_z0: ++** mov (w12-5), w0 ++** mova za1v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (1, w0, p0, z0), ++ svwrite_ver_za32_m (1, w0, p0, z0)) ++ ++/* ++** write_za32_s32_1_w0p3_z0: ++** mov (w12-5), w0 ++** mova za1v\.s\\1, 3\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_1_w0p3_z0, svint32_t, ++ svwrite_ver_za32_s32_m (1, w0 + 3, p0, z0), ++ svwrite_ver_za32_m (1, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0_z0: ++** mov (w12-5), w0 ++** mova za3v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0_z0, svint32_t, ++ svwrite_ver_za32_s32_m (3, w0, p0, z0), ++ svwrite_ver_za32_m (3, w0, p0, z0)) ++ ++/* ++** write_za32_s32_3_w0p3_z0: ++** mov (w12-5), w0 ++** mova za3v\.s\\1, 3\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_3_w0p3_z0, svint32_t, ++ svwrite_ver_za32_s32_m (3, w0 + 3, p0, z0), ++ svwrite_ver_za32_m (3, w0 + 3, p0, z0)) ++ ++/* ++** write_za32_s32_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0v\.s\\1, 0\, p0/m, z1\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_s32_0_w0_z1, svint32_t, ++ svwrite_ver_za32_s32_m (0, w0, p0, z1), ++ svwrite_ver_za32_m (0, w0, p0, z1)) ++ ++/* ++** write_za32_u32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_u32_0_w0_z0, svuint32_t, ++ svwrite_ver_za32_u32_m (0, w0, p0, z0), ++ svwrite_ver_za32_m (0, w0, p0, z0)) ++ ++/* ++** write_za32_f32_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.s\\1, 0\, p0/m, z0\.s ++** ret ++*/ ++TEST_WRITE_ZA (write_za32_f32_0_w0_z0, svfloat32_t, ++ svwrite_ver_za32_f32_m (0, w0, p0, z0), ++ svwrite_ver_za32_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c +new file mode 100644 +index 000000000..962c4002e +--- /dev/null 
++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c +@@ -0,0 +1,133 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za64_s64_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, 0, p0, z0), ++ svwrite_ver_za64_m (0, 0, p0, z0)) ++ ++/* ++** write_za64_s64_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, 1, p0, z0), ++ svwrite_ver_za64_m (0, 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0, p0, z0), ++ svwrite_ver_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p1_z0: ++** mov (w12-5), w0 ++** mova za0v\.d\\1, 1\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za64_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0p2_z0: ++** add (w12-5), w0, #?2 ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0p2_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0 + 2, p0, z0), ++ svwrite_ver_za64_m (0, w0 + 2, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0m1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za64_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0_z0: ++** mov (w12-5), w0 ++** mova za1v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (1, w0, p0, z0), ++ svwrite_ver_za64_m (1, w0, p0, z0)) ++ ++/* ++** write_za64_s64_1_w0p1_z0: ++** mov (w12-5), w0 ++** mova za1v\.d\\1, 1\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_1_w0p1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (1, w0 + 1, p0, z0), ++ svwrite_ver_za64_m (1, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0_z0: ++** mov (w12-5), w0 ++** mova za7v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0_z0, svint64_t, ++ svwrite_ver_za64_s64_m (7, w0, p0, z0), ++ svwrite_ver_za64_m (7, w0, p0, z0)) ++ ++/* ++** write_za64_s64_7_w0p1_z0: ++** mov (w12-5), w0 ++** mova za7v\.d\\1, 1\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_7_w0p1_z0, svint64_t, ++ svwrite_ver_za64_s64_m (7, w0 + 1, p0, z0), ++ svwrite_ver_za64_m (7, w0 + 1, p0, z0)) ++ ++/* ++** write_za64_s64_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0v\.d\\1, 0\, p0/m, z1\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_s64_0_w0_z1, svint64_t, ++ svwrite_ver_za64_s64_m (0, w0, p0, z1), ++ svwrite_ver_za64_m (0, w0, p0, z1)) ++ ++/* ++** write_za64_u64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_u64_0_w0_z0, svuint64_t, ++ svwrite_ver_za64_u64_m (0, w0, p0, z0), ++ svwrite_ver_za64_m (0, w0, p0, z0)) ++ ++/* ++** write_za64_f64_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.d\\1, 0\, p0/m, z0\.d ++** ret ++*/ ++TEST_WRITE_ZA (write_za64_f64_0_w0_z0, svfloat64_t, ++ svwrite_ver_za64_f64_m (0, w0, p0, z0), ++ svwrite_ver_za64_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c +new file mode 100644 +index 000000000..dd6182821 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c +@@ -0,0 +1,93 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sme_acle.h" ++ ++/* ++** write_za8_s8_0_0_z0: ++** mov (w12-5), (?:wzr|#?0) ++** mova za0v\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, 0, p0, z0), ++ svwrite_ver_za8_m (0, 0, p0, z0)) ++ ++/* ++** write_za8_s8_0_1_z0: ++** mov (w12-5), #?1 ++** mova za0v\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, 1, p0, z0), ++ svwrite_ver_za8_m (0, 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0, p0, z0), ++ svwrite_ver_za8_m (0, w0, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p1_z0: ++** mov (w12-5), w0 ++** mova za0v\.b\\1, 1\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p1_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 + 1, p0, z0), ++ svwrite_ver_za8_m (0, w0 + 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p15_z0: ++** mov (w12-5), w0 ++** mova za0v\.b\\1, 15\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p15_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 + 15, p0, z0), ++ svwrite_ver_za8_m (0, w0 + 15, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0p16_z0: ++** add (w12-5), w0, #?16 ++** mova za0v\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0p16_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 + 16, p0, z0), ++ svwrite_ver_za8_m (0, w0 + 16, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0m1_z0: ++** sub (w12-5), w0, #?1 ++** mova za0v\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0m1_z0, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0 - 1, p0, z0), ++ svwrite_ver_za8_m (0, w0 - 1, p0, z0)) ++ ++/* ++** write_za8_s8_0_w0_z1: ++** mov (w12-5), w0 ++** mova za0v\.b\\1, 0\, p0/m, z1\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, ++ svwrite_ver_za8_s8_m (0, w0, p0, z1), ++ svwrite_ver_za8_m (0, w0, p0, z1)) ++ ++/* ++** write_za8_u8_0_w0_z0: ++** mov (w12-5), w0 ++** mova za0v\.b\\1, 0\, p0/m, z0\.b ++** ret ++*/ ++TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, ++ svwrite_ver_za8_u8_m (0, w0, p0, z0), ++ svwrite_ver_za8_m (0, w0, p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c +new file mode 100644 +index 000000000..9ce7331eb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c +@@ -0,0 +1,130 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** zero_mask_za_0: ++** zero { *} ++** ret ++*/ ++PROTO (zero_mask_za_0, void, ()) { svzero_mask_za (0); } ++ ++/* ++** zero_mask_za_01: ++** zero { za0\.d } ++** ret ++*/ ++PROTO (zero_mask_za_01, void, ()) { svzero_mask_za (0x01); } ++ ++/* ++** zero_mask_za_80: ++** zero { za7\.d } ++** ret ++*/ ++PROTO (zero_mask_za_80, void, ()) { svzero_mask_za (0x80); } ++ ++/* ++** zero_mask_za_03: ++** zero { za0\.d, za1\.d } ++** ret ++*/ ++PROTO (zero_mask_za_03, void, ()) { svzero_mask_za (0x03); } ++ ++/* ++** zero_mask_za_09: ++** zero { za0\.d, za3\.d } ++** ret ++*/ ++PROTO (zero_mask_za_09, void, ()) { 
svzero_mask_za (0x09); } ++ ++/* ++** zero_mask_za_0d: ++** zero { za0\.d, za2\.d, za3\.d } ++** ret ++*/ ++PROTO (zero_mask_za_0d, void, ()) { svzero_mask_za (0x0d); } ++ ++/* ++** zero_mask_za_3c: ++** zero { za2\.d, za3\.d, za4\.d, za5\.d } ++** ret ++*/ ++PROTO (zero_mask_za_3c, void, ()) { svzero_mask_za (0x3c); } ++ ++/* ++** zero_mask_za_5a: ++** zero { za1\.d, za3\.d, za4\.d, za6\.d } ++** ret ++*/ ++PROTO (zero_mask_za_5a, void, ()) { svzero_mask_za (0x5a); } ++ ++/* ++** zero_mask_za_11: ++** zero { za0\.s } ++** ret ++*/ ++PROTO (zero_mask_za_11, void, ()) { svzero_mask_za (0x11); } ++ ++/* ++** zero_mask_za_88: ++** zero { za3\.s } ++** ret ++*/ ++PROTO (zero_mask_za_88, void, ()) { svzero_mask_za (0x88); } ++ ++/* ++** zero_mask_za_33: ++** zero { za0\.s, za1\.s } ++** ret ++*/ ++PROTO (zero_mask_za_33, void, ()) { svzero_mask_za (0x33); } ++ ++/* ++** zero_mask_za_cc: ++** zero { za2\.s, za3\.s } ++** ret ++*/ ++PROTO (zero_mask_za_cc, void, ()) { svzero_mask_za (0xcc); } ++ ++/* ++** zero_mask_za_55: ++** zero { za0\.h } ++** ret ++*/ ++PROTO (zero_mask_za_55, void, ()) { svzero_mask_za (0x55); } ++ ++/* ++** zero_mask_za_aa: ++** zero { za1\.h } ++** ret ++*/ ++PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); } ++ ++/* ++** zero_mask_za_ab: ++** zero { za1\.h, za0\.d } ++** ret ++*/ ++PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); } ++ ++/* ++** zero_mask_za_d7: ++** zero { za0\.h, za1\.d, za7\.d } ++** ret ++*/ ++PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); } ++ ++/* ++** zero_mask_za_bf: ++** zero { za1\.h, za0\.s, za2\.d } ++** ret ++*/ ++PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); } ++ ++/* ++** zero_mask_za_ff: ++** zero { za } ++** ret ++*/ ++PROTO (zero_mask_za_ff, void, ()) { svzero_mask_za (0xff); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c +new file mode 100644 +index 000000000..4688d0950 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c +@@ -0,0 +1,11 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#define STREAMING_COMPATIBLE ++#include "test_sme_acle.h" ++ ++/* ++** zero_za: ++** zero { za } ++** ret ++*/ ++PROTO (zero_za, void, ()) { svzero_za (); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +index d8916809b..84925b9bd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +@@ -12,11 +12,21 @@ + #endif + + #ifdef STREAMING_COMPATIBLE +-#define ATTR __arm_streaming_compatible ++#define SM_ATTR __arm_streaming_compatible ++#elif defined(STREAMING) ++#define SM_ATTR __arm_streaming + #else +-#define ATTR ++#define SM_ATTR + #endif + ++#ifdef SHARED_ZA ++#define ZA_ATTR __arm_inout("za") ++#else ++#define ZA_ATTR ++#endif ++ ++#define ATTR SM_ATTR ZA_ATTR ++ + #ifdef __cplusplus + #define PROTO(NAME, RET, ARGS) \ + extern "C" RET NAME ARGS ATTR; RET NAME ARGS ATTR +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c +new file mode 100644 +index 000000000..fce1ef1dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 
(svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svusmopa_za32_m (0, pg, pg, u8); /* { dg-error {too few arguments to function 'svusmopa_za32_m'} } */ ++ svusmopa_za32_m (0, pg, pg, u8, s8, 0); /* { dg-error {too many arguments to function 'svusmopa_za32_m'} } */ ++ svusmopa_za32_m (tile, pg, pg, u8, s8); /* { dg-error {argument 1 of 'svusmopa_za32_m' must be an integer constant expression} } */ ++ svusmopa_za32_m (-1, pg, pg, u8, s8); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \0, 3\} } */ ++ svusmopa_za32_m (4, pg, pg, u8, s8); /* { dg-error {passing 4 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \0, 3\} } */ ++ svusmopa_za32_m (0, u8, pg, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svusmopa_za32_m', which expects 'svbool_t'} } */ ++ svusmopa_za32_m (0, pg, u8, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmopa_za32_m', which expects 'svbool_t'} } */ ++ svusmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svusmopa_za32_m', which expects an SVE type} } */ ++ svusmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svint8_t' arguments} } */ ++ svusmopa_za32_m (0, pg, pg, pg, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svusmopa_za32_m (0, pg, pg, f16, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */ ++ svusmopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 5 of 'svusmopa_za32_m', which expects a vector of signed integers} } */ ++ svusmopa_za32_m (0, pg, pg, u8, s16); /* { dg-error {arguments 4 and 5 of 'svusmopa_za32_m' must have the same element size, but the values passed here have type 'svuint8_t' and 'svint16_t' respectively} } */ ++ svusmopa_za32_m (0, pg, pg, u16, s16); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svuint16_t' arguments} } */ ++ ++ svusmopa_za64_m (0, pg, pg, u16, s16); /* { dg-error {ACLE function 'svusmopa_za64_u16_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_streaming ++{ ++ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_inout("za") ++{ ++ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint16_t s16, svuint16_t u16) ++ __arm_streaming __arm_inout("za") ++{ ++ svusmopa_za64_m (-1, pg, pg, u16, s16); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \0, 7\} } */ ++ svusmopa_za64_m (8, pg, pg, u16, s16); /* { dg-error {passing 8 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \0, 7\} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c +new file mode 100644 +index 000000000..7e91a41cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target 
("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svint32_t s32, ++ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svmopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svmopa_za32_m'} } */ ++ svmopa_za32_m (0, pg, pg, s8, s8, 0); /* { dg-error {too many arguments to function 'svmopa_za32_m'} } */ ++ svmopa_za32_m (tile, pg, pg, s8, s8); /* { dg-error {argument 1 of 'svmopa_za32_m' must be an integer constant expression} } */ ++ svmopa_za32_m (-1, pg, pg, s8, s8); /* { dg-error {passing -1 to argument 1 of 'svmopa_za32_m', which expects a value in the range \0, 3\} } */ ++ svmopa_za32_m (4, pg, pg, s8, s8); /* { dg-error {passing 4 to argument 1 of 'svmopa_za32_m', which expects a value in the range \0, 3\} } */ ++ svmopa_za32_m (0, u8, pg, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svmopa_za32_m', which expects 'svbool_t'} } */ ++ svmopa_za32_m (0, pg, u8, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svmopa_za32_m', which expects 'svbool_t'} } */ ++ svmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svmopa_za32_m', which expects an SVE type} } */ ++ svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svuint8_t'} } */ ++ svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svint8_t'} } */ ++ svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */ ++ svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */ ++ svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */ ++ ++ svmopa_za64_m (0, pg, pg, s16, s16); /* { dg-error {ACLE function 'svmopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8) __arm_streaming ++{ ++ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8) __arm_inout("za") ++{ ++ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint16_t s16) __arm_streaming __arm_inout("za") ++{ ++ svmopa_za64_m (-1, pg, pg, s16, s16); /* { dg-error {passing -1 to argument 1 of 'svmopa_za64_m', which expects a value in the range \0, 7\} } */ ++ svmopa_za64_m (8, pg, pg, s16, s16); /* { dg-error {passing 8 to argument 1 of 'svmopa_za64_m', which expects a value in the range \0, 7\} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c +new file mode 100644 +index 000000000..dfc1b737d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svfloat64_t f64) __arm_streaming __arm_inout("za") ++{ ++ 
svmopa_za64_m (0, pg, pg, f64, f64); /* { dg-error {ACLE function 'svmopa_za64_f64_m' requires ISA extension 'sme-f64f64'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c +new file mode 100644 +index 000000000..555f95a61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svsumopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svsumopa_za32_m'} } */ ++ svsumopa_za32_m (0, pg, pg, s8, u8, 0); /* { dg-error {too many arguments to function 'svsumopa_za32_m'} } */ ++ svsumopa_za32_m (tile, pg, pg, s8, u8); /* { dg-error {argument 1 of 'svsumopa_za32_m' must be an integer constant expression} } */ ++ svsumopa_za32_m (-1, pg, pg, s8, u8); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \0, 3\} } */ ++ svsumopa_za32_m (4, pg, pg, s8, u8); /* { dg-error {passing 4 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \0, 3\} } */ ++ svsumopa_za32_m (0, u8, pg, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsumopa_za32_m', which expects 'svbool_t'} } */ ++ svsumopa_za32_m (0, pg, u8, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svsumopa_za32_m', which expects 'svbool_t'} } */ ++ svsumopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svsumopa_za32_m', which expects an SVE type} } */ ++ svsumopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svuint8_t' arguments} } */ ++ svsumopa_za32_m (0, pg, pg, pg, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svsumopa_za32_m (0, pg, pg, f16, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */ ++ svsumopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 5 of 'svsumopa_za32_m', which expects a vector of unsigned integers} } */ ++ svsumopa_za32_m (0, pg, pg, s8, u16); /* { dg-error {arguments 4 and 5 of 'svsumopa_za32_m' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ ++ svsumopa_za32_m (0, pg, pg, s16, u16); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svint16_t' arguments} } */ ++ ++ svsumopa_za64_m (0, pg, pg, s16, u16); /* { dg-error {ACLE function 'svsumopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_streaming ++{ ++ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_inout("za") ++{ ++ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint16_t s16, svuint16_t u16) ++ __arm_streaming __arm_inout("za") ++{ ++ svsumopa_za64_m (-1, pg, pg, s16, u16); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za64_m', which expects a 
value in the range \0, 7\} } */ ++ svsumopa_za64_m (8, pg, pg, s16, u16); /* { dg-error {passing 8 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \0, 7\} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c +index 9591e3d01..5aa0ea671 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c +@@ -4,6 +4,7 @@ + to be diagnosed. Any attempt to call the function before including + arm_sve.h will lead to a link failure. (Same for taking its address, + etc.) */ +-extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); ++extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t) ++ __arm_streaming_compatible; + + #pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c +index 85923611d..ede9a8063 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c +@@ -8,6 +8,7 @@ + explicit definition "wins". This isn't supported behavior though. */ + __SVUint8_t + svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) ++ __arm_streaming_compatible + { + return x; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c +new file mode 100644 +index 000000000..421979ea0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16, ++ svfloat32_t f32, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svread_hor_za8_m (s8, pg, 0); /* { dg-error {too few arguments to function 'svread_hor_za8_m'} } */ ++ svread_hor_za8_m (s8, pg, 0, 0, 0); /* { dg-error {too many arguments to function 'svread_hor_za8_m'} } */ ++ svread_hor_za8_m (tile, pg, 0, 0); /* { dg-error {passing 'uint32_t'.* to argument 1 of 'svread_hor_za8_m', which expects an SVE type} } */ ++ svread_hor_za8_m (pg, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svbool_t' arguments} } */ ++ svread_hor_za8_m (u16, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svuint16_t' arguments} } */ ++ svread_hor_za8_m (s8, s8, 0, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svread_hor_za8_m', which expects 'svbool_t'} } */ ++ svread_hor_za8_m (s8, pg, tile, 0); /* { dg-error {argument 3 of 'svread_hor_za8_m' must be an integer constant expression} } */ ++ svread_hor_za8_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */ ++ svread_hor_za8_m (s8, pg, 1, 0); /* { dg-error {passing 1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */ ++ svread_hor_za8_m (s8, pg, 0, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svread_hor_za8_m', which expects 'uint32_t'} } */ ++ ++ svread_hor_za16_m (u16, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \0, 1\} } */ ++ svread_hor_za16_m (u16, pg, 2, 0); /* { dg-error {passing 2 to argument 3 of 'svread_hor_za16_m', which expects a value 
in the range \0, 1\} } */ ++ ++ svread_hor_za32_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \0, 3\} } */ ++ svread_hor_za32_m (f32, pg, 4, 0); /* { dg-error {passing 4 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \0, 3\} } */ ++ ++ svread_hor_za64_m (s64, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \0, 7\} } */ ++ svread_hor_za64_m (s64, pg, 8, 0); /* { dg-error {passing 8 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \0, 7\} } */ ++ ++ svread_hor_za128_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \0, 15\} } */ ++ svread_hor_za128_m (s8, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \0, 15\} } */ ++ svread_hor_za128_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \0, 15\} } */ ++ svread_hor_za128_m (f32, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \0, 15\} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8) __arm_streaming ++{ ++ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8) __arm_inout("za") ++{ ++ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called when SME streaming mode is enabled} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c +new file mode 100644 +index 000000000..948ce2cb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svint16_t s16, svint32_t s32, svint64_t s64, ++ svfloat32_t f32, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svaddha_za32_m (0, pg, pg); /* { dg-error {too few arguments to function 'svaddha_za32_m'} } */ ++ svaddha_za32_m (0, pg, pg, s32, s32); /* { dg-error {too many arguments to function 'svaddha_za32_m'} } */ ++ svaddha_za32_m (tile, pg, pg, s32); /* { dg-error {argument 1 of 'svaddha_za32_m' must be an integer constant expression} } */ ++ svaddha_za32_m (-1, pg, pg, s32); /* { dg-error {passing -1 to argument 1 of 'svaddha_za32_m', which expects a value in the range \0, 3\} } */ ++ svaddha_za32_m (4, pg, pg, s32); /* { dg-error {passing 4 to argument 1 of 'svaddha_za32_m', which expects a value in the range \0, 3\} } */ ++ svaddha_za32_m (0, u8, pg, s32); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svaddha_za32_m', which expects 'svbool_t'} } */ ++ svaddha_za32_m (0, pg, u8, s32); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svaddha_za32_m', which expects 'svbool_t'} } */ ++ svaddha_za32_m (0, pg, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svaddha_za32_m', which expects an SVE type} } */ ++ svaddha_za32_m (0, pg, pg, pg); /* { dg-error {'svaddha_za32_m' has no form that takes 'svbool_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, u8); /* { dg-error {'svaddha_za32_m' has no form that takes 'svuint8_t' arguments} } */ ++ svaddha_za32_m (0, pg, 
pg, s16); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint16_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, f32); /* { dg-error {'svaddha_za32_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svaddha_za32_m (0, pg, pg, s64); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint64_t' arguments} } */ ++ ++ svaddha_za64_m (0, pg, pg, s64); /* { dg-error {ACLE function 'svaddha_za64_s64_m' requires ISA extension 'sme-i16i64'} } */ ++} ++ ++void ++f2 (svbool_t pg, svint32_t s32) __arm_streaming ++{ ++ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint32_t s32) __arm_inout("za") ++{ ++ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called when SME streaming mode is enabled} } */ ++} ++ ++#pragma GCC target ("arch=armv9-a+sme-i16i64") ++ ++void ++f4 (svbool_t pg, svint64_t s64) ++ __arm_streaming __arm_inout("za") ++{ ++ svaddha_za64_m (-1, pg, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svaddha_za64_m', which expects a value in the range \0, 7\} } */ ++ svaddha_za64_m (8, pg, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svaddha_za64_m', which expects a value in the range \0, 7\} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c +new file mode 100644 +index 000000000..af79c406b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sme.h> ++ ++#pragma GCC target ("arch=armv9-a+sme") ++ ++void ++f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16, ++ svfloat32_t f32, uint32_t tile) ++ __arm_streaming __arm_inout("za") ++{ ++ svwrite_ver_za8_m (0, 0, pg); /* { dg-error {too few arguments to function 'svwrite_ver_za8_m'} } */ ++ svwrite_ver_za8_m (0, 0, pg, s8, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_m'} } */ ++ svwrite_ver_za8_m (tile, 0, pg, s8); /* { dg-error {argument 1 of 'svwrite_ver_za8_m' must be an integer constant expression} } */ ++ svwrite_ver_za8_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */ ++ svwrite_ver_za8_m (1, 0, pg, s8); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */ ++ svwrite_ver_za8_m (0, u8, pg, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_m', which expects 'uint32_t'} } */ ++ svwrite_ver_za8_m (0, 0, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svwrite_ver_za8_m', which expects 'svbool_t'} } */ ++ svwrite_ver_za8_m (0, 0, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svwrite_ver_za8_m', which expects an SVE type} } */ ++ svwrite_ver_za8_m (0, 0, pg, pg); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svbool_t' arguments} } */ ++ svwrite_ver_za8_m (0, 0, pg, u16); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svuint16_t' arguments} } */ ++ ++ svwrite_ver_za16_m (-1, 0, pg, u16); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \0, 1\} } */ ++ svwrite_ver_za16_m (2, 0, pg, u16); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \0, 1\} } */ ++ ++ svwrite_ver_za32_m (-1, 0, pg, f32); /* { dg-error 
{passing -1 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \0, 3\} } */ ++ svwrite_ver_za32_m (4, 0, pg, f32); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \0, 3\} } */ ++ ++ svwrite_ver_za64_m (-1, 0, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \0, 7\} } */ ++ svwrite_ver_za64_m (8, 0, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \0, 7\} } */ ++ ++ svwrite_ver_za128_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \0, 15\} } */ ++ svwrite_ver_za128_m (16, 0, pg, s8); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \0, 15\} } */ ++ svwrite_ver_za128_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \0, 15\} } */ ++ svwrite_ver_za128_m (16, 0, pg, f32); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \0, 15\} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8) __arm_streaming ++{ ++ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called from a function that has 'za' state} } */ ++} ++ ++void ++f3 (svbool_t pg, svint8_t s8) __arm_inout("za") ++{ ++ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called when SME streaming mode is enabled} } */ ++} +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index e2a9ef5fa..292737dae 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10622,7 +10622,8 @@ proc check_effective_target_aarch64_tiny { } { + # various architecture extensions via the .arch_extension pseudo-op. + + foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" +- "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" } { ++ "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" ++ "sme" "sme-i16i64" } { + eval string map list FUNC $aarch64_ext { + proc check_effective_target_aarch64_asm_FUNC_ok { } { + if { istarget aarch64*-*-* } { +-- +2.33.0 +
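Before the next file in this revision, a note on what the ACLE tests above pin down: each TEST_WRITE_ZA case checks that one intrinsic call compiles to a single MOV/ADD/SUB of the slice index plus one predicated MOVA into a ZA tile. As a reading aid only, here is a minimal usage sketch; it is not part of the patch, and the function name is invented, but the intrinsic, its argument order and the attributes are exactly the ones the tests above use.

#include <arm_sme.h>

/* Illustrative sketch (not from the patch): write VALUES into two
   consecutive rows of 32-bit ZA tile 0 under predicate PG.  For .s
   tiles a constant row offset in the range 0-3 is folded into the
   MOVA immediate (see the write_za32_s32_0_w0p1_z0 test) rather than
   costing a separate ADD.  */
void
write_two_rows (uint32_t row, svbool_t pg, svint32_t values)
  __arm_streaming __arm_inout("za")
{
  svwrite_hor_za32_s32_m (0, row, pg, values);
  svwrite_hor_za32_s32_m (0, row + 1, pg, values);
}

The _m suffix means merging: lanes where pg is false keep the previous ZA contents, which is why every expected-assembly pattern above carries the /m qualifier on MOVA.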
View file
_service:tar_scm:0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch
Added
@@ -0,0 +1,1748 @@
+From 0ad41f11bea5c303ff39c54cae8e46afdfae6070 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 10:11:29 +0000
+Subject: [PATCH 113/157] [Backport][SME] aarch64: Add support for
+ __arm_locally_streaming
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3f6e5991fab507aa79121dc44d1afcd622c78744
+
+This patch adds support for the __arm_locally_streaming attribute,
+which allows a function to use SME internally without changing
+the function's ABI.  The attribute is valid but redundant for
+__arm_streaming functions.
+
+gcc/
+	* config/aarch64/aarch64.cc (aarch64_arm_attribute_table): Add
+	arm::locally_streaming.
+	(aarch64_fndecl_is_locally_streaming): New function.
+	(aarch64_fndecl_sm_state): Handle locally-streaming functions.
+	(aarch64_cfun_enables_pstate_sm): New function.
+	(aarch64_add_offset): Add an argument that specifies whether
+	the streaming vector length should be used instead of the
+	prevailing one.
+	(aarch64_split_add_offset, aarch64_add_sp, aarch64_sub_sp): Likewise.
+	(aarch64_allocate_and_probe_stack_space): Likewise.
+	(aarch64_expand_mov_immediate): Update calls accordingly.
+	(aarch64_need_old_pstate_sm): Return true for locally-streaming
+	streaming-compatible functions.
+	(aarch64_layout_frame): Force all call-preserved Z and P registers
+	to be saved and restored if the function switches PSTATE.SM in the
+	prologue.
+	(aarch64_get_separate_components): Disable shrink-wrapping of
+	such Z and P saves and restores.
+	(aarch64_use_late_prologue_epilogue): New function.
+	(aarch64_expand_prologue): Measure SVE lengths in the streaming
+	vector length for locally-streaming functions, then emit code
+	to enable streaming mode.
+	(aarch64_expand_epilogue): Likewise in reverse.
+	(TARGET_USE_LATE_PROLOGUE_EPILOGUE): Define.
+	* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
+	Define __arm_locally_streaming.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sme/locally_streaming_1.c: New test.
+	* gcc.target/aarch64/sme/locally_streaming_2.c: Likewise.
+	* gcc.target/aarch64/sme/locally_streaming_3.c: Likewise.
+	* gcc.target/aarch64/sme/locally_streaming_4.c: Likewise.
+	* gcc.target/aarch64/sme/keyword_macros_1.c: Add
+	__arm_locally_streaming.
+	* g++.target/aarch64/sme/keyword_macros_1.C: Likewise.
+--- + gcc/config/aarch64/aarch64-c.cc | 1 + + gcc/config/aarch64/aarch64.cc | 233 +++++++-- + .../g++.target/aarch64/sme/keyword_macros_1.C | 1 + + .../gcc.target/aarch64/sme/keyword_macros_1.c | 1 + + .../aarch64/sme/locally_streaming_1.c | 466 ++++++++++++++++++ + .../aarch64/sme/locally_streaming_2.c | 177 +++++++ + .../aarch64/sme/locally_streaming_3.c | 273 ++++++++++ + .../aarch64/sme/locally_streaming_4.c | 145 ++++++ + 8 files changed, 1259 insertions(+), 38 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index cb8a6c2fc..745719d8b 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -86,6 +86,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) + + DEFINE_ARM_KEYWORD_MACRO ("streaming"); + DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible"); ++ DEFINE_ARM_KEYWORD_MACRO ("locally_streaming"); + + #undef DEFINE_ARM_KEYWORD_MACRO + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 113784e31..4cb43c2e2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -3283,6 +3283,7 @@ static const attribute_spec aarch64_arm_attributes = + NULL, attr_streaming_exclusions }, + { "streaming_compatible", 0, 0, false, true, true, true, + NULL, attr_streaming_exclusions }, ++ { "locally_streaming", 0, 0, true, false, false, false, NULL, NULL }, + { "new", 1, -1, true, false, false, false, + handle_arm_new, NULL }, + { "preserves", 1, -1, false, true, true, true, +@@ -4657,6 +4658,16 @@ aarch64_fntype_isa_mode (const_tree fntype) + | aarch64_fntype_pstate_za (fntype)); + } + ++/* Return true if FNDECL uses streaming mode internally, as an ++ implementation choice. */ ++ ++static bool ++aarch64_fndecl_is_locally_streaming (const_tree fndecl) ++{ ++ return lookup_attribute ("arm", "locally_streaming", ++ DECL_ATTRIBUTES (fndecl)); ++} ++ + /* Return the state of PSTATE.SM when compiling the body of + function FNDECL. This might be different from the state of + PSTATE.SM on entry. */ +@@ -4664,6 +4675,9 @@ aarch64_fntype_isa_mode (const_tree fntype) + static aarch64_feature_flags + aarch64_fndecl_pstate_sm (const_tree fndecl) + { ++ if (aarch64_fndecl_is_locally_streaming (fndecl)) ++ return AARCH64_FL_SM_ON; ++ + return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); + } + +@@ -4739,6 +4753,16 @@ aarch64_cfun_has_new_state (const char *state_name) + return aarch64_fndecl_has_new_state (cfun->decl, state_name); + } + ++/* Return true if PSTATE.SM is 1 in the body of the current function, ++ but is not guaranteed to be 1 on entry. */ ++ ++static bool ++aarch64_cfun_enables_pstate_sm () ++{ ++ return (aarch64_fndecl_is_locally_streaming (cfun->decl) ++ && aarch64_cfun_incoming_pstate_sm () != AARCH64_FL_SM_ON); ++} ++ + /* Return true if the current function has state STATE_NAME, either by + creating new state itself or by sharing state with callers. */ + +@@ -6931,6 +6955,10 @@ aarch64_add_offset_temporaries (rtx x) + TEMP2, if nonnull, is a second temporary register that doesn't + overlap either DEST or REG. 
+ ++ FORCE_ISA_MODE is AARCH64_FL_SM_ON if any variable component of OFFSET ++ is measured relative to the SME vector length instead of the current ++ prevailing vector length. It is 0 otherwise. ++ + Since this function may be used to adjust the stack pointer, we must + ensure that it cannot cause transient stack deallocation (for example + by first incrementing SP and then decrementing when adjusting by a +@@ -6939,6 +6967,7 @@ aarch64_add_offset_temporaries (rtx x) + static void + aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + poly_int64 offset, rtx temp1, rtx temp2, ++ aarch64_feature_flags force_isa_mode, + bool frame_related_p, bool emit_move_imm = true) + { + gcc_assert (emit_move_imm || temp1 != NULL_RTX); +@@ -6951,9 +6980,18 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + /* Try using ADDVL or ADDPL to add the whole value. */ + if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset)) + { +- rtx offset_rtx = gen_int_mode (offset, mode); ++ gcc_assert (offset.coeffs[0] == offset.coeffs[1]); ++ rtx offset_rtx; ++ if (force_isa_mode == 0) ++ offset_rtx = gen_int_mode (offset, mode); ++ else ++ offset_rtx = aarch64_sme_vq_immediate (mode, offset.coeffs[0], 0); + rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx)); + RTX_FRAME_RELATED_P (insn) = frame_related_p; ++ if (frame_related_p && (force_isa_mode & AARCH64_FL_SM_ON)) ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (dest, plus_constant (Pmode, src, ++ offset))); + return; + } + +@@ -6969,11 +7007,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + if (src != const0_rtx + && aarch64_sve_addvl_addpl_immediate_p (poly_offset)) + { +- rtx offset_rtx = gen_int_mode (poly_offset, mode); ++ rtx offset_rtx; ++ if (force_isa_mode == 0) ++ offset_rtx = gen_int_mode (poly_offset, mode); ++ else ++ offset_rtx = aarch64_sme_vq_immediate (mode, factor, 0); + if (frame_related_p) + { + rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx)); + RTX_FRAME_RELATED_P (insn) = true; ++ if (force_isa_mode & AARCH64_FL_SM_ON) ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (dest, plus_constant (Pmode, src, ++ poly_offset))); + src = dest; + } + else +@@ -7004,9 +7050,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + rtx val; + if (IN_RANGE (rel_factor, -32, 31)) + { ++ if (force_isa_mode & AARCH64_FL_SM_ON) ++ { ++ /* Try to use an unshifted RDSVL, otherwise fall back on ++ a shifted RDSVL #1. */ ++ if (aarch64_sve_rdvl_addvl_factor_p (factor)) ++ shift = 0; ++ else ++ factor = rel_factor * 16; ++ val = aarch64_sme_vq_immediate (mode, factor, 0); ++ } + /* Try to use an unshifted CNT[BHWD] or RDVL. */ +- if (aarch64_sve_cnt_factor_p (factor) +- || aarch64_sve_rdvl_addvl_factor_p (factor)) ++ else if (aarch64_sve_cnt_factor_p (factor) ++ || aarch64_sve_rdvl_addvl_factor_p (factor)) + { + val = gen_int_mode (poly_int64 (factor, factor), mode); + shift = 0; +@@ -7036,11 +7092,18 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + a shift and add sequence for the multiplication. + If CNTB << SHIFT is out of range, stick with the current + shift factor. 
*/ +- if (IN_RANGE (low_bit, 2, 16 * 16)) ++ if (force_isa_mode == 0 ++ && IN_RANGE (low_bit, 2, 16 * 16)) + { + val = gen_int_mode (poly_int64 (low_bit, low_bit), mode); + shift = 0; + } ++ else if ((force_isa_mode & AARCH64_FL_SM_ON) ++ && aarch64_sve_rdvl_addvl_factor_p (low_bit)) ++ { ++ val = aarch64_sme_vq_immediate (mode, low_bit, 0); ++ shift = 0; ++ } + else + val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode); + +@@ -7128,30 +7191,34 @@ aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src, + rtx offset_rtx, rtx temp1, rtx temp2) + { + aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx), +- temp1, temp2, false); ++ temp1, temp2, 0, false); + } + + /* Add DELTA to the stack pointer, marking the instructions frame-related. +- TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false +- if TEMP1 already contains abs (DELTA). */ ++ TEMP1 is available as a temporary if nonnull. FORCE_ISA_MODE is as ++ for aarch64_add_offset. EMIT_MOVE_IMM is false if TEMP1 already ++ contains abs (DELTA). */ + + static inline void +-aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm) ++aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, ++ aarch64_feature_flags force_isa_mode, bool emit_move_imm) + { + aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta, +- temp1, temp2, true, emit_move_imm); ++ temp1, temp2, force_isa_mode, true, emit_move_imm); + } + + /* Subtract DELTA from the stack pointer, marking the instructions +- frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary +- if nonnull. */ ++ frame-related if FRAME_RELATED_P. FORCE_ISA_MODE is as for ++ aarch64_add_offset. TEMP1 is available as a temporary if nonnull. */ + + static inline void +-aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p, +- bool emit_move_imm = true) ++aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, ++ aarch64_feature_flags force_isa_mode, ++ bool frame_related_p, bool emit_move_imm = true) + { + aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta, +- temp1, temp2, frame_related_p, emit_move_imm); ++ temp1, temp2, force_isa_mode, frame_related_p, ++ emit_move_imm); + } + + /* A streaming-compatible function needs to switch temporarily to the known +@@ -8176,11 +8243,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) + { + base = aarch64_force_temporary (int_mode, dest, base); + aarch64_add_offset (int_mode, dest, base, offset, +- NULL_RTX, NULL_RTX, false); ++ NULL_RTX, NULL_RTX, 0, false); + } + else + aarch64_add_offset (int_mode, dest, base, offset, +- dest, NULL_RTX, false); ++ dest, NULL_RTX, 0, false); + } + return; + } +@@ -8207,7 +8274,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) + gcc_assert (can_create_pseudo_p ()); + base = aarch64_force_temporary (int_mode, dest, base); + aarch64_add_offset (int_mode, dest, base, const_offset, +- NULL_RTX, NULL_RTX, false); ++ NULL_RTX, NULL_RTX, 0, false); + return; + } + +@@ -8247,7 +8314,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) + gcc_assert(can_create_pseudo_p ()); + base = aarch64_force_temporary (int_mode, dest, base); + aarch64_add_offset (int_mode, dest, base, const_offset, +- NULL_RTX, NULL_RTX, false); ++ NULL_RTX, NULL_RTX, 0, false); + return; + } + /* FALLTHRU */ +@@ -9755,6 +9822,9 @@ aarch64_need_old_pstate_sm () + if (aarch64_cfun_incoming_pstate_sm () != 0) + return false; + ++ if (aarch64_cfun_enables_pstate_sm ()) ++ return true; ++ + if (cfun->machine->call_switches_pstate_sm) + for 
(auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (auto *call = dyn_cast<rtx_call_insn *> (insn)) +@@ -9781,6 +9851,7 @@ aarch64_layout_frame (void) + bool frame_related_fp_reg_p = false; + aarch64_frame &frame = cfun->machine->frame; + poly_int64 top_of_locals = -1; ++ bool enables_pstate_sm = aarch64_cfun_enables_pstate_sm (); + + vec_safe_truncate (frame.saved_gprs, 0); + vec_safe_truncate (frame.saved_fprs, 0); +@@ -9818,7 +9889,7 @@ aarch64_layout_frame (void) + frame.reg_offset[regno] = SLOT_REQUIRED; + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) +- if (df_regs_ever_live_p (regno) ++ if ((enables_pstate_sm || df_regs_ever_live_p (regno)) + && !fixed_regs[regno] + && !crtl->abi->clobbers_full_reg_p (regno)) + { +@@ -9847,7 +9918,7 @@ aarch64_layout_frame (void) + } + + for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) +- if (df_regs_ever_live_p (regno) ++ if ((enables_pstate_sm || df_regs_ever_live_p (regno)) + && !fixed_regs[regno] + && !crtl->abi->clobbers_full_reg_p (regno)) + frame.reg_offset[regno] = SLOT_REQUIRED; +@@ -9964,7 +10035,8 @@ aarch64_layout_frame (void) + /* If the current function changes the SVE vector length, ensure that the + old value of the DWARF VG register is saved and available in the CFI, + so that outer frames with VL-sized offsets can be processed correctly. */ +- if (cfun->machine->call_switches_pstate_sm) ++ if (cfun->machine->call_switches_pstate_sm ++ || aarch64_cfun_enables_pstate_sm ()) + { + frame.reg_offset[VG_REGNUM] = offset; + offset += UNITS_PER_WORD; +@@ -10749,9 +10821,16 @@ aarch64_get_separate_components (void) + bitmap_clear (components); + + /* The registers we need saved to the frame. */ ++ bool enables_pstate_sm = aarch64_cfun_enables_pstate_sm (); + for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) + if (aarch64_register_saved_on_entry (regno)) + { ++ /* Disallow shrink wrapping for registers that will be clobbered ++ by an SMSTART SM in the prologue. */ ++ if (enables_pstate_sm ++ && (FP_REGNUM_P (regno) || PR_REGNUM_P (regno))) ++ continue; ++ + /* Punt on saves and restores that use ST1D and LD1D. We could + try to be smarter, but it would involve making sure that the + spare predicate register itself is safe to use at the save +@@ -11070,11 +11149,16 @@ aarch64_emit_stack_tie (rtx reg) + events, e.g. if we were to allow the stack to be dropped by more than a page + and then have multiple probes up and we take a signal somewhere in between + then the signal handler doesn't know the state of the stack and can make no +- assumptions about which pages have been probed. ++ assumptions about which pages have been probed. ++ ++ FORCE_ISA_MODE is AARCH64_FL_SM_ON if any variable component of POLY_SIZE ++ is measured relative to the SME vector length instead of the current ++ prevailing vector length. It is 0 otherwise. */ + + static void + aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + poly_int64 poly_size, ++ aarch64_feature_flags force_isa_mode, + bool frame_related_p, + bool final_adjustment_p) + { +@@ -11116,7 +11200,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (known_lt (poly_size, min_probe_threshold) + || !flag_stack_clash_protection) + { +- aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p); ++ aarch64_sub_sp (temp1, temp2, poly_size, force_isa_mode, ++ frame_related_p); + return; + } + +@@ -11133,7 +11218,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + + /* First calculate the amount of bytes we're actually spilling. 
*/ + aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode), +- poly_size, temp1, temp2, false, true); ++ poly_size, temp1, temp2, force_isa_mode, ++ false, true); + + rtx_insn *insn = get_last_insn (); + +@@ -11191,7 +11277,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + { + for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size) + { +- aarch64_sub_sp (NULL, temp2, guard_size, true); ++ aarch64_sub_sp (NULL, temp2, guard_size, force_isa_mode, true); + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + guard_used_by_caller)); + emit_insn (gen_blockage ()); +@@ -11202,7 +11288,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + { + /* Compute the ending address. */ + aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size, +- temp1, NULL, false, true); ++ temp1, NULL, force_isa_mode, false, true); + rtx_insn *insn = get_last_insn (); + + /* For the initial allocation, we don't have a frame pointer +@@ -11268,7 +11354,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (final_adjustment_p && rounded_size != 0) + min_probe_threshold = 0; + +- aarch64_sub_sp (temp1, temp2, residual, frame_related_p); ++ aarch64_sub_sp (temp1, temp2, residual, force_isa_mode, frame_related_p); + if (residual >= min_probe_threshold) + { + if (dump_file) +@@ -11333,6 +11419,14 @@ aarch64_epilogue_uses (int regno) + return 0; + } + ++/* Implement TARGET_USE_LATE_PROLOGUE_EPILOGUE. */ ++ ++static bool ++aarch64_use_late_prologue_epilogue () ++{ ++ return aarch64_cfun_enables_pstate_sm (); ++} ++ + /* The current function's frame has a save slot for the incoming state + of SVCR. Return a legitimate memory for the slot, based on the hard + frame pointer. */ +@@ -11469,6 +11563,9 @@ aarch64_expand_prologue (void) + unsigned reg2 = frame.wb_push_candidate2; + bool emit_frame_chain = frame.emit_frame_chain; + rtx_insn *insn; ++ aarch64_feature_flags force_isa_mode = 0; ++ if (aarch64_cfun_enables_pstate_sm ()) ++ force_isa_mode = AARCH64_FL_SM_ON; + + if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) + { +@@ -11530,7 +11627,7 @@ aarch64_expand_prologue (void) + less the amount of the guard reserved for use by the caller's + outgoing args. */ + aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust, +- true, false); ++ force_isa_mode, true, false); + + if (callee_adjust != 0) + aarch64_push_regs (reg1, reg2, callee_adjust); +@@ -11553,7 +11650,8 @@ aarch64_expand_prologue (void) + gcc_assert (known_eq (chain_offset, 0)); + aarch64_add_offset (Pmode, hard_frame_pointer_rtx, + stack_pointer_rtx, chain_offset, +- tmp1_rtx, tmp0_rtx, frame_pointer_needed); ++ tmp1_rtx, tmp0_rtx, force_isa_mode, ++ frame_pointer_needed); + if (frame_pointer_needed && !frame_size.is_constant ()) + { + /* Variable-sized frames need to describe the save slot +@@ -11600,6 +11698,7 @@ aarch64_expand_prologue (void) + || known_eq (initial_adjust, 0)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, + sve_callee_adjust, ++ force_isa_mode, + !frame_pointer_needed, false); + bytes_below_sp -= sve_callee_adjust; + } +@@ -11612,12 +11711,15 @@ aarch64_expand_prologue (void) + that is assumed by the called. 
*/ + gcc_assert (known_eq (bytes_below_sp, final_adjust)); + aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, ++ force_isa_mode, + !frame_pointer_needed, true); + if (emit_frame_chain && maybe_ne (final_adjust, 0)) + aarch64_emit_stack_tie (hard_frame_pointer_rtx); + +- /* Save the incoming value of PSTATE.SM, if required. */ +- if (known_ge (frame.old_svcr_offset, 0)) ++ /* Save the incoming value of PSTATE.SM, if required. Code further ++ down does this for locally-streaming functions. */ ++ if (known_ge (frame.old_svcr_offset, 0) ++ && !aarch64_cfun_enables_pstate_sm ()) + { + rtx mem = aarch64_old_svcr_mem (); + MEM_VOLATILE_P (mem) = 1; +@@ -11649,6 +11751,34 @@ aarch64_expand_prologue (void) + emit_move_insn (gen_rtx_REG (DImode, R1_REGNUM), old_r1); + } + } ++ ++ /* Enable PSTATE.SM, if required. */ ++ if (aarch64_cfun_enables_pstate_sm ()) ++ { ++ rtx_insn *guard_label = nullptr; ++ if (known_ge (cfun->machine->frame.old_svcr_offset, 0)) ++ { ++ /* The current function is streaming-compatible. Save the ++ original state of PSTATE.SM. */ ++ rtx svcr = gen_rtx_REG (DImode, IP0_REGNUM); ++ emit_insn (gen_aarch64_read_svcr (svcr)); ++ emit_move_insn (aarch64_old_svcr_mem (), svcr); ++ guard_label = aarch64_guard_switch_pstate_sm (svcr, ++ aarch64_isa_flags); ++ } ++ aarch64_sme_mode_switch_regs args_switch; ++ auto &args = crtl->args.info; ++ for (unsigned int i = 0; i < args.num_sme_mode_switch_args; ++i) ++ { ++ rtx x = args.sme_mode_switch_args[i]; ++ args_switch.add_reg (GET_MODE (x), REGNO (x)); ++ } ++ args_switch.emit_prologue (); ++ emit_insn (gen_aarch64_smstart_sm ()); ++ args_switch.emit_epilogue (); ++ if (guard_label) ++ emit_label (guard_label); ++ } + } + + /* Return TRUE if we can use a simple_return insn. +@@ -11695,6 +11825,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + HOST_WIDE_INT guard_size + = 1 << param_stack_clash_protection_guard_size; + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; ++ aarch64_feature_flags force_isa_mode = 0; ++ if (aarch64_cfun_enables_pstate_sm ()) ++ force_isa_mode = AARCH64_FL_SM_ON; + + /* We can re-use the registers when: + +@@ -11719,6 +11852,24 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + = maybe_ne (get_frame_size () + + frame.saved_varargs_size, 0); + ++ /* Reset PSTATE.SM, if required. */ ++ if (aarch64_cfun_enables_pstate_sm ()) ++ { ++ rtx_insn *guard_label = nullptr; ++ if (known_ge (cfun->machine->frame.old_svcr_offset, 0)) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ aarch64_isa_flags); ++ aarch64_sme_mode_switch_regs return_switch; ++ if (crtl->return_rtx && REG_P (crtl->return_rtx)) ++ return_switch.add_reg (GET_MODE (crtl->return_rtx), ++ REGNO (crtl->return_rtx)); ++ return_switch.emit_prologue (); ++ emit_insn (gen_aarch64_smstop_sm ()); ++ return_switch.emit_epilogue (); ++ if (guard_label) ++ emit_label (guard_label); ++ } ++ + /* Emit a barrier to prevent loads from a deallocated stack. */ + if (maybe_gt (final_adjust, crtl->outgoing_args_size) + || cfun->calls_alloca + || crtl->calls_eh_return) + { + emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); + need_barrier_p = false; + } + +@@ -11739,19 +11890,21 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + aarch64_add_offset (Pmode, stack_pointer_rtx, + hard_frame_pointer_rtx, + -bytes_below_hard_fp + final_adjust, +- tmp1_rtx, tmp0_rtx, callee_adjust == 0); ++ tmp1_rtx, tmp0_rtx, force_isa_mode, ++ callee_adjust == 0); + else + /* The case where we need to re-use the register here is very rare, so + avoid the complicated condition and just always emit a move if the + immediate doesn't fit. 
*/ +- aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); ++ aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, force_isa_mode, true); + + /* Restore the vector registers before the predicate registers, + so that we can use P4 as a temporary for big-endian SVE frames. */ + aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops); + aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops); + if (maybe_ne (sve_callee_adjust, 0)) +- aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); ++ aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, ++ force_isa_mode, true); + + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, we don't need to restore x30 again in the traditional +@@ -11781,7 +11934,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + + /* Liveness of EP0_REGNUM can not be trusted across function calls either, so + add restriction on emit_move optimization to leaf functions. */ +- aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust, ++ aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust, force_isa_mode, + (!can_inherit_p || !crtl->is_leaf + || df_regs_ever_live_p (EP0_REGNUM))); + +@@ -11914,7 +12067,8 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + temp1 = gen_rtx_REG (Pmode, EP1_REGNUM); + + if (vcall_offset == 0) +- aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false); ++ aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, ++ 0, false); + else + { + gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); +@@ -11927,7 +12081,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + plus_constant (Pmode, this_rtx, delta)); + else + aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, +- temp1, temp0, false); ++ temp1, temp0, 0, false); + } + + if (Pmode == ptr_mode) +@@ -30962,6 +31116,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_EXTRA_LIVE_ON_ENTRY + #define TARGET_EXTRA_LIVE_ON_ENTRY aarch64_extra_live_on_entry + ++#undef TARGET_USE_LATE_PROLOGUE_EPILOGUE ++#define TARGET_USE_LATE_PROLOGUE_EPILOGUE aarch64_use_late_prologue_epilogue ++ + #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL + #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue + +diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C +index 8b0755014..dc5c097bd 100644 +--- a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C ++++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C +@@ -7,3 +7,4 @@ void f4 () __arm_out("za"); + void f5 () __arm_inout("za"); + void f6 () __arm_preserves("za"); + __arm_new("za") void f7 () {} ++__arm_locally_streaming void f8 () {} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c +index fcabe3edc..22f5facfd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c +@@ -7,3 +7,4 @@ void f4 () __arm_out("za"); + void f5 () __arm_inout("za"); + void f6 () __arm_preserves("za"); + __arm_new("za") void f7 () {} ++__arm_locally_streaming void f8 () {} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +new file mode 100644 +index 000000000..20ff4b87d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +@@ -0,0 +1,466 @@ ++// { dg-options "-O -fomit-frame-pointer" } ++// { 
dg-final { check-function-bodies "**" "" } } ++ ++void consume_za () arm::streaming, arm::inout("za"); ++ ++/* ++** n_ls: ++** sub sp, sp, #?80 ++** cntd x16 ++** str x16, \sp\ ++** stp d8, d9, \sp, #?16\ ++** stp d10, d11, \sp, #?32\ ++** stp d12, d13, \sp, #?48\ ++** stp d14, d15, \sp, #?64\ ++** smstart sm ++** smstop sm ++** ldp d8, d9, \sp, #?16\ ++** ldp d10, d11, \sp, #?32\ ++** ldp d12, d13, \sp, #?48\ ++** ldp d14, d15, \sp, #?64\ ++** add sp, sp, #?80 ++** ret ++*/ ++arm::locally_streaming void ++n_ls () ++{ ++ asm (""); ++} ++ ++/* ++** s_ls: ++** ret ++*/ ++arm::locally_streaming void ++s_ls () arm::streaming ++{ ++ asm (""); ++} ++ ++/* ++** sc_ls: ++** stp x29, x30, \sp, #?-96\! ++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** mrs x16, svcr ++** str x16, \x29, #?16\ ++** tbnz x16, 0, ^\n+ ++** smstart sm ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, ^\n+ ++** smstop sm ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x29, x30, \sp\, #?96 ++** ret ++*/ ++arm::locally_streaming void ++sc_ls () arm::streaming_compatible ++{ ++ asm (""); ++} ++ ++/* ++** n_ls_new_za: ++** str x30, \sp, #?-80\! ++** cntd x16 ++** str x16, \sp, #?8\ ++** stp d8, d9, \sp, #?16\ ++** stp d10, d11, \sp, #?32\ ++** stp d12, d13, \sp, #?48\ ++** stp d14, d15, \sp, #?64\ ++** smstart sm ++** mrs (x0-9+), tpidr2_el0 ++** cbz \1, ^\n+ ++** bl __arm_tpidr2_save ++** msr tpidr2_el0, xzr ++** zero { za } ++** smstart za ++** bl consume_za ++** smstop za ++** smstop sm ++** ldp d8, d9, \sp, #?16\ ++** ldp d10, d11, \sp, #?32\ ++** ldp d12, d13, \sp, #?48\ ++** ldp d14, d15, \sp, #?64\ ++** ldr x30, \sp\, #?80 ++** ret ++*/ ++arm::locally_streaming, arm::new("za") void ++n_ls_new_za () ++{ ++ consume_za (); ++ asm (""); ++} ++ ++/* ++** s_ls_new_za: ++** str x30, \sp, #?-16\! ++** mrs (x0-9+), tpidr2_el0 ++** cbz \1, ^\n+ ++** bl __arm_tpidr2_save ++** msr tpidr2_el0, xzr ++** zero { za } ++** smstart za ++** bl consume_za ++** smstop za ++** ldr x30, \sp\, #?16 ++** ret ++*/ ++arm::locally_streaming, arm::new("za") void ++s_ls_new_za () arm::streaming ++{ ++ consume_za (); ++ asm (""); ++} ++ ++/* ++** sc_ls_new_za: ++** stp x29, x30, \sp, #?-96\! ++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** mrs x16, svcr ++** str x16, \x29, #?16\ ++** tbnz x16, 0, ^\n+ ++** smstart sm ++** mrs (x0-9+), tpidr2_el0 ++** cbz \1, ^\n+ ++** bl __arm_tpidr2_save ++** msr tpidr2_el0, xzr ++** zero { za } ++** smstart za ++** bl consume_za ++** smstop za ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, ^\n+ ++** smstop sm ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x29, x30, \sp\, #?96 ++** ret ++*/ ++arm::locally_streaming, arm::new("za") void ++sc_ls_new_za () arm::streaming_compatible ++{ ++ consume_za (); ++ asm (""); ++} ++ ++/* ++** n_ls_shared_za: ++** str x30, \sp, #?-80\! 
++** cntd x16 ++** str x16, \sp, #?8\ ++** stp d8, d9, \sp, #?16\ ++** stp d10, d11, \sp, #?32\ ++** stp d12, d13, \sp, #?48\ ++** stp d14, d15, \sp, #?64\ ++** smstart sm ++** bl consume_za ++** smstop sm ++** ldp d8, d9, \sp, #?16\ ++** ldp d10, d11, \sp, #?32\ ++** ldp d12, d13, \sp, #?48\ ++** ldp d14, d15, \sp, #?64\ ++** ldr x30, \sp\, #?80 ++** ret ++*/ ++arm::locally_streaming void ++n_ls_shared_za () arm::inout("za") ++{ ++ consume_za (); ++ asm (""); ++} ++ ++/* ++** s_ls_shared_za: ++** str x30, \sp, #?-16\! ++** bl consume_za ++** ldr x30, \sp\, #?16 ++** ret ++*/ ++arm::locally_streaming void ++s_ls_shared_za () arm::streaming, arm::inout("za") ++{ ++ consume_za (); ++ asm (""); ++} ++ ++/* ++** sc_ls_shared_za: ++** stp x29, x30, \sp, #?-96\! ++** mov x29, sp ++** cntd x16 ++** str x16, \sp, #?24\ ++** stp d8, d9, \sp, #?32\ ++** stp d10, d11, \sp, #?48\ ++** stp d12, d13, \sp, #?64\ ++** stp d14, d15, \sp, #?80\ ++** mrs x16, svcr ++** str x16, \x29, #?16\ ++** tbnz x16, 0, ^\n+ ++** smstart sm ++** bl consume_za ++** ldr x16, \x29, #?16\ ++** tbnz x16, 0, ^\n+ ++** smstop sm ++** ldp d8, d9, \sp, #?32\ ++** ldp d10, d11, \sp, #?48\ ++** ldp d12, d13, \sp, #?64\ ++** ldp d14, d15, \sp, #?80\ ++** ldp x29, x30, \sp\, #?96 ++** ret ++*/ ++arm::locally_streaming void ++sc_ls_shared_za () arm::streaming_compatible, arm::inout("za") ++{ ++ consume_za (); ++ asm (""); ++} ++ ++/* ++** n_ls_vector_pcs: ++** sub sp, sp, #?272 ++** cntd x16 ++** str x16, \sp\ ++** stp q8, q9, \sp, #?16\ ++** stp q10, q11, \sp, #?48\ ++** stp q12, q13, \sp, #?80\ ++** stp q14, q15, \sp, #?112\ ++** stp q16, q17, \sp, #?144\ ++** stp q18, q19, \sp, #?176\ ++** stp q20, q21, \sp, #?208\ ++** stp q22, q23, \sp, #?240\ ++** smstart sm ++** smstop sm ++** ldp q8, q9, \sp, #?16\ ++** ldp q10, q11, \sp, #?48\ ++** ldp q12, q13, \sp, #?80\ ++** ldp q14, q15, \sp, #?112\ ++** ldp q16, q17, \sp, #?144\ ++** ldp q18, q19, \sp, #?176\ ++** ldp q20, q21, \sp, #?208\ ++** ldp q22, q23, \sp, #?240\ ++** add sp, sp, #?272 ++** ret ++*/ ++arm::locally_streaming void __attribute__((aarch64_vector_pcs)) ++n_ls_vector_pcs () ++{ ++ asm (""); ++} ++ ++/* ++** n_ls_sve_pcs: ++** sub sp, sp, #?16 ++** cntd x16 ++** str x16, \sp\ ++** addsvl sp, sp, #-18 ++** str p4, \sp\ ++** str p5, \sp, #1, mul vl\ ++** str p6, \sp, #2, mul vl\ ++** str p7, \sp, #3, mul vl\ ++** str p8, \sp, #4, mul vl\ ++** str p9, \sp, #5, mul vl\ ++** str p10, \sp, #6, mul vl\ ++** str p11, \sp, #7, mul vl\ ++** str p12, \sp, #8, mul vl\ ++** str p13, \sp, #9, mul vl\ ++** str p14, \sp, #10, mul vl\ ++** str p15, \sp, #11, mul vl\ ++** str z8, \sp, #2, mul vl\ ++** str z9, \sp, #3, mul vl\ ++** str z10, \sp, #4, mul vl\ ++** str z11, \sp, #5, mul vl\ ++** str z12, \sp, #6, mul vl\ ++** str z13, \sp, #7, mul vl\ ++** str z14, \sp, #8, mul vl\ ++** str z15, \sp, #9, mul vl\ ++** str z16, \sp, #10, mul vl\ ++** str z17, \sp, #11, mul vl\ ++** str z18, \sp, #12, mul vl\ ++** str z19, \sp, #13, mul vl\ ++** str z20, \sp, #14, mul vl\ ++** str z21, \sp, #15, mul vl\ ++** str z22, \sp, #16, mul vl\ ++** str z23, \sp, #17, mul vl\ ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** smstop sm ++** ldr z8, \sp, #2, mul vl\ ++** ldr z9, \sp, #3, mul vl\ ++** ldr z10, \sp, #4, mul vl\ ++** ldr z11, \sp, #5, mul vl\ ++** ldr z12, \sp, #6, mul vl\ ++** ldr z13, \sp, #7, mul vl\ ++** ldr z14, \sp, #8, mul vl\ ++** ldr z15, \sp, #9, mul vl\ ++** ldr z16, \sp, #10, mul vl\ ++** ldr z17, \sp, #11, mul vl\ ++** ldr z18, 
\sp, #12, mul vl\ ++** ldr z19, \sp, #13, mul vl\ ++** ldr z20, \sp, #14, mul vl\ ++** ldr z21, \sp, #15, mul vl\ ++** ldr z22, \sp, #16, mul vl\ ++** ldr z23, \sp, #17, mul vl\ ++** ldr p4, \sp\ ++** ldr p5, \sp, #1, mul vl\ ++** ldr p6, \sp, #2, mul vl\ ++** ldr p7, \sp, #3, mul vl\ ++** ldr p8, \sp, #4, mul vl\ ++** ldr p9, \sp, #5, mul vl\ ++** ldr p10, \sp, #6, mul vl\ ++** ldr p11, \sp, #7, mul vl\ ++** ldr p12, \sp, #8, mul vl\ ++** ldr p13, \sp, #9, mul vl\ ++** ldr p14, \sp, #10, mul vl\ ++** ldr p15, \sp, #11, mul vl\ ++** addsvl sp, sp, #18 ++** add sp, sp, #?16 ++** ret ++*/ ++arm::locally_streaming void ++n_ls_sve_pcs (__SVBool_t x) ++{ ++ asm (""); ++} ++ ++/* ++** n_ls_v0: ++** addsvl sp, sp, #-1 ++** ... ++** smstart sm ++** add x0-9+, ^\n+ ++** smstop sm ++** ... ++** addsvl sp, sp, #1 ++** ... ++*/ ++#define TEST(VN) __SVInt32_t VN; asm ("" :: "r" (&VN)); ++arm::locally_streaming void ++n_ls_v0 () ++{ ++ TEST (v0); ++} ++ ++/* ++** n_ls_v32: ++** addsvl sp, sp, #-32 ++** ... ++** smstart sm ++** ... ++** smstop sm ++** ... ++** rdsvl (x0-9+), #1 ++** lsl (x0-9+), \1, #?5 ++** add sp, sp, \2 ++** ... ++*/ ++arm::locally_streaming void ++n_ls_v32 () ++{ ++ TEST (v0); ++ TEST (v1); ++ TEST (v2); ++ TEST (v3); ++ TEST (v4); ++ TEST (v5); ++ TEST (v6); ++ TEST (v7); ++ TEST (v8); ++ TEST (v9); ++ TEST (v10); ++ TEST (v11); ++ TEST (v12); ++ TEST (v13); ++ TEST (v14); ++ TEST (v15); ++ TEST (v16); ++ TEST (v17); ++ TEST (v18); ++ TEST (v19); ++ TEST (v20); ++ TEST (v21); ++ TEST (v22); ++ TEST (v23); ++ TEST (v24); ++ TEST (v25); ++ TEST (v26); ++ TEST (v27); ++ TEST (v28); ++ TEST (v29); ++ TEST (v30); ++ TEST (v31); ++} ++ ++/* ++** n_ls_v33: ++** rdsvl (x0-9+), #1 ++** mov (x0-9+), #?33 ++** mul (x0-9+), (?:\1, \2|\2, \1) ++** sub sp, sp, \3 ++** ... ++** smstart sm ++** ... ++** smstop sm ++** ... ++** rdsvl (x0-9+), #1 ++** mov (x0-9+), #?33 ++** mul (x0-9+), (?:\4, \5|\5, \4) ++** add sp, sp, \6 ++** ... ++*/ ++arm::locally_streaming void ++n_ls_v33 () ++{ ++ TEST (v0); ++ TEST (v1); ++ TEST (v2); ++ TEST (v3); ++ TEST (v4); ++ TEST (v5); ++ TEST (v6); ++ TEST (v7); ++ TEST (v8); ++ TEST (v9); ++ TEST (v10); ++ TEST (v11); ++ TEST (v12); ++ TEST (v13); ++ TEST (v14); ++ TEST (v15); ++ TEST (v16); ++ TEST (v17); ++ TEST (v18); ++ TEST (v19); ++ TEST (v20); ++ TEST (v21); ++ TEST (v22); ++ TEST (v23); ++ TEST (v24); ++ TEST (v25); ++ TEST (v26); ++ TEST (v27); ++ TEST (v28); ++ TEST (v29); ++ TEST (v30); ++ TEST (v31); ++ TEST (v32); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c +new file mode 100644 +index 000000000..0eba99385 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c +@@ -0,0 +1,177 @@ ++// { dg-options "-O -fomit-frame-pointer" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++#include <arm_neon.h> ++#include <arm_sve.h> ++ ++/* ++** test_d0: ++** ... ++** smstart sm ++** ... ++** fmov x10, d0 ++** smstop sm ++** fmov d0, x10 ++** ... ++*/ ++arm::locally_streaming double ++test_d0 () ++{ ++ asm (""); ++ return 1.0f; ++} ++ ++/* ++** test_d0_vec: ++** ... ++** smstart sm ++** ... ++** ( ++** fmov x10, d0 ++** | ++** umov x10, v0.d\0\ ++** ) ++** smstop sm ++** fmov d0, x10 ++** ... ++*/ ++arm::locally_streaming int8x8_t ++test_d0_vec () ++{ ++ asm (""); ++ return (int8x8_t) {}; ++} ++ ++/* ++** test_q0: ++** ... ++** smstart sm ++** ... ++** str q0, \sp, #?-16\! ++** smstop sm ++** ldr q0, \sp\, #?16 ++** ... 
++*/ ++arm::locally_streaming int8x16_t ++test_q0 () ++{ ++ asm (""); ++ return (int8x16_t) {}; ++} ++ ++/* ++** test_q1: ++** ... ++** smstart sm ++** ... ++** stp q0, q1, \sp, #?-32\! ++** smstop sm ++** ldp q0, q1, \sp\, #?32 ++** ... ++*/ ++arm::locally_streaming int8x16x2_t ++test_q1 () ++{ ++ asm (""); ++ return (int8x16x2_t) {}; ++} ++ ++/* ++** test_q2: ++** ... ++** smstart sm ++** ... ++** stp q0, q1, \sp, #?-48\! ++** str q2, \sp, #?32\ ++** smstop sm ++** ldr q2, \sp, #?32\ ++** ldp q0, q1, \sp\, #?48 ++** ... ++*/ ++arm::locally_streaming int8x16x3_t ++test_q2 () ++{ ++ asm (""); ++ return (int8x16x3_t) {}; ++} ++ ++/* ++** test_q3: ++** ... ++** smstart sm ++** ... ++** stp q0, q1, \sp, #?-64\! ++** stp q2, q3, \sp, #?32\ ++** smstop sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q0, q1, \sp\, #?64 ++** ... ++*/ ++arm::locally_streaming int8x16x4_t ++test_q3 () ++{ ++ asm (""); ++ return (int8x16x4_t) {}; ++} ++ ++/* ++** test_z0: ++** ... ++** smstart sm ++** mov z0\.b, #0 ++** addvl sp, sp, #-1 ++** str z0, \sp\ ++** smstop sm ++** ldr z0, \sp\ ++** addvl sp, sp, #1 ++** ... ++*/ ++arm::locally_streaming svint8_t ++test_z0 () ++{ ++ asm (""); ++ return (svint8_t) {}; ++} ++ ++/* ++** test_z3: ++** ... ++** smstart sm ++** ... ++** addvl sp, sp, #-4 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** smstop sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** ... ++*/ ++arm::locally_streaming svint8x4_t ++test_z3 () ++{ ++ asm (""); ++ return (svint8x4_t) {}; ++} ++ ++/* ++** test_p0: ++** ... ++** smstart sm ++** pfalse p0\.b ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstop sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** ... ++*/ ++arm::locally_streaming svbool_t ++test_p0 () ++{ ++ asm (""); ++ return (svbool_t) {}; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c +new file mode 100644 +index 000000000..2bdea6ac6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c +@@ -0,0 +1,273 @@ ++// { dg-options "-O -fomit-frame-pointer" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++#include <arm_neon.h> ++#include <arm_sve.h> ++ ++/* ++** test_d0: ++** ... ++** fmov x10, d0 ++** smstart sm ++** fmov d0, x10 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_d0 (double d0) ++{ ++ asm (""); ++} ++ ++/* ++** test_d7: ++** ... ++** fmov x10, d0 ++** fmov x11, d1 ++** fmov x12, d2 ++** fmov x13, d3 ++** fmov x14, d4 ++** fmov x15, d5 ++** fmov x16, d6 ++** fmov x17, d7 ++** smstart sm ++** fmov d0, x10 ++** fmov d1, x11 ++** fmov d2, x12 ++** fmov d3, x13 ++** fmov d4, x14 ++** fmov d5, x15 ++** fmov d6, x16 ++** fmov d7, x17 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_d7 (double d0, double d1, double d2, double d3, ++ double d4, double d5, double d6, double d7) ++{ ++ asm (""); ++} ++ ++/* ++** test_d0_vec: ++** ... ++** ( ++** fmov x10, d0 ++** | ++** umov x10, v0.d\0\ ++** ) ++** smstart sm ++** fmov d0, x10 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_d0_vec (int8x8_t d0) ++{ ++ asm (""); ++} ++ ++/* ++** test_d7_vec: ++** ... 
++** ( ++** fmov x10, d0 ++** fmov x11, d1 ++** fmov x12, d2 ++** fmov x13, d3 ++** fmov x14, d4 ++** fmov x15, d5 ++** fmov x16, d6 ++** fmov x17, d7 ++** | ++** umov x10, v0.d\0\ ++** umov x11, v1.d\0\ ++** umov x12, v2.d\0\ ++** umov x13, v3.d\0\ ++** umov x14, v4.d\0\ ++** umov x15, v5.d\0\ ++** umov x16, v6.d\0\ ++** umov x17, v7.d\0\ ++** ) ++** smstart sm ++** fmov d0, x10 ++** fmov d1, x11 ++** fmov d2, x12 ++** fmov d3, x13 ++** fmov d4, x14 ++** fmov d5, x15 ++** fmov d6, x16 ++** fmov d7, x17 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_d7_vec (int8x8_t d0, int8x8_t d1, int8x8_t d2, int8x8_t d3, ++ int8x8_t d4, int8x8_t d5, int8x8_t d6, int8x8_t d7) ++{ ++ asm (""); ++} ++ ++/* ++** test_q0: ++** ... ++** str q0, \sp, #?-16\! ++** smstart sm ++** ldr q0, \sp\, #?16 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_q0 (int8x16_t q0) ++{ ++ asm (""); ++} ++ ++/* ++** test_q7: ++** ... ++** stp q0, q1, \sp, #?-128\! ++** stp q2, q3, \sp, #?32\ ++** stp q4, q5, \sp, #?64\ ++** stp q6, q7, \sp, #?96\ ++** smstart sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q4, q5, \sp, #?64\ ++** ldp q6, q7, \sp, #?96\ ++** ldp q0, q1, \sp\, #?128 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_q7 (int8x16x4_t q0, int8x16x4_t q4) ++{ ++ asm (""); ++} ++ ++/* ++** test_z0: ++** ... ++** addvl sp, sp, #-1 ++** str z0, \sp\ ++** smstart sm ++** ldr z0, \sp\ ++** addvl sp, sp, #1 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_z0 (svint8_t z0) ++{ ++ asm (""); ++} ++ ++/* ++** test_z7: ++** ... ++** addvl sp, sp, #-8 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** str z4, \sp, #4, mul vl\ ++** str z5, \sp, #5, mul vl\ ++** str z6, \sp, #6, mul vl\ ++** str z7, \sp, #7, mul vl\ ++** smstart sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** ldr z4, \sp, #4, mul vl\ ++** ldr z5, \sp, #5, mul vl\ ++** ldr z6, \sp, #6, mul vl\ ++** ldr z7, \sp, #7, mul vl\ ++** addvl sp, sp, #8 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_z7 (svint8x4_t z0, svint8x4_t z4) ++{ ++ asm (""); ++} ++ ++/* ++** test_p0: ++** ... ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** smstart sm ++** ldr p0, \sp\ ++** addvl sp, sp, #1 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_p0 (svbool_t p0) ++{ ++ asm (""); ++} ++ ++/* ++** test_p3: ++** ... ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** str p1, \sp, #1, mul vl\ ++** str p2, \sp, #2, mul vl\ ++** str p3, \sp, #3, mul vl\ ++** smstart sm ++** ldr p0, \sp\ ++** ldr p1, \sp, #1, mul vl\ ++** ldr p2, \sp, #2, mul vl\ ++** ldr p3, \sp, #3, mul vl\ ++** addvl sp, sp, #1 ++** smstop sm ++** ... ++*/ ++arm::locally_streaming void ++test_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm (""); ++} ++ ++/* ++** test_mixed: ++** ... ++** addvl sp, sp, #-3 ++** str p0, \sp\ ++** str p1, \sp, #1, mul vl\ ++** str p2, \sp, #2, mul vl\ ++** str p3, \sp, #3, mul vl\ ++** str z3, \sp, #1, mul vl\ ++** str z7, \sp, #2, mul vl\ ++** stp q2, q6, \sp, #?-32\! ++** fmov w10, s0 ++** fmov x11, d1 ++** fmov w12, s4 ++** fmov x13, d5 ++** smstart sm ++** fmov s0, w10 ++** fmov d1, x11 ++** fmov s4, w12 ++** fmov d5, x13 ++** ldp q2, q6, \sp\, #?32 ++** ldr p0, \sp\ ++** ldr p1, \sp, #1, mul vl\ ++** ldr p2, \sp, #2, mul vl\ ++** ldr p3, \sp, #3, mul vl\ ++** ldr z3, \sp, #1, mul vl\ ++** ldr z7, \sp, #2, mul vl\ ++** addvl sp, sp, #3 ++** smstop sm ++** ... 
++*/ ++arm::locally_streaming void ++test_mixed (float s0, double d1, float32x4_t q2, svfloat32_t z3, ++ float s4, double d5, float64x2_t q6, svfloat64_t z7, ++ svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm (""); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c +new file mode 100644 +index 000000000..42adeb152 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c +@@ -0,0 +1,145 @@ ++// { dg-options "-O -fomit-frame-pointer" } ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include <arm_neon.h> ++#include <arm_sve.h> ++ ++/* ++** test_d0: ++** ... ++** smstart sm ++** ... ++** fmov x10, d0 ++** smstop sm ++** fmov d0, x10 ++** ... ++** smstart sm ++** ... ++** smstop sm ++** ... ++*/ ++void consume_d0 (double d0); ++ ++__arm_locally_streaming void ++test_d0 () ++{ ++ asm (""); ++ consume_d0 (1.0); ++ asm (""); ++} ++ ++/* ++** test_d7: ++** ... ++** fmov x10, d0 ++** fmov x11, d1 ++** fmov x12, d2 ++** fmov x13, d3 ++** fmov x14, d4 ++** fmov x15, d5 ++** fmov x16, d6 ++** fmov x17, d7 ++** smstop sm ++** fmov d0, x10 ++** fmov d1, x11 ++** fmov d2, x12 ++** fmov d3, x13 ++** fmov d4, x14 ++** fmov d5, x15 ++** fmov d6, x16 ++** fmov d7, x17 ++** ... ++*/ ++void consume_d7 (double d0, double d1, double d2, double d3, ++ double d4, double d5, double d6, double d7); ++__arm_locally_streaming void ++test_d7 () ++{ ++ asm (""); ++ consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); ++ asm (""); ++} ++ ++/* ++** test_q7: ++** ... ++** stp q0, q1, \sp, #?-128\! ++** stp q2, q3, \sp, #?32\ ++** stp q4, q5, \sp, #?64\ ++** stp q6, q7, \sp, #?96\ ++** smstop sm ++** ldp q2, q3, \sp, #?32\ ++** ldp q4, q5, \sp, #?64\ ++** ldp q6, q7, \sp, #?96\ ++** ldp q0, q1, \sp\, #?128 ++** ... ++*/ ++void consume_q7 (int8x16x4_t q0, int8x16x4_t q4); ++ ++__arm_locally_streaming void ++test_q7 (int8x16x4_t *ptr) ++{ ++ asm (""); ++ consume_q7 (ptr0, ptr1); ++ asm (""); ++} ++ ++/* ++** test_z7: ++** ... ++** addvl sp, sp, #-8 ++** str z0, \sp\ ++** str z1, \sp, #1, mul vl\ ++** str z2, \sp, #2, mul vl\ ++** str z3, \sp, #3, mul vl\ ++** str z4, \sp, #4, mul vl\ ++** str z5, \sp, #5, mul vl\ ++** str z6, \sp, #6, mul vl\ ++** str z7, \sp, #7, mul vl\ ++** smstop sm ++** ldr z0, \sp\ ++** ldr z1, \sp, #1, mul vl\ ++** ldr z2, \sp, #2, mul vl\ ++** ldr z3, \sp, #3, mul vl\ ++** ldr z4, \sp, #4, mul vl\ ++** ldr z5, \sp, #5, mul vl\ ++** ldr z6, \sp, #6, mul vl\ ++** ldr z7, \sp, #7, mul vl\ ++** addvl sp, sp, #8 ++** ... ++*/ ++void consume_z7 (svint8x4_t z0, svint8x4_t z4); ++ ++__arm_locally_streaming void ++test_z7 (svint8x4_t *ptr1, svint8x4_t *ptr2) ++{ ++ asm (""); ++ consume_z7 (*ptr1, *ptr2); ++ asm (""); ++} ++ ++/* ++** test_p3: ++** ... ++** addvl sp, sp, #-1 ++** str p0, \sp\ ++** str p1, \sp, #1, mul vl\ ++** str p2, \sp, #2, mul vl\ ++** str p3, \sp, #3, mul vl\ ++** smstop sm ++** ldr p0, \sp\ ++** ldr p1, \sp, #1, mul vl\ ++** ldr p2, \sp, #2, mul vl\ ++** ldr p3, \sp, #3, mul vl\ ++** addvl sp, sp, #1 ++** ... ++*/ ++void consume_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3); ++ ++__arm_locally_streaming void ++test_p3 (svbool_t *ptr1, svbool_t *ptr2, svbool_t *ptr3, svbool_t *ptr4) ++{ ++ asm (""); ++ consume_p3 (*ptr1, *ptr2, *ptr3, *ptr4); ++ asm (""); ++} +-- +2.33.0 +
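To make the effect of the patch above concrete, here is a minimal usage sketch, assuming only what the commit message and tests state (the function name is invented for illustration):

/* A locally-streaming function: the compiler emits SMSTART SM in the
   prologue and SMSTOP SM in the epilogue, so the function runs in
   streaming mode internally while callers keep using the normal
   non-streaming ABI and need no mode switch of their own.  */
__arm_locally_streaming double
add_in_streaming_mode (double a, double b)
{
  return a + b;
}

As the n_ls test above shows, the prologue and epilogue also save and restore the call-preserved FP registers (d8-d15) around the SMSTART SM/SMSTOP SM pair, because switching PSTATE.SM clobbers them.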
View file
_service:tar_scm:0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch
Added
@@ -0,0 +1,708 @@ +From ef9c800309fa326ca56dd9d9affd7d5498624bb8 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:29 +0000 +Subject: [PATCH 114/157] [Backport][SME] aarch64: Handle PSTATE.SM across + abnormal edges + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=275706fc59b5fdcc26d46d9f19951fc86b40c515 + +PSTATE.SM is always off on entry to an exception handler, and on entry +to a nonlocal goto receiver. Those entry points need to switch +PSTATE.SM back to the appropriate state for the current function. +In the case of streaming-compatible functions, they need to restore +the mode that the caller was originally using. + +The requirement on nonlocal goto receivers means that nonlocal +jumps need to ensure that PSTATE.SM is zero. + +gcc/ + * config/aarch64/aarch64.cc: Include except.h + (aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function. + (aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise. + (aarch64_need_old_pstate_sm): Return true if the function has + a nonlocal-goto or exception receiver. + (aarch64_switch_pstate_sm_for_landing_pad): New function. + (aarch64_switch_pstate_sm_for_jump): Likewise. + (pass_switch_pstate_sm::gate): Enable the pass for all + streaming and streaming-compatible functions. + (pass_switch_pstate_sm::execute): Handle non-local gotos and their + receivers. Handle exception handler entry points. + +gcc/testsuite/ + * g++.target/aarch64/sme/exceptions_2.C: New test. + * gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise. + * gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise.
+--- + gcc/config/aarch64/aarch64.cc | 141 ++++++++++++++++- + .../g++.target/aarch64/sme/exceptions_2.C | 148 ++++++++++++++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_1.c | 58 +++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_2.c | 44 ++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_3.c | 46 ++++++ + .../gcc.target/aarch64/sme/nonlocal_goto_4.c | 25 +++ + .../gcc.target/aarch64/sme/nonlocal_goto_5.c | 26 +++ + .../gcc.target/aarch64/sme/nonlocal_goto_6.c | 31 ++++ + .../gcc.target/aarch64/sme/nonlocal_goto_7.c | 25 +++ + 9 files changed, 537 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 4cb43c2e2..effb567c2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -82,6 +82,7 @@ + #include "tree-dfa.h" + #include "asan.h" + #include "aarch64-feature-deps.h" ++#include "except.h" + #include "tree-pass.h" + #include "cfgbuild.h" + +@@ -7295,6 +7296,8 @@ public: + void add_reg (machine_mode, unsigned int); + void add_call_args (rtx_call_insn *); + void add_call_result (rtx_call_insn *); ++ void add_call_preserved_reg (unsigned int); ++ void add_call_preserved_regs (bitmap); + + void emit_prologue (); + void emit_epilogue (); +@@ -7427,6 +7430,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) + add_reg (GET_MODE (dest), REGNO (dest)); + } + ++/* REGNO is a register that is call-preserved under the current function's ABI. ++ Record that it must be preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno) ++{ ++ if (FP_REGNUM_P (regno)) ++ switch (crtl->abi->id ()) ++ { ++ case ARM_PCS_SVE: ++ add_reg (VNx16QImode, regno); ++ break; ++ case ARM_PCS_SIMD: ++ add_reg (V16QImode, regno); ++ break; ++ case ARM_PCS_AAPCS64: ++ add_reg (DImode, regno); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ else if (PR_REGNUM_P (regno)) ++ add_reg (VNx16BImode, regno); ++} ++ ++/* The hard registers in REGS are call-preserved under the current function's ++ ABI. Record that they must be preserved around the mode switch. */ ++ ++void ++aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs) ++{ ++ bitmap_iterator bi; ++ unsigned int regno; ++ EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi) ++ if (HARD_REGISTER_NUM_P (regno)) ++ add_call_preserved_reg (regno); ++ else ++ break; ++} ++ + /* Emit code to save registers before the mode switch. */ + + void +@@ -9825,6 +9868,23 @@ aarch64_need_old_pstate_sm () + if (aarch64_cfun_enables_pstate_sm ()) + return true; + ++ /* Non-local goto receivers are entered with PSTATE.SM equal to 0, ++ but the function needs to return with PSTATE.SM unchanged. */ ++ if (nonlocal_goto_handler_labels) ++ return true; ++ ++ /* Likewise for exception handlers. 
*/ ++ eh_landing_pad lp; ++ for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i) ++ if (lp && lp->post_landing_pad) ++ return true; ++ ++ /* Non-local gotos need to set PSTATE.SM to zero. It's possible to call ++ streaming-compatible functions without SME being available, so PSTATE.SM ++ should only be changed if it is currently set to one. */ ++ if (crtl->has_nonlocal_goto) ++ return true; ++ + if (cfun->machine->call_switches_pstate_sm) + for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (auto *call = dyn_cast<rtx_call_insn *> (insn)) +@@ -30209,6 +30269,59 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, + return seq; + } + ++/* BB is the target of an exception or nonlocal goto edge, which means ++ that PSTATE.SM is known to be 0 on entry. Put it into the state that ++ the current function requires. */ ++ ++static bool ++aarch64_switch_pstate_sm_for_landing_pad (basic_block bb) ++{ ++ if (TARGET_NON_STREAMING) ++ return false; ++ ++ start_sequence (); ++ rtx_insn *guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ AARCH64_FL_SM_OFF); ++ aarch64_sme_mode_switch_regs args_switch; ++ args_switch.add_call_preserved_regs (df_get_live_in (bb)); ++ args_switch.emit_prologue (); ++ aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON); ++ args_switch.emit_epilogue (); ++ if (guard_label) ++ emit_label (guard_label); ++ auto seq = get_insns (); ++ end_sequence (); ++ ++ emit_insn_after (seq, bb_note (bb)); ++ return true; ++} ++ ++/* JUMP is a nonlocal goto. Its target requires PSTATE.SM to be 0 on entry, ++ so arrange to make it so. */ ++ ++static bool ++aarch64_switch_pstate_sm_for_jump (rtx_insn *jump) ++{ ++ if (TARGET_NON_STREAMING) ++ return false; ++ ++ start_sequence (); ++ rtx_insn *guard_label = nullptr; ++ if (TARGET_STREAMING_COMPATIBLE) ++ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, ++ AARCH64_FL_SM_OFF); ++ aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF); ++ if (guard_label) ++ emit_label (guard_label); ++ auto seq = get_insns (); ++ end_sequence (); ++ ++ emit_insn_before (seq, jump); ++ return true; ++} ++ + /* If CALL involves a change in PSTATE.SM, emit the instructions needed + to switch to the new mode and the instructions needed to restore the + original mode. Return true if something changed. */ +@@ -30292,9 +30405,10 @@ public: + }; + + bool +-pass_switch_pstate_sm::gate (function *) ++pass_switch_pstate_sm::gate (function *fn) + { +- return cfun->machine->call_switches_pstate_sm; ++ return (aarch64_fndecl_pstate_sm (fn->decl) != AARCH64_FL_SM_OFF ++ || cfun->machine->call_switches_pstate_sm); + } + + /* Emit any instructions needed to switch PSTATE.SM. 
*/ +@@ -30307,11 +30421,24 @@ pass_switch_pstate_sm::execute (function *fn) + bitmap_clear (blocks); + FOR_EACH_BB_FN (bb, fn) + { +- rtx_insn *insn; +- FOR_BB_INSNS (bb, insn) +- if (auto *call = dyn_cast<rtx_call_insn *> (insn)) +- if (aarch64_switch_pstate_sm_for_call (call)) +- bitmap_set_bit (blocks, bb->index); ++ if (has_abnormal_call_or_eh_pred_edge_p (bb) ++ && aarch64_switch_pstate_sm_for_landing_pad (bb)) ++ bitmap_set_bit (blocks, bb->index); ++ ++ if (cfun->machine->call_switches_pstate_sm) ++ { ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ if (auto *call = dyn_cast<rtx_call_insn *> (insn)) ++ if (aarch64_switch_pstate_sm_for_call (call)) ++ bitmap_set_bit (blocks, bb->index); ++ } ++ ++ auto end = BB_END (bb); ++ if (JUMP_P (end) ++ && find_reg_note (end, REG_NON_LOCAL_GOTO, NULL_RTX) ++ && aarch64_switch_pstate_sm_for_jump (end)) ++ bitmap_set_bit (blocks, bb->index); + } + find_many_sub_basic_blocks (blocks); + clear_aux_for_blocks (); +diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C +new file mode 100644 +index 000000000..f791b6ecc +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C +@@ -0,0 +1,148 @@ ++// { dg-options "-O -fno-optimize-sibling-calls" } ++// { dg-final { check-function-bodies "**" "" } } ++ ++void n_callee(); ++void s_callee() __arm_streaming; ++void sc_callee() __arm_streaming_compatible; ++ ++void n_callee_ne() noexcept; ++void s_callee_ne() noexcept __arm_streaming; ++void sc_callee_ne() noexcept __arm_streaming_compatible; ++ ++void n_caller1() ++{ ++ try ++ { ++ n_callee(); ++ sc_callee(); ++ } ++ catch (...) ++ { ++ n_callee_ne(); ++ sc_callee_ne(); ++ } ++} ++// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } } ++ ++/* ++** _Z9n_caller2v: ++** ... ++** cntd (x[0-9]+) ++** str \1, [^\n]+ ++** ... ++** bl __cxa_begin_catch ++** smstart sm ++** bl _Z11s_callee_nev ++** smstop sm ++** bl __cxa_end_catch ++** ... ++*/ ++void n_caller2() ++{ ++ try ++ { ++ n_callee(); ++ sc_callee(); ++ } ++ catch (...) ++ { ++ s_callee_ne(); ++ } ++} ++ ++/* ++** _Z9s_caller1v: ++** ... ++** bl __cxa_end_catch ++** smstart sm ++** ... ++*/ ++int s_caller1() __arm_streaming ++{ ++ try ++ { ++ s_callee(); ++ return 1; ++ } ++ catch (...) ++ { ++ return 2; ++ } ++} ++ ++/* ++** _Z9s_caller2v: ++** ... ++** bl __cxa_begin_catch ++** smstart sm ++** bl _Z11s_callee_nev ++** smstop sm ++** bl __cxa_end_catch ++** smstart sm ++** ... ++*/ ++int s_caller2() __arm_streaming ++{ ++ try ++ { ++ n_callee(); ++ return 1; ++ } ++ catch (...) ++ { ++ s_callee_ne(); ++ return 2; ++ } ++} ++ ++/* ++** _Z10sc_caller1v: ++** ... ++** cntd (x[0-9]+) ++** str \1, [^\n]+ ++** mrs (x[0-9]+), svcr ++** str \2, ([^\n]+) ++** ... ++** bl __cxa_end_catch ++** ldr (x[0-9]+), \3 ++** tbz \4, 0, [^\n]+ ++** smstart sm ++** ... ++*/ ++int sc_caller1() __arm_streaming_compatible ++{ ++ try ++ { ++ sc_callee(); ++ return 1; ++ } ++ catch (...) ++ { ++ return 2; ++ } ++} ++ ++/* ++** _Z10ls_caller1v: ++** ... ++** cntd (x[0-9]+) ++** str \1, [^\n]+ ++** ... ++** bl __cxa_begin_catch ++** smstart sm ++** bl _Z12sc_callee_nev ++** smstop sm ++** bl __cxa_end_catch ++** ... ++*/ ++__arm_locally_streaming void ls_caller1() ++{ ++ try ++ { ++ sc_callee(); ++ } ++ catch (...) 
++ { ++ sc_callee_ne(); ++ } ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c +new file mode 100644 +index 000000000..4e3869fcc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c +@@ -0,0 +1,58 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void run(void (*)()); ++ ++/* ++** foo: ++** ... ++** mrs x16, svcr ++** ... ++** str x16, (.*) ++** ... ++** ldr x16, \1 ++** tbz x16, 0, .* ++** smstop sm ++** bl __clear_cache ++** ldr x16, \1 ++** tbz x16, 0, .* ++** smstart sm ++** add x0, .* ++** ldr x16, \1 ++** tbz x16, 0, .* ++** smstop sm ++** bl run ++** ldr x16, \1 ++** tbz x16, 0, .* ++** smstart sm ++** mov w0, 1 ++** ... ++** ret ++** ldr x16, \1 ++** tbz x16, 0, .* ++** smstart sm ++** mov w0, 0 ++** ... ++*/ ++int ++foo (int *ptr) __arm_streaming_compatible ++{ ++ __label__ failure; ++ ++ void bar () { *ptr += 1; goto failure; } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} ++ ++// { dg-final { scan-assembler {\tstp\tx19, x20,} } } ++// { dg-final { scan-assembler {\tstp\tx21, x22,} } } ++// { dg-final { scan-assembler {\tstp\tx23, x24,} } } ++// { dg-final { scan-assembler {\tstp\tx25, x26,} } } ++// { dg-final { scan-assembler {\tstp\tx27, x28,} } } ++// { dg-final { scan-assembler {\tstp\td8, d9,} } } ++// { dg-final { scan-assembler {\tstp\td10, d11,} } } ++// { dg-final { scan-assembler {\tstp\td12, d13,} } } ++// { dg-final { scan-assembler {\tstp\td14, d15,} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c +new file mode 100644 +index 000000000..2a2db72c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c +@@ -0,0 +1,44 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void run(void (*)()); ++ ++/* ++** foo: ++** ... ++** smstop sm ++** bl __clear_cache ++** smstart sm ++** add x0, .* ++** smstop sm ++** bl run ++** smstart sm ++** mov w0, 1 ++** ... ++** ret ++** smstart sm ++** mov w0, 0 ++** ... ++*/ ++int ++foo (int *ptr) __arm_streaming ++{ ++ __label__ failure; ++ ++ void bar () { *ptr += 1; goto failure; } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} ++ ++// { dg-final { scan-assembler {\tstp\tx19, x20,} } } ++// { dg-final { scan-assembler {\tstp\tx21, x22,} } } ++// { dg-final { scan-assembler {\tstp\tx23, x24,} } } ++// { dg-final { scan-assembler {\tstp\tx25, x26,} } } ++// { dg-final { scan-assembler {\tstp\tx27, x28,} } } ++// { dg-final { scan-assembler {\tstp\td8, d9,} } } ++// { dg-final { scan-assembler {\tstp\td10, d11,} } } ++// { dg-final { scan-assembler {\tstp\td12, d13,} } } ++// { dg-final { scan-assembler {\tstp\td14, d15,} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c +new file mode 100644 +index 000000000..022b04052 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c +@@ -0,0 +1,46 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void run(void (*)()); ++ ++/* ++** foo: ++** ... ++** smstart sm ++** ... ++** smstop sm ++** bl __clear_cache ++** smstart sm ++** add x0, .* ++** smstop sm ++** bl run ++** smstart sm ++** mov w0, 1 ++** ... 
++** smstart sm ++** mov w0, 0 ++** smstop sm ++** ... ++*/ ++__arm_locally_streaming int ++foo (int *ptr) ++{ ++ __label__ failure; ++ ++ void bar () { *ptr += 1; goto failure; } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} ++ ++// { dg-final { scan-assembler {\tstp\tx19, x20,} } } ++// { dg-final { scan-assembler {\tstp\tx21, x22,} } } ++// { dg-final { scan-assembler {\tstp\tx23, x24,} } } ++// { dg-final { scan-assembler {\tstp\tx25, x26,} } } ++// { dg-final { scan-assembler {\tstp\tx27, x28,} } } ++// { dg-final { scan-assembler {\tstp\td8, d9,} } } ++// { dg-final { scan-assembler {\tstp\td10, d11,} } } ++// { dg-final { scan-assembler {\tstp\td12, d13,} } } ++// { dg-final { scan-assembler {\tstp\td14, d15,} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c +new file mode 100644 +index 000000000..044607628 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c +@@ -0,0 +1,25 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void run(void (*)()); ++ ++/* ++** bar.0: ++** ... ++** smstart sm ++** ... ++** smstop sm ++** br x0-9+ ++*/ ++int ++foo (int *ptr) ++{ ++ __label__ failure; ++ ++ __arm_locally_streaming void bar () { *ptr += 1; goto failure; } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c +new file mode 100644 +index 000000000..4246aec8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c +@@ -0,0 +1,26 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void run(void (*)() __arm_streaming); ++ ++/* ++** bar.0: ++** ... ++** smstop sm ++** br x0-9+ ++*/ ++int ++foo (int *ptr) ++{ ++ __label__ failure; ++ ++ void bar () __arm_streaming { *ptr += 1; goto failure; } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} ++ ++// { dg-final { scan-assembler-not {smstart\t} } } ++// { dg-final { scan-assembler-not {mrs\t} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c +new file mode 100644 +index 000000000..151e2f22d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c +@@ -0,0 +1,31 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void run(void (*)() __arm_streaming_compatible); ++ ++/* ++** bar.0: ++** ... ++** mrs x16, svcr ++** ... ++** str x16, (.*) ++** ... 
++** ldr x16, \1 ++** tbz x16, 0, .* ++** smstop sm ++** br x0-9+ ++*/ ++int ++foo (int *ptr) ++{ ++ __label__ failure; ++ ++ void bar () __arm_streaming_compatible { *ptr += 1; goto failure; } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} ++ ++// { dg-final { scan-assembler-not {smstart\t} } } +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c +new file mode 100644 +index 000000000..9cc3ad5d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c +@@ -0,0 +1,25 @@ ++/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ ++ ++void run(void (*)() __arm_inout("za")); ++void callee () __arm_inout("za"); ++ ++int ++foo (int *ptr) ++{ ++ __label__ failure; ++ ++ void bar () __arm_inout("za") ++ { ++ callee (); ++ *ptr += 1; ++ goto failure; ++ } ++ run (bar); ++ return 1; ++ ++failure: ++ return 0; ++} ++ ++// { dg-final { scan-assembler-not {\tsmstart\t} } } ++// { dg-final { scan-assembler-not {\tsmstop\t} } } +-- +2.33.0 +
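All of the new tests above check for one and the same shape of code: the function reads SVCR once on entry, saves PSTATE.SM (bit 0), and then re-tests the saved value around every non-streaming call and, with this patch, also at every exception landing pad and nonlocal-goto receiver. A minimal C model of that logic follows; read_svcr, smstart_sm and smstop_sm are hypothetical stand-ins for the mrs/smstart/smstop instructions the pass really emits, not real functions.

    extern unsigned long read_svcr (void);  /* stands in for "mrs xN, svcr" */
    extern void smstart_sm (void);          /* stands in for "smstart sm" */
    extern void smstop_sm (void);           /* stands in for "smstop sm" */
    extern void n_callee (void);            /* ordinary non-streaming callee */

    void
    sc_caller (void)            /* conceptually __arm_streaming_compatible */
    {
      /* On entry, remember which mode the caller invoked us in.  */
      unsigned long entry_sm = read_svcr () & 1;

      if (entry_sm)
        smstop_sm ();           /* drop to non-streaming for the call */
      n_callee ();
      if (entry_sm)
        smstart_sm ();          /* restore the mode we were entered in */

      /* The fix above applies the same restore sequence to blocks that
         are reached abnormally: an EH landing pad or a nonlocal-goto
         receiver re-tests entry_sm and re-issues smstart before any
         user code runs, which is what the tbz/smstart pairs in the
         scan patterns verify.  */
    }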
View file
_service:tar_scm:0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch
Added
@@ -0,0 +1,913 @@ +From c4578108ab766178fe7ebd51421c1ac9f317b675 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:30 +0000 +Subject: PATCH 115/157 BackportSME aarch64: Enforce inlining + restrictions for SME + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e9aa05df6c643610a3821af52eda642a525a886 + +A function that has local ZA state cannot be inlined into its caller, +since we only support managing ZA switches at function scope. + +A function whose body directly clobbers ZA state cannot be inlined into +a function with ZA state. + +A function whose body requires a particular PSTATE.SM setting can only +be inlined into a function body that guarantees that PSTATE.SM setting. +The callee's function type doesn't matter here: one locally-streaming +function can be inlined into another. + +gcc/ + * config/aarch64/aarch64.cc: Include symbol-summary.h, ipa-prop.h, + and ipa-fnsummary.h + (aarch64_function_attribute_inlinable_p): New function. + (AARCH64_IPA_SM_FIXED, AARCH64_IPA_CLOBBERS_ZA): New constants. + (aarch64_need_ipa_fn_target_info): New function. + (aarch64_update_ipa_fn_target_info): Likewise. + (aarch64_can_inline_p): Restrict the previous ISA flag checks + to non-modal features. Prevent callees that require a particular + PSTATE.SM state from being inlined into callers that can't guarantee + that state. Also prevent callees that have ZA state from being + inlined into callers that don't. Finally, prevent callees that + clobber ZA from being inlined into callers that have ZA state. + (TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P): Define. + (TARGET_NEED_IPA_FN_TARGET_INFO): Likewise. + (TARGET_UPDATE_IPA_FN_TARGET_INFO): Likewise. + +gcc/testsuite/ + * gcc.target/aarch64/sme/inlining_1.c: New test. + * gcc.target/aarch64/sme/inlining_2.c: Likewise. + * gcc.target/aarch64/sme/inlining_3.c: Likewise. + * gcc.target/aarch64/sme/inlining_4.c: Likewise. + * gcc.target/aarch64/sme/inlining_5.c: Likewise. + * gcc.target/aarch64/sme/inlining_6.c: Likewise. + * gcc.target/aarch64/sme/inlining_7.c: Likewise. + * gcc.target/aarch64/sme/inlining_8.c: Likewise. 
+--- + gcc/config/aarch64/aarch64.cc | 132 +++++++++++++++++- + .../gcc.target/aarch64/sme/inlining_1.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_10.c | 57 ++++++++ + .../gcc.target/aarch64/sme/inlining_11.c | 57 ++++++++ + .../gcc.target/aarch64/sme/inlining_12.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_13.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_14.c | 15 ++ + .../gcc.target/aarch64/sme/inlining_15.c | 27 ++++ + .../gcc.target/aarch64/sme/inlining_2.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_3.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_4.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_5.c | 47 +++++++ + .../gcc.target/aarch64/sme/inlining_6.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_7.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_8.c | 31 ++++ + .../gcc.target/aarch64/sme/inlining_9.c | 55 ++++++++ + 16 files changed, 696 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index effb567c2..eab94d5c2 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -85,6 +85,9 @@ + #include "except.h" + #include "tree-pass.h" + #include "cfgbuild.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -21351,6 +21354,17 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) + return ret; + } + ++/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P. Use an opt-out ++ rather than an opt-in list. */ ++ ++static bool ++aarch64_function_attribute_inlinable_p (const_tree fndecl) ++{ ++ /* A function that has local ZA state cannot be inlined into its caller, ++ since we only support managing ZA switches at function scope. */ ++ return !aarch64_fndecl_has_new_state (fndecl, "za"); ++} ++ + /* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are + tri-bool options (yes, no, don't care) and the default value is + DEF, determine whether to reject inlining. */ +@@ -21372,6 +21386,60 @@ aarch64_tribools_ok_for_inlining_p (int caller, int callee, + return (callee == caller || callee == def); + } + ++/* Bit allocations for ipa_fn_summary::target_info. */ ++ ++/* Set if the function contains a stmt that relies on the function's ++ choice of PSTATE.SM setting (0 for non-streaming, 1 for streaming). ++ Not meaningful for streaming-compatible functions. 
*/ ++constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0; ++ ++/* Set if the function clobbers ZA. Not meaningful for functions that ++ have ZA state. */ ++constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1; ++ ++/* Implement TARGET_NEED_IPA_FN_TARGET_INFO. */ ++ ++static bool ++aarch64_need_ipa_fn_target_info (const_tree, unsigned int &) ++{ ++ /* We could in principle skip this for streaming-compatible functions ++ that have ZA state, but that's a rare combination. */ ++ return true; ++} ++ ++/* Implement TARGET_UPDATE_IPA_FN_TARGET_INFO. */ ++ ++static bool ++aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) ++{ ++ if (auto *ga = dyn_cast<const gasm *> (stmt)) ++ { ++ /* We don't know what the asm does, so conservatively assume that ++ it requires the function's current SM mode. */ ++ info |= AARCH64_IPA_SM_FIXED; ++ for (unsigned int i = 0; i < gimple_asm_nclobbers (ga); ++i) ++ { ++ tree op = gimple_asm_clobber_op (ga, i); ++ const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op)); ++ if (strcmp (clobber, "za") == 0) ++ info |= AARCH64_IPA_CLOBBERS_ZA; ++ } ++ } ++ if (auto *call = dyn_cast<const gcall *> (stmt)) ++ { ++ if (gimple_call_builtin_p (call, BUILT_IN_MD)) ++ { ++ /* The attributes on AArch64 builtins are supposed to be accurate. ++ If the function isn't marked streaming-compatible then it ++ needs whichever SM mode it selects. */ ++ tree decl = gimple_call_fndecl (call); ++ if (aarch64_fndecl_pstate_sm (decl) != 0) ++ info |= AARCH64_IPA_SM_FIXED; ++ } ++ } ++ return true; ++} ++ + /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid + to inline CALLEE into CALLER based on target-specific info. + Make sure that the caller and callee have compatible architectural +@@ -21394,12 +21462,56 @@ aarch64_can_inline_p (tree caller, tree callee) + : target_option_default_node); + + /* Callee's ISA flags should be a subset of the caller's. */ +- if ((caller_opts->x_aarch64_asm_isa_flags +- & callee_opts->x_aarch64_asm_isa_flags) +- != callee_opts->x_aarch64_asm_isa_flags) ++ auto caller_asm_isa = (caller_opts->x_aarch64_asm_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ auto callee_asm_isa = (callee_opts->x_aarch64_asm_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ if (callee_asm_isa & ~caller_asm_isa) + return false; +- if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) +- != callee_opts->x_aarch64_isa_flags) ++ ++ auto caller_isa = (caller_opts->x_aarch64_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ auto callee_isa = (callee_opts->x_aarch64_isa_flags ++ & ~AARCH64_FL_ISA_MODES); ++ if (callee_isa & ~caller_isa) ++ return false; ++ ++ /* Return true if the callee might have target_info property PROPERTY. ++ The answer must be true unless we have positive proof to the contrary. */ ++ auto callee_has_property = &(unsigned int property) ++ { ++ if (ipa_fn_summaries) ++ if (auto *summary = ipa_fn_summaries->get (cgraph_node::get (callee))) ++ if (!(summary->target_info & property)) ++ return false; ++ return true; ++ }; ++ ++ /* Streaming-compatible code can be inlined into functions with any ++ PSTATE.SM mode. Otherwise the caller and callee must agree on ++ PSTATE.SM mode, unless we can prove that the callee is naturally ++ streaming-compatible. 
*/ ++ auto caller_sm = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_SM_STATE); ++ auto callee_sm = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_SM_STATE); ++ if (callee_sm ++ && caller_sm != callee_sm ++ && callee_has_property (AARCH64_IPA_SM_FIXED)) ++ return false; ++ ++ /* aarch64_function_attribute_inlinable_p prevents new-ZA functions ++ from being inlined into others. We also need to prevent inlining ++ of shared-ZA functions into functions without ZA state, since this ++ is an error condition. ++ ++ The only other problematic case for ZA is inlining a function that ++ directly clobbers ZA into a function that has ZA state. */ ++ auto caller_za = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON); ++ auto callee_za = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON); ++ if (!caller_za && callee_za) ++ return false; ++ if (caller_za ++ && !callee_za ++ && callee_has_property (AARCH64_IPA_CLOBBERS_ZA)) + return false; + + /* Allow non-strict aligned functions inlining into strict +@@ -30732,6 +30844,16 @@ aarch64_get_v16qi_mode () + #undef TARGET_CAN_ELIMINATE + #define TARGET_CAN_ELIMINATE aarch64_can_eliminate + ++#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P ++#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P \ ++ aarch64_function_attribute_inlinable_p ++ ++#undef TARGET_NEED_IPA_FN_TARGET_INFO ++#define TARGET_NEED_IPA_FN_TARGET_INFO aarch64_need_ipa_fn_target_info ++ ++#undef TARGET_UPDATE_IPA_FN_TARGET_INFO ++#define TARGET_UPDATE_IPA_FN_TARGET_INFO aarch64_update_ipa_fn_target_info ++ + #undef TARGET_CAN_INLINE_P + #define TARGET_CAN_INLINE_P aarch64_can_inline_p + +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c +new file mode 100644 +index 000000000..24dc2b341 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c +@@ -0,0 +1,47 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++sc_callee () arm::streaming_compatible {} ++ ++inline void __attribute__((always_inline)) ++s_callee () arm::streaming {} ++ ++inline void __attribute__((always_inline)) ++n_callee () {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_callee () arm::streaming_compatible {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_callee () {} ++ ++inline void __attribute__((always_inline)) ++sc_asm_callee () arm::streaming_compatible { asm (""); } ++ ++inline void __attribute__((always_inline)) ++s_asm_callee () arm::streaming { asm (""); } // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++n_asm_callee () { asm (""); } // { dg-error "inlining failed" } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_asm_callee () arm::streaming_compatible { asm (""); } // { dg-error "inlining failed" } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_asm_callee () { asm (""); } // { dg-error "inlining failed" } ++ ++void ++sc_caller () arm::streaming_compatible ++{ ++ sc_callee (); ++ s_callee (); ++ n_callee (); ++ sc_ls_callee (); ++ n_ls_callee (); ++ ++ sc_asm_callee (); ++ s_asm_callee (); ++ n_asm_callee (); ++ sc_ls_asm_callee (); ++ n_ls_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c +new file mode 100644 +index 000000000..adfd45a87 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c +@@ -0,0 +1,57 @@ ++/* { dg-options "" } */ ++ ++#include 
<arm_neon.h> ++#include <arm_sme.h> ++ ++uint8x16_t *neon; ++svint64_t *sve; ++int64_t *ptr; ++ ++// Gets expanded to addition early, so no error. An error would be ++// more correct though. ++inline void __attribute__((always_inline)) ++call_vadd () ++{ ++ neon4 = vaddq_u8 (neon5, neon6); ++} ++ ++inline void __attribute__((always_inline)) ++call_vbsl () // { dg-error "inlining failed" } ++{ ++ neon0 = vbslq_u8 (neon1, neon2, neon3); ++} ++ ++inline void __attribute__((always_inline)) ++call_svadd () ++{ ++ *sve = svadd_x (svptrue_b8 (), *sve, 1); ++} ++ ++inline void __attribute__((always_inline)) ++call_svld1_gather () // { dg-error "inlining failed" } ++{ ++ *sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve); ++} ++ ++inline void __attribute__((always_inline)) ++call_svzero () arm::inout("za") ++{ ++ svzero_za (); ++} ++ ++inline void __attribute__((always_inline)) ++call_svst1_za () arm::streaming, arm::inout("za") // { dg-error "inlining failed" } ++{ ++ svst1_ver_za64 (0, 0, svptrue_b8 (), ptr); ++} ++ ++void ++sc_caller () arm::inout("za"), arm::streaming_compatible ++{ ++ call_vadd (); ++ call_vbsl (); ++ call_svadd (); ++ call_svld1_gather (); ++ call_svzero (); ++ call_svst1_za (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c +new file mode 100644 +index 000000000..d05a92c1c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c +@@ -0,0 +1,57 @@ ++/* { dg-options "" } */ ++ ++#include <arm_neon.h> ++#include <arm_sme.h> ++ ++uint8x16_t *neon; ++svint64_t *sve; ++int64_t *ptr; ++ ++// Gets expanded to addition early, so no error. An error would be ++// more correct though. ++inline void __attribute__((always_inline)) ++call_vadd () ++{ ++ neon4 = vaddq_u8 (neon5, neon6); ++} ++ ++inline void __attribute__((always_inline)) ++call_vbsl () // { dg-error "inlining failed" } ++{ ++ neon0 = vbslq_u8 (neon1, neon2, neon3); ++} ++ ++inline void __attribute__((always_inline)) ++call_svadd () ++{ ++ *sve = svadd_x (svptrue_b8 (), *sve, 1); ++} ++ ++inline void __attribute__((always_inline)) ++call_svld1_gather () // { dg-error "inlining failed" } ++{ ++ *sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve); ++} ++ ++inline void __attribute__((always_inline)) ++call_svzero () arm::inout("za") ++{ ++ svzero_za (); ++} ++ ++inline void __attribute__((always_inline)) ++call_svst1_za () arm::streaming, arm::inout("za") ++{ ++ svst1_ver_za64 (0, 0, svptrue_b8 (), ptr); ++} ++ ++void ++sc_caller () arm::inout("za"), arm::streaming ++{ ++ call_vadd (); ++ call_vbsl (); ++ call_svadd (); ++ call_svld1_gather (); ++ call_svzero (); ++ call_svst1_za (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c +new file mode 100644 +index 000000000..366f8b24a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c +@@ -0,0 +1,15 @@ ++/* { dg-options "" } */ ++ ++#include <arm_sme.h> ++ ++inline void __attribute__((always_inline)) ++call_svzero () arm::inout("za"), arm::streaming_compatible // { dg-error "inlining failed" } ++{ ++ svzero_za (); ++} ++ ++void ++n_caller () ++{ ++ call_svzero (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c +new file mode 100644 +index 000000000..bdbd7408c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c +@@ -0,0 +1,15 @@ ++/* { dg-options "" } */ ++ ++#include <arm_sme.h> ++ ++inline void 
__attribute__((always_inline)) ++call_svzero () arm::inout("za"), arm::streaming_compatible // { dg-error "inlining failed" } ++{ ++ svzero_za (); ++} ++ ++void ++s_caller () ++{ ++ call_svzero (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c +new file mode 100644 +index 000000000..0ce4384f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c +@@ -0,0 +1,15 @@ ++/* { dg-options "" } */ ++ ++#include <arm_sme.h> ++ ++inline void __attribute__((always_inline)) ++call_svzero () arm::inout("za"), arm::streaming_compatible // { dg-error "inlining failed" } ++{ ++ svzero_za (); ++} ++ ++void ++sc_caller () ++{ ++ call_svzero (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c +new file mode 100644 +index 000000000..06fc5d7f5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c +@@ -0,0 +1,27 @@ ++/* { dg-options "" } */ ++ ++#include <arm_sme.h> ++ ++inline void ++call_svzero () arm::inout("za"), arm::streaming_compatible ++{ ++ svzero_za (); ++} ++ ++void ++n_caller () ++{ ++ call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" } ++} ++ ++void ++s_caller () ++{ ++ call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" } ++} ++ ++void ++sc_caller () ++{ ++ call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" } ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c +new file mode 100644 +index 000000000..ea2a57049 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c +@@ -0,0 +1,47 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++sc_callee () arm::streaming_compatible {} ++ ++inline void __attribute__((always_inline)) ++s_callee () arm::streaming {} ++ ++inline void __attribute__((always_inline)) ++n_callee () {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_callee () arm::streaming_compatible {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_callee () {} ++ ++inline void __attribute__((always_inline)) ++sc_asm_callee () arm::streaming_compatible { asm (""); } ++ ++inline void __attribute__((always_inline)) ++s_asm_callee () arm::streaming { asm (""); } ++ ++inline void __attribute__((always_inline)) ++n_asm_callee () { asm (""); } // { dg-error "inlining failed" } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_asm_callee () arm::streaming_compatible { asm (""); } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_asm_callee () { asm (""); } ++ ++void ++s_caller () arm::streaming ++{ ++ sc_callee (); ++ s_callee (); ++ n_callee (); ++ sc_ls_callee (); ++ n_ls_callee (); ++ ++ sc_asm_callee (); ++ s_asm_callee (); ++ n_asm_callee (); ++ sc_ls_asm_callee (); ++ n_ls_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c +new file mode 100644 +index 000000000..d7ffb3819 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c +@@ -0,0 +1,47 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++sc_callee () arm::streaming_compatible {} ++ ++inline void __attribute__((always_inline)) 
++s_callee () arm::streaming {} ++ ++inline void __attribute__((always_inline)) ++n_callee () {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_callee () arm::streaming_compatible {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_callee () {} ++ ++inline void __attribute__((always_inline)) ++sc_asm_callee () arm::streaming_compatible { asm (""); } ++ ++inline void __attribute__((always_inline)) ++s_asm_callee () arm::streaming { asm (""); } // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++n_asm_callee () { asm (""); } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_asm_callee () arm::streaming_compatible { asm (""); } // { dg-error "inlining failed" } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_asm_callee () { asm (""); } // { dg-error "inlining failed" } ++ ++void ++n_caller () ++{ ++ sc_callee (); ++ s_callee (); ++ n_callee (); ++ sc_ls_callee (); ++ n_ls_callee (); ++ ++ sc_asm_callee (); ++ s_asm_callee (); ++ n_asm_callee (); ++ sc_ls_asm_callee (); ++ n_ls_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c +new file mode 100644 +index 000000000..789203725 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c +@@ -0,0 +1,47 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++sc_callee () arm::streaming_compatible {} ++ ++inline void __attribute__((always_inline)) ++s_callee () arm::streaming {} ++ ++inline void __attribute__((always_inline)) ++n_callee () {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_callee () arm::streaming_compatible {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_callee () {} ++ ++inline void __attribute__((always_inline)) ++sc_asm_callee () arm::streaming_compatible { asm (""); } ++ ++inline void __attribute__((always_inline)) ++s_asm_callee () arm::streaming { asm (""); } ++ ++inline void __attribute__((always_inline)) ++n_asm_callee () { asm (""); } // { dg-error "inlining failed" } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_asm_callee () arm::streaming_compatible { asm (""); } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_asm_callee () { asm (""); } ++ ++arm::locally_streaming void ++sc_ls_caller () arm::streaming_compatible ++{ ++ sc_callee (); ++ s_callee (); ++ n_callee (); ++ sc_ls_callee (); ++ n_ls_callee (); ++ ++ sc_asm_callee (); ++ s_asm_callee (); ++ n_asm_callee (); ++ sc_ls_asm_callee (); ++ n_ls_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c +new file mode 100644 +index 000000000..d19cdc450 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c +@@ -0,0 +1,47 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++sc_callee () arm::streaming_compatible {} ++ ++inline void __attribute__((always_inline)) ++s_callee () arm::streaming {} ++ ++inline void __attribute__((always_inline)) ++n_callee () {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_callee () arm::streaming_compatible {} ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_callee () {} ++ ++inline void __attribute__((always_inline)) ++sc_asm_callee () arm::streaming_compatible { asm 
(""); } ++ ++inline void __attribute__((always_inline)) ++s_asm_callee () arm::streaming { asm (""); } ++ ++inline void __attribute__((always_inline)) ++n_asm_callee () { asm (""); } // { dg-error "inlining failed" } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++sc_ls_asm_callee () arm::streaming_compatible { asm (""); } ++ ++arm::locally_streaming inline void __attribute__((always_inline)) ++n_ls_asm_callee () { asm (""); } ++ ++arm::locally_streaming void ++n_ls_caller () ++{ ++ sc_callee (); ++ s_callee (); ++ n_callee (); ++ sc_ls_callee (); ++ n_ls_callee (); ++ ++ sc_asm_callee (); ++ s_asm_callee (); ++ n_asm_callee (); ++ sc_ls_asm_callee (); ++ n_ls_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c +new file mode 100644 +index 000000000..a5eb399f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c +@@ -0,0 +1,31 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++shared_callee () arm::inout("za") {} ++ ++arm::new("za") inline void __attribute__((always_inline)) ++new_callee () {} // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++normal_callee () {} ++ ++inline void __attribute__((always_inline)) ++shared_asm_callee () arm::inout("za") { asm volatile ("" ::: "za"); } ++ ++arm::new("za") inline void __attribute__((always_inline)) ++new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++normal_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } ++ ++void ++shared_caller () arm::inout("za") ++{ ++ shared_callee (); ++ new_callee (); ++ normal_callee (); ++ ++ shared_asm_callee (); ++ new_asm_callee (); ++ normal_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c +new file mode 100644 +index 000000000..0f046283f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c +@@ -0,0 +1,31 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++shared_callee () arm::inout("za") {} ++ ++arm::new("za") inline void __attribute__((always_inline)) ++new_callee () {} // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++normal_callee () {} ++ ++inline void __attribute__((always_inline)) ++shared_asm_callee () arm::inout("za") { asm volatile ("" ::: "za"); } ++ ++arm::new("za") inline void __attribute__((always_inline)) ++new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++normal_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } ++ ++arm::new("za") void ++new_caller () ++{ ++ shared_callee (); ++ new_callee (); ++ normal_callee (); ++ ++ shared_asm_callee (); ++ new_asm_callee (); ++ normal_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c +new file mode 100644 +index 000000000..fd8a3a61e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c +@@ -0,0 +1,31 @@ ++/* { dg-options "" } */ ++ ++inline void __attribute__((always_inline)) ++shared_callee () arm::inout("za") {} // { dg-error "inlining failed" } ++ ++arm::new("za") inline void __attribute__((always_inline)) ++new_callee () {} // { dg-error "inlining failed" } ++ ++inline void 
__attribute__((always_inline)) ++normal_callee () {} ++ ++inline void __attribute__((always_inline)) ++shared_asm_callee () arm::inout("za") { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } ++ ++arm::new("za") inline void __attribute__((always_inline)) ++new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } ++ ++inline void __attribute__((always_inline)) ++normal_asm_callee () { asm volatile ("" ::: "za"); } ++ ++void ++normal_caller () ++{ ++ shared_callee (); ++ new_callee (); ++ normal_callee (); ++ ++ shared_asm_callee (); ++ new_asm_callee (); ++ normal_asm_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c +new file mode 100644 +index 000000000..91520e378 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c +@@ -0,0 +1,55 @@ ++/* { dg-options "" } */ ++ ++#include <arm_neon.h> ++#include <arm_sme.h> ++ ++uint8x16_t *neon; ++svint64_t *sve; ++int64_t *ptr; ++ ++inline void __attribute__((always_inline)) ++call_vadd () ++{ ++ neon4 = vaddq_u8 (neon5, neon6); ++} ++ ++inline void __attribute__((always_inline)) ++call_vbsl () ++{ ++ neon0 = vbslq_u8 (neon1, neon2, neon3); ++} ++ ++inline void __attribute__((always_inline)) ++call_svadd () ++{ ++ *sve = svadd_x (svptrue_b8 (), *sve, 1); ++} ++ ++inline void __attribute__((always_inline)) ++call_svld1_gather () ++{ ++ *sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve); ++} ++ ++inline void __attribute__((always_inline)) ++call_svzero () arm::inout("za") ++{ ++ svzero_za (); ++} ++ ++inline void __attribute__((always_inline)) ++call_svst1_za () arm::streaming, arm::inout("za") // { dg-error "inlining failed" } ++{ ++ svst1_ver_za64 (0, 0, svptrue_b8 (), ptr); ++} ++ ++void ++n_caller () arm::inout("za") ++{ ++ call_vadd (); ++ call_vbsl (); ++ call_svadd (); ++ call_svld1_gather (); ++ call_svzero (); ++ call_svst1_za (); ++} +-- +2.33.0 +
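Condensed into freestanding C, the three checks that aarch64_can_inline_p now performs look roughly like the sketch below. The struct and helper are illustrative only: in the real code sm_mode comes from the AARCH64_FL_SM_STATE bits of the ISA flags (so it describes the function body, and a locally-streaming function counts as streaming), has_za comes from AARCH64_FL_ZA_ON, and the "could the callee have this property" question is answered by the IPA function summary, defaulting to yes when no summary exists.

    #define IPA_SM_FIXED    (1u << 0)  /* body relies on its PSTATE.SM setting */
    #define IPA_CLOBBERS_ZA (1u << 1)  /* body clobbers ZA */

    struct fn_info
    {
      unsigned sm_mode;      /* 0 = streaming-compatible, 1 = off, 2 = on */
      int has_za;            /* function has shared or new ZA state */
      unsigned target_info;  /* summary bits; all set when nothing is known */
    };

    int
    sme_ok_for_inlining (const struct fn_info *caller,
                         const struct fn_info *callee)
    {
      /* A callee that pins PSTATE.SM can only go where that setting
         is guaranteed.  */
      if (callee->sm_mode
          && callee->sm_mode != caller->sm_mode
          && (callee->target_info & IPA_SM_FIXED))
        return 0;

      /* Shared-ZA code must not land in a caller without ZA state.  */
      if (callee->has_za && !caller->has_za)
        return 0;

      /* ZA-clobbering code must not land in a caller that has it.  */
      if (!callee->has_za && caller->has_za
          && (callee->target_info & IPA_CLOBBERS_ZA))
        return 0;

      return 1;
    }

New-ZA callees never reach this point at all: aarch64_function_attribute_inlinable_p rejects them up front, which is what the dg-error markers on the arm::new("za") callees in the tests exercise.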
View file
_service:tar_scm:0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch
Added
@@ -0,0 +1,424 @@ +From 08b6cbe756ede25b16b8e9ff9ee32f76c4f8430f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 5 Dec 2023 10:11:30 +0000 +Subject: PATCH 116/157 BackportSME aarch64: Update sibcall handling for + SME + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e7fee57c00ae17611651e0b057dc03b6e276b82 + +We only support tail calls between functions with the same PSTATE.ZA +setting ("private-ZA" to "private-ZA" and "shared-ZA" to "shared-ZA"). + +Only a normal non-streaming function can tail-call another non-streaming +function, and only a streaming function can tail-call another streaming +function. Any function can tail-call a streaming-compatible function. + +gcc/ + * config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall): + Enforce PSTATE.SM and PSTATE.ZA restrictions. + (aarch64_expand_epilogue): Save and restore the arguments + to a sibcall around any change to PSTATE.SM. + +gcc/testsuite/ + * gcc.target/aarch64/sme/sibcall_1.c: New test. + * gcc.target/aarch64/sme/sibcall_2.c: Likewise. + * gcc.target/aarch64/sme/sibcall_3.c: Likewise. + * gcc.target/aarch64/sme/sibcall_4.c: Likewise. + * gcc.target/aarch64/sme/sibcall_5.c: Likewise. + * gcc.target/aarch64/sme/sibcall_6.c: Likewise. + * gcc.target/aarch64/sme/sibcall_7.c: Likewise. + * gcc.target/aarch64/sme/sibcall_8.c: Likewise. +--- + gcc/config/aarch64/aarch64.cc | 9 +++- + .../gcc.target/aarch64/sme/sibcall_1.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_2.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_3.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_4.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_5.c | 45 +++++++++++++++++++ + .../gcc.target/aarch64/sme/sibcall_6.c | 26 +++++++++++ + .../gcc.target/aarch64/sme/sibcall_7.c | 26 +++++++++++ + .../gcc.target/aarch64/sme/sibcall_8.c | 19 ++++++++ + 9 files changed, 304 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index eab94d5c2..b8e540b6e 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -8660,6 +8660,11 @@ aarch64_function_ok_for_sibcall (tree, tree exp) + if (crtl->abi->id () != expr_callee_abi (exp).id ()) + return false; + ++ tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); ++ if (aarch64_fntype_pstate_sm (fntype) & ~aarch64_cfun_incoming_pstate_sm ()) ++ return false; ++ if (aarch64_fntype_pstate_za (fntype) != aarch64_cfun_incoming_pstate_za ()) ++ return false; + return true; + } + +@@ -11923,7 +11928,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) + guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, + aarch64_isa_flags); + aarch64_sme_mode_switch_regs return_switch; +- if (crtl->return_rtx && REG_P (crtl->return_rtx)) ++ if (sibcall) ++ return_switch.add_call_args (sibcall); ++ else if (crtl->return_rtx && REG_P (crtl->return_rtx)) + return_switch.add_reg (GET_MODE 
(crtl->return_rtx), + REGNO (crtl->return_rtx)); + return_switch.emit_prologue (); +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c +new file mode 100644 +index 000000000..c7530de5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++sc_to_sc () arm::streaming_compatible ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++sc_to_s () arm::streaming_compatible ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ ++ ++void ++sc_to_n () arm::streaming_compatible ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++void ++sc_to_sc_ls () arm::streaming_compatible ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++sc_to_n_ls () arm::streaming_compatible ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c +new file mode 100644 +index 000000000..8d1c8a9f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++s_to_sc () arm::streaming ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++s_to_s () arm::streaming ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tb\ts_callee} } } */ ++ ++void ++s_to_n () arm::streaming ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++void ++s_to_sc_ls () arm::streaming ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++s_to_n_ls () arm::streaming ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c +new file mode 100644 +index 000000000..2ae937fc5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++void ++n_to_sc () ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++void ++n_to_s () ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ ++ ++void ++n_to_n () ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tn_callee} } } */ ++ ++void ++n_to_sc_ls () ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++void ++n_to_n_ls () ++{ ++ n_ls_callee (); ++} ++/* { dg-final { 
scan-assembler {\tb\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c +new file mode 100644 +index 000000000..6935a1bd7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++arm::locally_streaming void ++sc_to_sc () arm::streaming_compatible ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++arm::locally_streaming void ++sc_to_s () arm::streaming_compatible ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ ++ ++arm::locally_streaming void ++sc_to_n () arm::streaming_compatible ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ ++ ++arm::locally_streaming void ++sc_to_sc_ls () arm::streaming_compatible ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++arm::locally_streaming void ++sc_to_n_ls () arm::streaming_compatible ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c +new file mode 100644 +index 000000000..7aaf58dfa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O2" } */ ++ ++void sc_callee () arm::streaming_compatible; ++void s_callee () arm::streaming; ++void n_callee (); ++ ++arm::locally_streaming __attribute__((noipa)) void ++sc_ls_callee () arm::streaming_compatible {} ++arm::locally_streaming __attribute__((noipa)) void ++n_ls_callee () {} ++ ++arm::locally_streaming void ++n_to_sc () ++{ ++ sc_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ ++ ++arm::locally_streaming void ++n_to_s () ++{ ++ s_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ ++ ++arm::locally_streaming void ++n_to_n () ++{ ++ n_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tn_callee} } } */ ++ ++arm::locally_streaming void ++n_to_sc_ls () ++{ ++ sc_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ ++ ++arm::locally_streaming void ++n_to_n_ls () ++{ ++ n_ls_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c +new file mode 100644 +index 000000000..e568edb17 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c +@@ -0,0 +1,26 @@ ++/* { dg-options "-O2" } */ ++ ++void shared_callee () arm::inout("za"); ++arm::new("za") __attribute__((noipa)) void new_callee () {} ++void normal_callee (); ++ ++void ++shared_to_shared () arm::inout("za") ++{ ++ shared_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tshared_callee} } } */ ++ ++void ++shared_to_new () arm::inout("za") ++{ ++ new_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tnew_callee} } } */ ++ ++void ++shared_to_normal () arm::inout("za") ++{ ++ normal_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tnormal_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c +new file 
mode 100644 +index 000000000..a5f576d20 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c +@@ -0,0 +1,26 @@ ++/* { dg-options "-O2" } */ ++ ++void shared_callee () arm::inout("za"); ++arm::new("za") __attribute__((noipa)) void new_callee () {} ++void normal_callee (); ++ ++arm::new("za") void ++new_to_shared () ++{ ++ shared_callee (); ++} ++/* { dg-final { scan-assembler {\tbl\tshared_callee} } } */ ++ ++arm::new("za") void ++new_to_new () ++{ ++ new_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tnew_callee} } } */ ++ ++arm::new("za") void ++new_to_normal () ++{ ++ normal_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c +new file mode 100644 +index 000000000..33370f7a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c +@@ -0,0 +1,19 @@ ++/* { dg-options "-O2" } */ ++ ++void shared_callee () arm::inout("za"); ++arm::new("za") __attribute__((noipa)) void new_callee () {} ++void normal_callee (); ++ ++void ++normal_to_new () ++{ ++ new_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tnew_callee} } } */ ++ ++void ++normal_to_normal () ++{ ++ normal_callee (); ++} ++/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */ +-- +2.33.0 +
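At the source level the rule is simple: a sibcall survives only when control can be handed over with no PSTATE.SM or PSTATE.ZA bookkeeping left to do on return. A short illustration, assuming the __arm_streaming / __arm_streaming_compatible keyword spellings used elsewhere in this series; the comments give the branch kind that the corresponding sibcall_2.c scans expect at -O2.

    void n_callee (void);
    void s_callee (void) __arm_streaming;
    void sc_callee (void) __arm_streaming_compatible;

    void
    s_to_s (void) __arm_streaming
    {
      s_callee ();   /* same PSTATE.SM on both sides: tail call, "b s_callee" */
    }

    void
    s_to_sc (void) __arm_streaming
    {
      sc_callee ();  /* streaming-compatible callee: tail call, "b sc_callee" */
    }

    void
    s_to_n (void) __arm_streaming
    {
      n_callee ();   /* needs smstop/smstart around it: plain "bl n_callee" */
    }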
View file
_service:tar_scm:0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch
Added
@@ -0,0 +1,117 @@ +From e0da78a258a34c26488b7ae623f9ae8727c2b264 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 14 Nov 2022 17:14:18 +0000 +Subject: PATCH 117/157 BackportSME libgcc: aarch64: Configure check for + .variant_pcs support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3ebb591c65b4fbe0cddd23ebc0ca2d9f7aef2cec + +Ideally SME support routines in libgcc are marked as variant PCS symbols +so check if as supports the directive. + +libgcc/ChangeLog: + + * config.in: Undef HAVE_AS_VARIANT_PCS. + * configure: Regenerate. + * configure.ac: Check for .variant_pcs. +--- + libgcc/config.in | 3 +++ + libgcc/configure | 39 +++++++++++++++++++++++++++++++++++++++ + libgcc/configure.ac | 17 +++++++++++++++++ + 3 files changed, 59 insertions(+) + +diff --git a/libgcc/config.in b/libgcc/config.in +index f93c64a00..5dd96cdf6 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -13,6 +13,9 @@ + /* Define to 1 if the assembler supports LSE. */ + #undef HAVE_AS_LSE + ++/* Define to 1 if the assembler supports .variant_pcs. */ ++#undef HAVE_AS_VARIANT_PCS ++ + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +diff --git a/libgcc/configure b/libgcc/configure +index 1f9b2ac57..afe02b303 100755 +--- a/libgcc/configure ++++ b/libgcc/configure +@@ -5619,6 +5619,45 @@ $as_echo "#define HAVE_AS_LSE 1" >>confdefs.h + ;; + esac + ++ ++ ++case "${target}" in ++aarch64*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if as supports .variant_pcs" >&5 ++$as_echo_n "checking if as supports .variant_pcs... " >&6; } ++if ${libgcc_cv_as_variant_pcs+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ ++ cat confdefs.h - <<_ACEOF >conftest.$ac_ext ++/* end confdefs.h. */ ++ ++int ++main () ++{ ++asm (".variant_pcs foobar"); ++ ; ++ return 0; ++} ++_ACEOF ++if ac_fn_c_try_compile "$LINENO"; then : ++ libgcc_cv_as_variant_pcs=yes ++else ++ libgcc_cv_as_variant_pcs=no ++fi ++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ++ ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_variant_pcs" >&5 ++$as_echo "$libgcc_cv_as_variant_pcs" >&6; } ++ if test x$libgcc_cv_as_variant_pcs = xyes; then ++ ++$as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h ++ ++ fi ++ ;; ++esac ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 + $as_echo_n "checking for init priority support... " >&6; } + if ${libgcc_cv_init_priority+:} false; then : +diff --git a/libgcc/configure.ac b/libgcc/configure.ac +index 2fc9d5d7c..abc398c91 100644 +--- a/libgcc/configure.ac ++++ b/libgcc/configure.ac +@@ -648,6 +648,23 @@ changequote(,)dnl + esac) + LIBGCC_CHECK_AS_LSE + ++dnl Check if as supports .variant_pcs. ++AC_DEFUN(LIBGCC_CHECK_AS_VARIANT_PCS, ++case "${target}" in ++aarch64*-*-*) ++ AC_CACHE_CHECK(if as supports .variant_pcs, libgcc_cv_as_variant_pcs, ++ AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, ++ asm (".variant_pcs foobar");), ++ libgcc_cv_as_variant_pcs=yes, libgcc_cv_as_variant_pcs=no) ++ ) ++ if test x$libgcc_cv_as_variant_pcs = xyes; then ++ AC_DEFINE(HAVE_AS_VARIANT_PCS, 1, ++ Define to 1 if the assembler supports .variant_pcs.) ++ fi ++ ;; ++esac) ++LIBGCC_CHECK_AS_VARIANT_PCS ++ + dnl Check if as supports RTM instructions. + AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, + AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, +-- +2.33.0 +
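The probe is easy to reproduce outside the build system. If the translation unit below compiles (and hence assembles), the assembler accepts .variant_pcs and libgcc can tag its SME entry points as variant-PCS symbols, telling the static linker that they follow a non-standard procedure call standard. As in the configure test itself, foobar is just a dummy symbol name.

    /* Standalone equivalent of the configure probe: a successful
       "cc -c" of this file is what turns on HAVE_AS_VARIANT_PCS.  */
    int
    main (void)
    {
      asm (".variant_pcs foobar");
      return 0;
    }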
View file
_service:tar_scm:0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch
Added
@@ -0,0 +1,117 @@ +From 66d4035958e1dee2d16f9290004921674eb492b3 Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Mon, 4 Dec 2023 10:52:52 +0000 +Subject: PATCH 118/157 BackportSME libgcc: aarch64: Configure check for + __getauxval + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 + +Add configure check for the __getauxval ABI symbol, which is always +available on aarch64 glibc, and may be available on other linux C +runtimes. For now only enabled on glibc, others have to override it + + target_configargs=libgcc_cv_have___getauxval=yes + +This is deliberately obscure as it should be auto detected, ideally +via a feature test macro in unistd.h (link time detection is not +possible since the libc may not be installed at libgcc build time), +but currently there is no such feature test mechanism. + +Without __getauxval, libgcc cannot do runtime CPU feature detection +and has to assume only the build time known features are available. + +libgcc/ChangeLog: + + * config.in: Undef HAVE___GETAUXVAL. + * configure: Regenerate. + * configure.ac: Check for __getauxval. +--- + libgcc/config.in | 3 +++ + libgcc/configure | 26 ++++++++++++++++++++++++++ + libgcc/configure.ac | 19 +++++++++++++++++++ + 3 files changed, 48 insertions(+) + +diff --git a/libgcc/config.in b/libgcc/config.in +index 5dd96cdf6..441d4d39b 100644 +--- a/libgcc/config.in ++++ b/libgcc/config.in +@@ -16,6 +16,9 @@ + /* Define to 1 if the assembler supports .variant_pcs. */ + #undef HAVE_AS_VARIANT_PCS + ++/* Define to 1 if __getauxval is available. */ ++#undef HAVE___GETAUXVAL ++ + /* Define to 1 if the target assembler supports thread-local storage. */ + #undef HAVE_CC_TLS + +diff --git a/libgcc/configure b/libgcc/configure +index afe02b303..a874ef57e 100755 +--- a/libgcc/configure ++++ b/libgcc/configure +@@ -5658,6 +5658,32 @@ $as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h + ;; + esac + ++# Check __getauxval ABI symbol for CPU feature detection. ++case ${target} in ++aarch64*-linux-*) ++ # No link check because the libc may not be present. ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __getauxval" >&5 ++$as_echo_n "checking for __getauxval... " >&6; } ++if ${libgcc_cv_have___getauxval+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ case ${target} in ++ *-linux-gnu*) ++ libgcc_cv_have___getauxval=yes ++ ;; ++ *) ++ libgcc_cv_have___getauxval=no ++ esac ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_have___getauxval" >&5 ++$as_echo "$libgcc_cv_have___getauxval" >&6; } ++ if test x$libgcc_cv_have___getauxval = xyes; then ++ ++$as_echo "#define HAVE___GETAUXVAL 1" >>confdefs.h ++ ++ fi ++esac ++ + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 + $as_echo_n "checking for init priority support... " >&6; } + if ${libgcc_cv_init_priority+:} false; then : +diff --git a/libgcc/configure.ac b/libgcc/configure.ac +index abc398c91..64b45ae14 100644 +--- a/libgcc/configure.ac ++++ b/libgcc/configure.ac +@@ -665,6 +665,25 @@ aarch64*-*-*) + esac) + LIBGCC_CHECK_AS_VARIANT_PCS + ++# Check __getauxval ABI symbol for CPU feature detection. ++case ${target} in ++aarch64*-linux-*) ++ # No link check because the libc may not be present. 
++ AC_CACHE_CHECK(for __getauxval, ++ libgcc_cv_have___getauxval, ++ case ${target} in ++ *-linux-gnu*) ++ libgcc_cv_have___getauxval=yes ++ ;; ++ *) ++ libgcc_cv_have___getauxval=no ++ esac) ++ if test x$libgcc_cv_have___getauxval = xyes; then ++ AC_DEFINE(HAVE___GETAUXVAL, 1, ++ Define to 1 if __getauxval is available.) ++ fi ++esac ++ + dnl Check if as supports RTM instructions. + AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, + AC_COMPILE_IFELSE(AC_LANG_PROGRAM(, +-- +2.33.0 +
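For comparison, ordinary applications can reach the same information through the public <sys/auxv.h> interface; libgcc declares the internal __getauxval alias instead precisely because, as noted above, the libc may not be installed when libgcc is built. A user-level sketch, defining HWCAP2_SME locally (bit 23, the same value the SME runtime support patch below uses) in case the installed kernel headers predate it:

    #include <sys/auxv.h>

    #ifndef HWCAP2_SME
    #define HWCAP2_SME (1UL << 23)   /* Linux arm64 AT_HWCAP2 bit for SME */
    #endif

    int
    cpu_has_sme (void)
    {
      return (getauxval (AT_HWCAP2) & HWCAP2_SME) != 0;
    }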
View file
_service:tar_scm:0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch
Added
@@ -0,0 +1,627 @@ +From 1e111ac2d71c5469dc526559de009542acaeb16f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy <szabolcs.nagy@arm.com> +Date: Tue, 15 Nov 2022 14:08:55 +0000 +Subject: PATCH 119/157 BackportSME libgcc: aarch64: Add SME runtime + support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=328c17af772207cb03740809c05ba2c3abfb86be + +The call ABI for SME (Scalable Matrix Extension) requires a number of +helper routines which are added to libgcc so they are tied to the +compiler version instead of the libc version. See +https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines + +The routines are in shared libgcc and static libgcc eh, even though +they are not related to exception handling. This is to avoid linking +a copy of the routines into dynamic linked binaries, because TPIDR2_EL0 +block can be extended in the future which is better to handle in a +single place per process. + +The support routines have to decide if SME is accessible or not. Linux +tells userspace if SME is accessible via AT_HWCAP2, otherwise a new +__aarch64_sme_accessible symbol was introduced that a libc can define. +Due to libgcc and libc build order, the symbol availability cannot be +checked so for __aarch64_sme_accessible an unistd.h feature test macro +is used while such detection mechanism is not available for __getauxval +so we rely on configure checks based on the target triplet. + +Asm helper code is added to make writing the routines easier. + +libgcc/ChangeLog: + + * config/aarch64/t-aarch64: Add sources to the build. + * config/aarch64/__aarch64_have_sme.c: New file. + * config/aarch64/__arm_sme_state.S: New file. + * config/aarch64/__arm_tpidr2_restore.S: New file. + * config/aarch64/__arm_tpidr2_save.S: New file. + * config/aarch64/__arm_za_disable.S: New file. + * config/aarch64/aarch64-asm.h: New file. + * config/aarch64/libgcc-sme.ver: New file. +--- + libgcc/config/aarch64/__aarch64_have_sme.c | 75 ++++++++++++++ + libgcc/config/aarch64/__arm_sme_state.S | 55 ++++++++++ + libgcc/config/aarch64/__arm_tpidr2_restore.S | 89 ++++++++++++++++ + libgcc/config/aarch64/__arm_tpidr2_save.S | 101 +++++++++++++++++++ + libgcc/config/aarch64/__arm_za_disable.S | 65 ++++++++++++ + libgcc/config/aarch64/aarch64-asm.h | 98 ++++++++++++++++++ + libgcc/config/aarch64/libgcc-sme.ver | 24 +++++ + libgcc/config/aarch64/t-aarch64 | 10 ++ + 8 files changed, 517 insertions(+) + create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c + create mode 100644 libgcc/config/aarch64/__arm_sme_state.S + create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S + create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S + create mode 100644 libgcc/config/aarch64/__arm_za_disable.S + create mode 100644 libgcc/config/aarch64/aarch64-asm.h + create mode 100644 libgcc/config/aarch64/libgcc-sme.ver + +diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c b/libgcc/config/aarch64/__aarch64_have_sme.c +new file mode 100644 +index 000000000..5e6492462 +--- /dev/null ++++ b/libgcc/config/aarch64/__aarch64_have_sme.c +@@ -0,0 +1,75 @@ ++/* Initializer for SME support. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. 
++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "auto-target.h" ++ ++#ifndef inhibit_libc ++/* For libc feature test macros. */ ++# include <unistd.h> ++#endif ++ ++#if __ARM_FEATURE_SME ++/* Avoid runtime SME detection if libgcc is built with SME. */ ++# define HAVE_SME_CONST const ++# define HAVE_SME_VALUE 1 ++#elif HAVE___GETAUXVAL ++/* SME access detection on Linux. */ ++# define HAVE_SME_CONST ++# define HAVE_SME_VALUE 0 ++# define HAVE_SME_CTOR sme_accessible () ++ ++# define AT_HWCAP2 26 ++# define HWCAP2_SME (1 << 23) ++unsigned long int __getauxval (unsigned long int); ++ ++static _Bool ++sme_accessible (void) ++{ ++ unsigned long hwcap2 = __getauxval (AT_HWCAP2); ++ return (hwcap2 & HWCAP2_SME) != 0; ++} ++#elif __LIBC___AARCH64_SME_ACCESSIBLE ++/* Alternative SME access detection. */ ++# define HAVE_SME_CONST ++# define HAVE_SME_VALUE 0 ++# define HAVE_SME_CTOR __aarch64_sme_accessible () ++_Bool __aarch64_sme_accessible (void); ++#else ++# define HAVE_SME_CONST const ++# define HAVE_SME_VALUE 0 ++#endif ++ ++/* Define the symbol gating SME support in libgcc. */ ++HAVE_SME_CONST _Bool __aarch64_have_sme ++ __attribute__((visibility("hidden"), nocommon)) = HAVE_SME_VALUE; ++ ++#ifdef HAVE_SME_CTOR ++/* Use a higher priority to ensure it runs before user constructors ++ with priority 100. */ ++static void __attribute__((constructor (90))) ++init_have_sme (void) ++{ ++ __aarch64_have_sme = HAVE_SME_CTOR; ++} ++#endif +diff --git a/libgcc/config/aarch64/__arm_sme_state.S b/libgcc/config/aarch64/__arm_sme_state.S +new file mode 100644 +index 000000000..c4e16cac0 +--- /dev/null ++++ b/libgcc/config/aarch64/__arm_sme_state.S +@@ -0,0 +1,55 @@ ++/* Support routine for SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "aarch64-asm.h" ++ ++/* Query SME state. Call ABI: ++ - Private ZA, streaming-compatible. ++ - x2-x15, x19-x29, sp and fp regs are call preserved. ++ - Takes no argument. 
++ - Returns SME state in x0 and TPIDR2_EL0 in x1. */ ++ ++.hidden __aarch64_have_sme ++ ++variant_pcs (__arm_sme_state) ++ ++ENTRY (__arm_sme_state) ++ /* Check if SME is available. */ ++ adrp x1, __aarch64_have_sme ++ ldrb w1, x1, :lo12:__aarch64_have_sme ++ cbz w1, L(nosme) ++ ++ /* Expose the bottom 2 bits of svcr (SM, ZA) in x0 and set the ++ top 2 bits indicating that SME and TPIDR2_EL0 are available. */ ++ .inst 0xd53b4240 /* mrs x0, svcr */ ++ .inst 0xd53bd0a1 /* mrs x1, tpidr2_el0 */ ++ and x0, x0, 3 ++ orr x0, x0, 0xc000000000000000 ++ ret ++ ++L(nosme): ++ mov x0, 0 ++ mov x1, 0 ++ ret ++END (__arm_sme_state) +diff --git a/libgcc/config/aarch64/__arm_tpidr2_restore.S b/libgcc/config/aarch64/__arm_tpidr2_restore.S +new file mode 100644 +index 000000000..4569d04a2 +--- /dev/null ++++ b/libgcc/config/aarch64/__arm_tpidr2_restore.S +@@ -0,0 +1,89 @@ ++/* Support routine for SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "aarch64-asm.h" ++ ++/* Used for lazy ZA restore. Call ABI: ++ - Shared ZA, streaming-compatible. ++ - x0 is a pointer to a TPIDR2 block. ++ - x0-x13, x19-x29, sp and fp regs are call preserved. ++ - Does not return a value. ++ - Can abort on failure (then registers are not preserved). */ ++ ++variant_pcs (__arm_tpidr2_restore) ++ ++ENTRY (__arm_tpidr2_restore) ++ .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ ++ cbnz x14, L(fail) ++ ++ /* check reserved bytes. */ ++ ldrh w15, x0, 10 ++ ldr w16, x0, 12 ++ orr w15, w15, w16 ++ cbnz w15, L(fail) ++ ++ ldr x16, x0 ++ cbz x16, L(end) ++ ldrh w17, x0, 8 ++ cbz w17, L(end) ++ ++ /* x0: blk, x14: 0, x15: 0, ++ x16: za_save_buffer, x17: num_za_save_slices. 
*/ ++ ++L(restore_loop): ++ .inst 0xe1006200 /* ldr zaw15, 0, x16 */ ++ .inst 0xe1006201 /* ldr zaw15, 1, x16, 1, mul vl */ ++ .inst 0xe1006202 /* ldr zaw15, 2, x16, 2, mul vl */ ++ .inst 0xe1006203 /* ldr zaw15, 3, x16, 3, mul vl */ ++ .inst 0xe1006204 /* ldr zaw15, 4, x16, 4, mul vl */ ++ .inst 0xe1006205 /* ldr zaw15, 5, x16, 5, mul vl */ ++ .inst 0xe1006206 /* ldr zaw15, 6, x16, 6, mul vl */ ++ .inst 0xe1006207 /* ldr zaw15, 7, x16, 7, mul vl */ ++ .inst 0xe1006208 /* ldr zaw15, 8, x16, 8, mul vl */ ++ .inst 0xe1006209 /* ldr zaw15, 9, x16, 9, mul vl */ ++ .inst 0xe100620a /* ldr zaw15, 10, x16, 10, mul vl */ ++ .inst 0xe100620b /* ldr zaw15, 11, x16, 11, mul vl */ ++ .inst 0xe100620c /* ldr zaw15, 12, x16, 12, mul vl */ ++ .inst 0xe100620d /* ldr zaw15, 13, x16, 13, mul vl */ ++ .inst 0xe100620e /* ldr zaw15, 14, x16, 14, mul vl */ ++ .inst 0xe100620f /* ldr zaw15, 15, x16, 15, mul vl */ ++ add w15, w15, 16 ++ .inst 0x04305a10 /* addsvl x16, x16, 16 */ ++ cmp w17, w15 ++ bhi L(restore_loop) ++L(end): ++ ret ++L(fail): ++ PACIASP ++ stp x29, x30, sp, -32! ++ .cfi_adjust_cfa_offset 32 ++ .cfi_rel_offset x29, 0 ++ .cfi_rel_offset x30, 8 ++ mov x29, sp ++ .inst 0x04e0e3f0 /* cntd x16 */ ++ str x16, sp, 16 ++ .cfi_rel_offset 46, 16 ++ .inst 0xd503467f /* smstop */ ++ bl abort ++END (__arm_tpidr2_restore) +diff --git a/libgcc/config/aarch64/__arm_tpidr2_save.S b/libgcc/config/aarch64/__arm_tpidr2_save.S +new file mode 100644 +index 000000000..879cf7980 +--- /dev/null ++++ b/libgcc/config/aarch64/__arm_tpidr2_save.S +@@ -0,0 +1,101 @@ ++/* Support routine for SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "aarch64-asm.h" ++ ++/* Used for lazy ZA save. Call ABI: ++ - Private ZA, streaming-compatible. ++ - x0-x13, x19-x29, sp and fp regs are call preserved. ++ - Takes no argument. ++ - Does not return a value. ++ - Can abort on failure (then registers are not preserved). */ ++ ++.hidden __aarch64_have_sme ++ ++variant_pcs (__arm_tpidr2_save) ++ ++ENTRY (__arm_tpidr2_save) ++ /* Check if SME is available. */ ++ adrp x14, __aarch64_have_sme ++ ldrb w14, x14, :lo12:__aarch64_have_sme ++ cbz w14, L(end) ++ ++ .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ ++ cbz x14, L(end) ++ ++ /* check reserved bytes. */ ++ ldrh w15, x14, 10 ++ ldr w16, x14, 12 ++ orr w15, w15, w16 ++ cbnz w15, L(fail) ++ ++ ldr x16, x14 ++ cbz x16, L(end) ++ ldrh w17, x14, 8 ++ cbz w17, L(end) ++ ++ /* x14: tpidr2, x15: 0, ++ x16: za_save_buffer, x17: num_za_save_slices. 
*/ ++ ++L(save_loop): ++ .inst 0xe1206200 /* str zaw15, 0, x16 */ ++ .inst 0xe1206201 /* str zaw15, 1, x16, 1, mul vl */ ++ .inst 0xe1206202 /* str zaw15, 2, x16, 2, mul vl */ ++ .inst 0xe1206203 /* str zaw15, 3, x16, 3, mul vl */ ++ .inst 0xe1206204 /* str zaw15, 4, x16, 4, mul vl */ ++ .inst 0xe1206205 /* str zaw15, 5, x16, 5, mul vl */ ++ .inst 0xe1206206 /* str zaw15, 6, x16, 6, mul vl */ ++ .inst 0xe1206207 /* str zaw15, 7, x16, 7, mul vl */ ++ .inst 0xe1206208 /* str zaw15, 8, x16, 8, mul vl */ ++ .inst 0xe1206209 /* str zaw15, 9, x16, 9, mul vl */ ++ .inst 0xe120620a /* str zaw15, 10, x16, 10, mul vl */ ++ .inst 0xe120620b /* str zaw15, 11, x16, 11, mul vl */ ++ .inst 0xe120620c /* str zaw15, 12, x16, 12, mul vl */ ++ .inst 0xe120620d /* str zaw15, 13, x16, 13, mul vl */ ++ .inst 0xe120620e /* str zaw15, 14, x16, 14, mul vl */ ++ .inst 0xe120620f /* str zaw15, 15, x16, 15, mul vl */ ++ add w15, w15, 16 ++ .inst 0x04305a10 /* addsvl x16, x16, 16 */ ++ cmp w17, w15 ++ bhi L(save_loop) ++L(end): ++ ret ++L(fail): ++ PACIASP ++ stp x29, x30, sp, -32! ++ .cfi_adjust_cfa_offset 32 ++ .cfi_rel_offset x29, 0 ++ .cfi_rel_offset x30, 8 ++ mov x29, sp ++ .inst 0x04e0e3f0 /* cntd x16 */ ++ str x16, sp, 16 ++ .cfi_rel_offset 46, 16 ++ .inst 0xd503467f /* smstop */ ++ bl abort ++END (__arm_tpidr2_save) ++ ++/* Hidden alias used by __arm_za_disable. */ ++.global __libgcc_arm_tpidr2_save ++.hidden __libgcc_arm_tpidr2_save ++.set __libgcc_arm_tpidr2_save, __arm_tpidr2_save +diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S +new file mode 100644 +index 000000000..cff5b9cec +--- /dev/null ++++ b/libgcc/config/aarch64/__arm_za_disable.S +@@ -0,0 +1,65 @@ ++/* Support routine for SME. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "aarch64-asm.h" ++ ++/* Disable ZA. Call ABI: ++ - Private ZA, streaming-compatible. ++ - x0-x13, x19-x29, sp and fp regs are call preserved. ++ - Takes no argument. ++ - Does not return a value. ++ - Can abort on failure (then registers are not preserved). */ ++ ++.hidden __aarch64_have_sme ++ ++.hidden __libgcc_arm_tpidr2_save ++ ++variant_pcs (__arm_za_disable) ++ ++ENTRY (__arm_za_disable) ++ /* Check if SME is available. */ ++ adrp x14, __aarch64_have_sme ++ ldrb w14, x14, :lo12:__aarch64_have_sme ++ cbz w14, L(end) ++ ++ .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ ++ cbz x14, L(end) ++ ++ PACIASP ++ stp x29, x30, sp, -16! 
++ .cfi_adjust_cfa_offset 16 ++ .cfi_rel_offset x29, 0 ++ .cfi_rel_offset x30, 8 ++ mov x29, sp ++ bl __libgcc_arm_tpidr2_save ++ .inst 0xd51bd0bf /* msr tpidr2_el0, xzr */ ++ .inst 0xd503447f /* smstop za */ ++ ldp x29, x30, sp, 16 ++ .cfi_adjust_cfa_offset -16 ++ .cfi_restore x29 ++ .cfi_restore x30 ++ AUTIASP ++L(end): ++ ret ++END (__arm_za_disable) +diff --git a/libgcc/config/aarch64/aarch64-asm.h b/libgcc/config/aarch64/aarch64-asm.h +new file mode 100644 +index 000000000..8969b06b0 +--- /dev/null ++++ b/libgcc/config/aarch64/aarch64-asm.h +@@ -0,0 +1,98 @@ ++/* AArch64 asm definitions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "auto-target.h" ++ ++#define L(label) .L ## label ++ ++/* Marking variant PCS symbol references is important for PLT calls ++ otherwise it is for documenting the PCS in the symbol table. */ ++#ifdef HAVE_AS_VARIANT_PCS ++# define variant_pcs(name) .variant_pcs name ++#else ++# define variant_pcs(name) ++#endif ++ ++/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */ ++#define FEATURE_1_AND 0xc0000000 ++#define FEATURE_1_BTI 1 ++#define FEATURE_1_PAC 2 ++ ++/* Supported features based on the code generation options. */ ++#if defined(__ARM_FEATURE_BTI_DEFAULT) ++# define BTI_FLAG FEATURE_1_BTI ++# define BTI_C hint 34 ++#else ++# define BTI_FLAG 0 ++# define BTI_C ++#endif ++ ++#if __ARM_FEATURE_PAC_DEFAULT & 3 ++# define PAC_FLAG FEATURE_1_PAC ++# define PACIASP hint 25; .cfi_window_save ++# define AUTIASP hint 29; .cfi_window_save ++#else ++# define PAC_FLAG 0 ++# define PACIASP ++# define AUTIASP ++#endif ++ ++/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ ++#define GNU_PROPERTY(type, value) \ ++ .section .note.gnu.property, "a"; \ ++ .p2align 3; \ ++ .word 4; \ ++ .word 16; \ ++ .word 5; \ ++ .asciz "GNU"; \ ++ .word type; \ ++ .word 4; \ ++ .word value; \ ++ .word 0; \ ++ .previous ++ ++#if defined(__linux__) || defined(__FreeBSD__) ++/* Do not require executable stack. */ ++.section .note.GNU-stack, "", %progbits ++.previous ++ ++/* Add GNU property note if built with branch protection. 
*/ ++# if (BTI_FLAG|PAC_FLAG) != 0 ++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) ++# endif ++#endif ++ ++#define ENTRY_ALIGN(name, align) \ ++ .global name; \ ++ .type name,%function; \ ++ .balign align; \ ++ name: \ ++ .cfi_startproc; \ ++ BTI_C ++ ++#define ENTRY(name) ENTRY_ALIGN(name, 16) ++ ++#define END(name) \ ++ .cfi_endproc; \ ++ .size name, .-name +diff --git a/libgcc/config/aarch64/libgcc-sme.ver b/libgcc/config/aarch64/libgcc-sme.ver +new file mode 100644 +index 000000000..da889c6c0 +--- /dev/null ++++ b/libgcc/config/aarch64/libgcc-sme.ver +@@ -0,0 +1,24 @@ ++# Copyright (C) 2023 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++GCC_14.0 { ++ __arm_sme_state ++ __arm_tpidr2_restore ++ __arm_tpidr2_save ++ __arm_za_disable ++} +diff --git a/libgcc/config/aarch64/t-aarch64 b/libgcc/config/aarch64/t-aarch64 +index 8ca803bd3..5a8feb184 100644 +--- a/libgcc/config/aarch64/t-aarch64 ++++ b/libgcc/config/aarch64/t-aarch64 +@@ -19,3 +19,13 @@ + # <http://www.gnu.org/licenses/>. + + LIB2ADD += $(srcdir)/config/aarch64/sync-cache.c ++ ++# Add sme runtime to shared libgcc ++LIB2ADDEH += \ ++ $(srcdir)/config/aarch64/__aarch64_have_sme.c \ ++ $(srcdir)/config/aarch64/__arm_sme_state.S \ ++ $(srcdir)/config/aarch64/__arm_tpidr2_restore.S \ ++ $(srcdir)/config/aarch64/__arm_tpidr2_save.S \ ++ $(srcdir)/config/aarch64/__arm_za_disable.S ++ ++SHLIB_MAPFILES += $(srcdir)/config/aarch64/libgcc-sme.ver +-- +2.33.0 +
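The detection chain above bottoms out in the kernel's AT_HWCAP2 bits. As a rough sketch (not part of the patch), a libc providing the optional __aarch64_sme_accessible hook could implement it with the same HWCAP2_SME bit value that __aarch64_have_sme.c hard-codes:

  #include <sys/auxv.h>            /* getauxval, AT_HWCAP2 */

  #define HWCAP2_SME (1UL << 23)   /* matches the value used in __aarch64_have_sme.c */

  /* Optional hook consumed by libgcc's constructor above; a real libc
     may gate this on its own SME bookkeeping instead.  */
  _Bool
  __aarch64_sme_accessible (void)
  {
    return (getauxval (AT_HWCAP2) & HWCAP2_SME) != 0;
  }

Either way the result lands in __aarch64_have_sme, so the asm routines only need a single hidden-visibility byte load to decide between the SME and no-SME paths.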
View file
_service:tar_scm:0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch
Added
@@ -0,0 +1,70 @@
+From 310c8b5aaedad1430146fed9d8992201278164a6 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 29 Sep 2023 13:55:51 +0100
+Subject: [PATCH 120/157] [Backport][SME] libgcc: aarch64: Add SME unwinder
+ support
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=91d68665b8b7a5dffd0bbf8cd1f74c3c41d4c2d8
+
+To support the ZA lazy save scheme, the PCS requires the unwinder to
+reset the SME state to PSTATE.SM=0, PSTATE.ZA=0, TPIDR2_EL0=0 on entry
+to an exception handler. We use the __arm_za_disable SME runtime call
+unconditionally to achieve this.
+https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions
+
+The hidden alias is used to avoid a PLT and avoid inconsistent VPCS
+marking (we don't rely on a special PCS at the call site). In case of
+static linking, the SME runtime init code is linked into code that
+raises exceptions.
+
+libgcc/ChangeLog:
+
+	* config/aarch64/__arm_za_disable.S: Add hidden alias.
+	* config/aarch64/aarch64-unwind.h: Reset the SME state before
+	EH return via the _Unwind_Frames_Extra hook.
+---
+ libgcc/config/aarch64/__arm_za_disable.S |  5 +++++
+ libgcc/config/aarch64/aarch64-unwind.h   | 16 ++++++++++++++++
+ 2 files changed, 21 insertions(+)
+
+diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S
+index cff5b9cec..03fc28a39 100644
+--- a/libgcc/config/aarch64/__arm_za_disable.S
++++ b/libgcc/config/aarch64/__arm_za_disable.S
+@@ -63,3 +63,8 @@ ENTRY (__arm_za_disable)
+ L(end):
+ 	ret
+ END (__arm_za_disable)
++
++/* Hidden alias used by the unwinder.  */
++.global __libgcc_arm_za_disable
++.hidden __libgcc_arm_za_disable
++.set __libgcc_arm_za_disable, __arm_za_disable
+diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h
+index 40b22d3c2..bfa695dcb 100644
+--- a/libgcc/config/aarch64/aarch64-unwind.h
++++ b/libgcc/config/aarch64/aarch64-unwind.h
+@@ -87,4 +87,20 @@ aarch64_frob_update_context (struct _Unwind_Context *context,
+   return;
+ }
+ 
++/* SME runtime function local to libgcc, streaming compatible
++   and preserves more registers than the base PCS requires, but
++   we don't rely on that here.  */
++__attribute__ ((visibility ("hidden")))
++void __libgcc_arm_za_disable (void);
++
++/* Disable the SME ZA state in case an unwound frame used the ZA
++   lazy saving scheme.  */
++#undef _Unwind_Frames_Extra
++#define _Unwind_Frames_Extra(x) \
++  do \
++    { \
++      __libgcc_arm_za_disable (); \
++    } \
++  while (0)
++
+ #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */
+--
+2.33.0
+
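For context on the hook: _Unwind_Frames_Extra is an internal libgcc macro (empty by default) that the generic unwinder expands as it installs the target context, so with this definition every caught exception passes through __libgcc_arm_za_disable before the handler runs. A hedged C sketch of the resulting guarantee, with an illustrative function name that does not exist in the sources:

  extern void __libgcc_arm_za_disable (void);

  /* Conceptual post-condition at every EH landing pad after this patch:
     no stale lazy-save state can survive into the handler.  */
  static void
  enter_landing_pad (void)
  {
    __libgcc_arm_za_disable ();  /* PSTATE.ZA = 0, TPIDR2_EL0 = 0 */
    /* ... the handler frame's registers are then restored ...  */
  }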
View file
_service:tar_scm:0220-Backport-SME-libgcc-Fix-config.in.patch
Added
@@ -0,0 +1,51 @@
+From b20b75158d1230a8b6cbabb36e3b128cbd9ec86f Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 8 Dec 2023 12:22:54 +0000
+Subject: [PATCH 121/157] [Backport][SME] libgcc: Fix config.in
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47575ec9edcd3078f066aa54ba428420be796bef
+
+It was updated incorrectly in
+
+  commit dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463
+  Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
+  CommitDate: 2023-12-08 11:29:06 +0000
+
+  libgcc: aarch64: Configure check for __getauxval
+
+so regenerate it.
+
+libgcc/ChangeLog:
+
+	* config.in: Regenerate.
+---
+ libgcc/config.in | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/libgcc/config.in b/libgcc/config.in
+index 441d4d39b..8f7dd437b 100644
+--- a/libgcc/config.in
++++ b/libgcc/config.in
+@@ -16,9 +16,6 @@
+ /* Define to 1 if the assembler supports .variant_pcs.  */
+ #undef HAVE_AS_VARIANT_PCS
+ 
+-/* Define to 1 if __getauxval is available.  */
+-#undef HAVE___GETAUXVAL
+-
+ /* Define to 1 if the target assembler supports thread-local storage.  */
+ #undef HAVE_CC_TLS
+ 
+@@ -67,6 +64,9 @@
+ /* Define to 1 if you have the <unistd.h> header file.  */
+ #undef HAVE_UNISTD_H
+ 
++/* Define to 1 if __getauxval is available.  */
++#undef HAVE___GETAUXVAL
++
+ /* Define to the address where bug reports for this package should be sent. */
+ #undef PACKAGE_BUGREPORT
+ 
+--
+2.33.0
+
View file
_service:tar_scm:0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch
Added
@@ -0,0 +1,54 @@
+From 0214ca06a182481851ed90aae21f460f87d26084 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sun, 10 Dec 2023 19:46:05 +0000
+Subject: [PATCH 122/157] [Backport][SME] aarch64: Add -funwind-tables to some
+ tests
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=02ecdaab7a50f4505fd905effb6d238d773dc813
+
+The .cfi scans in these tests failed for *-elf targets because
+those targets don't enable .eh_frame info by default.
+
+gcc/testsuite/
+	* gcc.target/aarch64/sme/call_sm_switch_1.c: Add -funwind-tables.
+	* gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise.
+	* gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise.
+---
+ gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c | 2 +-
+ gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c | 2 +-
+ gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
+index a2de55773..98922aaea 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c
+@@ -1,4 +1,4 @@
+-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+ // { dg-final { check-function-bodies "**" "" } }
+ 
+ void ns_callee ();
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c
+index ed999d085..4250fe798 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c
++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c
+@@ -1,4 +1,4 @@
+-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+ // { dg-final { check-function-bodies "**" "" } }
+ 
+ __attribute__((aarch64_vector_pcs)) void ns_callee ();
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
+index be9b5cc04..e3d9bc274 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
+@@ -1,4 +1,4 @@
+-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
++// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+ // { dg-final { check-function-bodies "**" "" } }
+ 
+ #include <arm_sve.h>
+--
+2.33.0
+
View file
_service:tar_scm:0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch
Added
@@ -0,0 +1,106 @@ +From cc2e901eccd40992432f74270a9ebc1b708b6eb1 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Sun, 10 Dec 2023 19:46:05 +0000 +Subject: PATCH 123/157 BackportSME aarch64: Skip some SME register save + tests on BE + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=23ea0bc2cf042d74c4adfe26a57cf96b1d837a91 + +Big-endian targets need to save Z8-Z15 in the same order as +the registers would appear for D8-D15, because the layout is +mandated by the EH ABI. BE targets therefore use ST1D instead +of the normal STR for those registers (but not for others). + +That difference is already tested elsewhere and isn't important +for the SME tests. This patch therefore restricts the affected +tests to LE. + +gcc/testsuite/ + * gcc.target/aarch64/sme/call_sm_switch_5.c: Restrict tests that + contain Z8-Z23 saves to little-endian. + * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. + * gcc.target/aarch64/sme/locally_streaming_1.c: Likewise. +--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 6 +++--- + gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c | 6 +++--- + gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +index e3d9bc274..6238ab80d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +@@ -14,7 +14,7 @@ struct callbacks { + }; + + /* +-** n_caller: { target lp64 } ++** n_caller: { target { lp64 && aarch64_little_endian } } + ** stp x30, (x19|x20-8), \sp, #?-32\! + ** cntd x16 + ** str x16, \sp, #?16\ +@@ -114,7 +114,7 @@ n_caller (struct callbacks *c) + } + + /* +-** s_caller: { target lp64 } ++** s_caller: { target { lp64 && aarch64_little_endian } } + ** stp x30, (x19|x20-8), \sp, #?-32\! + ** cntd x16 + ** str x16, \sp, #?16\ +@@ -214,7 +214,7 @@ s_caller (struct callbacks *c) arm::streaming + } + + /* +-** sc_caller: ++** sc_caller: { target aarch64_little_endian } + ** stp x29, x30, \sp, #?-32\! + ** mov x29, sp + ** cntd x16 +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +index f44724df3..c909b34ff 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +@@ -7,7 +7,7 @@ svint8_t produce_z0 (); + void consume_z0 (svint8_t); + + /* +-** test_z0: ++** test_z0: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_z0 +@@ -32,7 +32,7 @@ svint8x4_t produce_z3 (); + void consume_z3 (svint8x4_t); + + /* +-** test_z3: ++** test_z3: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_z3 +@@ -61,7 +61,7 @@ svbool_t produce_p0 (); + void consume_p0 (svbool_t); + + /* +-** test_p0: ++** test_p0: { target aarch64_little_endian } + ** ... + ** smstop sm + ** bl produce_p0 +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +index 20ff4b87d..4bb637f47 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +@@ -265,7 +265,7 @@ n_ls_vector_pcs () + } + + /* +-** n_ls_sve_pcs: ++** n_ls_sve_pcs: { target aarch64_little_endian } + ** sub sp, sp, #?16 + ** cntd x16 + ** str x16, \sp\ +-- +2.33.0 +
View file
_service:tar_scm:0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch
Added
@@ -0,0 +1,37 @@
+From ab7a2c3b74c65d62d661621c56ef984cfb72f985 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Sep 2022 11:32:50 +0100
+Subject: [PATCH 124/157] [Backport][SME] Add OPTIONS_H_EXTRA to GTFILES
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1e1fa054970a30844eb94d726b4954dcb8b9063
+
+I have a patch that adds a typedef to aarch64's <cpu>-opts.h.
+The typedef is used for a TargetVariable in the .opt file,
+which means that it is covered by PCH and so needs to be
+visible to gengtype.
+
+<cpu>-opts.h is not included directly in tm.h, but indirectly
+by target headers (in this case aarch64.h).  There was therefore
+nothing that caused it to be added to GTFILES.
+
+gcc/
+	* Makefile.in (GTFILES): Add OPTIONS_H_EXTRA.
+---
+ gcc/Makefile.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 5cd838270..fcfa54697 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -2648,6 +2648,7 @@ s-match: build/genmatch$(build_exeext) $(srcdir)/match.pd cfn-operators.pd
+ 
+ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
+   $(host_xm_file_list) \
++  $(OPTIONS_H_EXTRA) \
+   $(tm_file_list) $(HASHTAB_H) $(SPLAY_TREE_H) $(srcdir)/bitmap.h \
+   $(srcdir)/wide-int.h $(srcdir)/alias.h \
+   $(srcdir)/coverage.cc $(srcdir)/rtl.h \
+--
+2.33.0
+
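A concrete illustration of the scenario (hypothetical names; the actual typedef arrives with a later patch in this series): a target options header such as aarch64-opts.h might define

  /* <cpu>-opts.h: reaches tm.h only indirectly, through aarch64.h.  */
  typedef unsigned long aarch64_feature_flags;

and the .opt machinery can then declare a TargetVariable of that type whose value must be saved and restored by precompiled headers. gengtype can only emit correct PCH support code if it actually scans the header, which is what adding $(OPTIONS_H_EXTRA) to GTFILES arranges.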
View file
_service:tar_scm:0224-Backport-SME-aarch64-Add-V1DI-mode.patch
Added
@@ -0,0 +1,177 @@ +From 21f9190106f8324be42e3e8e0510467386dd68a0 Mon Sep 17 00:00:00 2001 +From: Andrew Carlotti <andrew.carlotti@arm.com> +Date: Fri, 15 Jul 2022 15:25:53 +0100 +Subject: PATCH 125/157 BackportSME aarch64: Add V1DI mode + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ba864c5d11a1c20891a1e054cb7814ec23de5c9 + +We already have a V1DF mode, so this makes the vector modes more consistent. + +Additionally, this allows us to recognise uint64x1_t and int64x1_t types given +only the mode and type qualifiers (e.g. in aarch64_lookup_simd_builtin_type). + +gcc/ChangeLog: + + * config/aarch64/aarch64-builtins.cc + (v1di_UP): Add V1DI mode to _UP macros. + * config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode. + * config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode. + * config/aarch64/aarch64-simd.md + (vec_extractv2dfv1df): Replace with... + (vec_extract<mode><V1half>): ...this. + * config/aarch64/aarch64.cc + (aarch64_classify_vector_mode): Add V1DI mode. + * config/aarch64/iterators.md + (VQ_2E, V1HALF, V1half): New. + (nunits): Add V1DI mode. +--- + gcc/config/aarch64/aarch64-builtins.cc | 1 + + gcc/config/aarch64/aarch64-modes.def | 1 + + gcc/config/aarch64/aarch64-simd-builtin-types.def | 6 +++--- + gcc/config/aarch64/aarch64-simd.md | 14 +++++++------- + gcc/config/aarch64/aarch64.cc | 2 +- + gcc/config/aarch64/iterators.md | 14 ++++++++++++-- + 6 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 015e9d975..37bb3af48 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -55,6 +55,7 @@ + #define v2si_UP E_V2SImode + #define v2sf_UP E_V2SFmode + #define v1df_UP E_V1DFmode ++#define v1di_UP E_V1DImode + #define di_UP E_DImode + #define df_UP E_DFmode + #define v16qi_UP E_V16QImode +diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def +index 8fa66fdb3..dd74da4b3 100644 +--- a/gcc/config/aarch64/aarch64-modes.def ++++ b/gcc/config/aarch64/aarch64-modes.def +@@ -70,6 +70,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ + VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ + VECTOR_MODES (FLOAT, 8); /* V2SF. */ + VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ ++VECTOR_MODE (INT, DI, 1); /* V1DI. */ + VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */ + VECTOR_MODE (FLOAT, HF, 2); /* V2HF. 
*/ + +diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def +index 248e51e96..405455814 100644 +--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def ++++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def +@@ -24,7 +24,7 @@ + ENTRY (Int16x8_t, V8HI, none, 11) + ENTRY (Int32x2_t, V2SI, none, 11) + ENTRY (Int32x4_t, V4SI, none, 11) +- ENTRY (Int64x1_t, DI, none, 11) ++ ENTRY (Int64x1_t, V1DI, none, 11) + ENTRY (Int64x2_t, V2DI, none, 11) + ENTRY (Uint8x8_t, V8QI, unsigned, 11) + ENTRY (Uint8x16_t, V16QI, unsigned, 12) +@@ -32,7 +32,7 @@ + ENTRY (Uint16x8_t, V8HI, unsigned, 12) + ENTRY (Uint32x2_t, V2SI, unsigned, 12) + ENTRY (Uint32x4_t, V4SI, unsigned, 12) +- ENTRY (Uint64x1_t, DI, unsigned, 12) ++ ENTRY (Uint64x1_t, V1DI, unsigned, 12) + ENTRY (Uint64x2_t, V2DI, unsigned, 12) + ENTRY (Poly8_t, QI, poly, 9) + ENTRY (Poly16_t, HI, poly, 10) +@@ -42,7 +42,7 @@ + ENTRY (Poly8x16_t, V16QI, poly, 12) + ENTRY (Poly16x4_t, V4HI, poly, 12) + ENTRY (Poly16x8_t, V8HI, poly, 12) +- ENTRY (Poly64x1_t, DI, poly, 12) ++ ENTRY (Poly64x1_t, V1DI, poly, 12) + ENTRY (Poly64x2_t, V2DI, poly, 12) + ENTRY (Float16x4_t, V4HF, none, 13) + ENTRY (Float16x8_t, V8HF, none, 13) +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 62493cdfa..04592fc90 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -8326,16 +8326,16 @@ + }) + + ;; Extract a single-element 64-bit vector from one half of a 128-bit vector. +-(define_expand "vec_extractv2dfv1df" +- (match_operand:V1DF 0 "register_operand") +- (match_operand:V2DF 1 "register_operand") ++(define_expand "vec_extract<mode><V1half>" ++ (match_operand:<V1HALF> 0 "register_operand") ++ (match_operand:VQ_2E 1 "register_operand") + (match_operand 2 "immediate_operand") + "TARGET_SIMD" + { +- /* V1DF is rarely used by other patterns, so it should be better to hide +- it in a subreg destination of a normal DF op. */ +- rtx scalar0 = gen_lowpart (DFmode, operands0); +- emit_insn (gen_vec_extractv2dfdf (scalar0, operands1, operands2)); ++ /* V1DI and V1DF are rarely used by other patterns, so it should be better ++ to hide it in a subreg destination of a normal DI or DF op. */ ++ rtx scalar0 = gen_lowpart (<VHALF>mode, operands0); ++ emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands1, operands2)); + DONE; + }) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b8e540b6e..f7285555b 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -4117,7 +4117,7 @@ aarch64_classify_vector_mode (machine_mode mode) + case E_V8QImode: + case E_V4HImode: + case E_V2SImode: +- /* ...E_V1DImode doesn't exist. */ ++ case E_V1DImode: + case E_V4HFmode: + case E_V4BFmode: + case E_V2SFmode: +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 152d28f6b..94db8c53f 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -138,6 +138,9 @@ + ;; VQ without 2 element modes. + (define_mode_iterator VQ_NO2E V16QI V8HI V4SI V8HF V4SF V8BF) + ++;; 2 element quad vector modes. ++(define_mode_iterator VQ_2E V2DI V2DF) ++ + ;; BFmode vector modes. 
+ (define_mode_iterator VBF V4BF V8BF) + +@@ -1116,12 +1119,13 @@ + (define_mode_attr nunits (V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") +- (V2DI "2") (V8DI "8") ++ (V1DI "1") (V2DI "2") + (V4HF "4") (V8HF "8") + (V4BF "4") (V8BF "8") + (V2SF "2") (V4SF "4") + (V1DF "1") (V2DF "2") +- (DI "1") (DF "1")) ++ (DI "1") (DF "1") ++ (V8DI "8")) + + ;; Map a mode to the number of bits in it, if the size of the mode + ;; is constant. +@@ -1501,6 +1505,12 @@ + (V2DI "di") (V2SF "sf") + (V4SF "v2sf") (V2DF "df")) + ++;; Single-element half modes of quad vector modes. ++(define_mode_attr V1HALF (V2DI "V1DI") (V2DF "V1DF")) ++ ++;; Single-element half modes of quad vector modes, in lower-case ++(define_mode_attr V1half (V2DI "v1di") (V2DF "v1df")) ++ + ;; Double modes of vector modes. + (define_mode_attr VDBL (V8QI "V16QI") (V4HI "V8HI") + (V4HF "V8HF") (V4BF "V8BF") +-- +2.33.0 +
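One user-visible consequence of the V1DI mode, shown as a hedged sketch rather than anything taken from the patch: the single-element 64-bit Advanced SIMD types now map to a genuine vector mode, so extracting the low half of a 128-bit vector can go through the generalized vec_extract<mode><V1half> pattern above:

  #include <arm_neon.h>

  int64x1_t
  low_half (int64x2_t v)
  {
    /* int64x1_t is now a V1DI value rather than a plain DI scalar,
       matching the ENTRY (Int64x1_t, V1DI, ...) change.  */
    return vget_low_s64 (v);
  }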
View file
_service:tar_scm:0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch
Added
@@ -0,0 +1,217 @@ +From eaea26e2218ee61a9be0e2933548c752167dcdb5 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Fri, 10 Nov 2023 15:46:21 +0000 +Subject: PATCH 126/157 BackportSME Allow md iterators to include other + iterators + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5dbaf4851bbf56b6176dca1f1e7d38a16b5b84ee + +This patch allows an .md iterator to include the contents of +previous iterators, possibly with an extra condition attached. + +Too much indirection might become hard to follow, so for the +AArch64 changes I tried to stick to things that seemed likely +to be uncontroversial: + +(a) structure iterators that combine modes for different sizes + and vector counts + +(b) iterators that explicitly duplicate another iterator + (for iterating over the cross product) + +gcc/ + * read-rtl.cc (md_reader::read_mapping): Allow iterators to + include other iterators. + * doc/md.texi: Document the change. + * config/aarch64/iterators.md (DREG2, VQ2, TX2, DX2, SX2): Include + the iterator that is being duplicated, rather than reproducing it. + (VSTRUCT_D): Redefine using VSTRUCT_234D. + (VSTRUCT_Q): Likewise VSTRUCT_234Q. + (VSTRUCT_2QD, VSTRUCT_3QD, VSTRUCT_4QD, VSTRUCT_QD): Redefine using + the individual D and Q iterators. +--- + gcc/config/aarch64/iterators.md | 58 ++++++++------------------------- + gcc/doc/md.texi | 13 ++++++++ + gcc/read-rtl.cc | 21 ++++++++++-- + 3 files changed, 46 insertions(+), 46 deletions(-) + +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 94db8c53f..a1659dfba 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -106,7 +106,7 @@ + (define_mode_iterator DREG V8QI V4HI V4HF V2SI V2SF DF) + + ;; Copy of the above. +-(define_mode_iterator DREG2 V8QI V4HI V4HF V2SI V2SF DF) ++(define_mode_iterator DREG2 DREG) + + ;; All modes suitable to store/load pair (2 elements) using STP/LDP. + (define_mode_iterator VP_2E V2SI V2SF V2DI V2DF) +@@ -121,7 +121,7 @@ + (define_mode_iterator VQ V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF) + + ;; Copy of the above. +-(define_mode_iterator VQ2 V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF) ++(define_mode_iterator VQ2 VQ) + + ;; Quad vector modes suitable for moving. Includes BFmode. + (define_mode_iterator VQMOV V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF) +@@ -321,14 +321,6 @@ + ;; Advanced SIMD opaque structure modes. + (define_mode_iterator VSTRUCT OI CI XI) + +-;; Advanced SIMD 64-bit vector structure modes. +-(define_mode_iterator VSTRUCT_D V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF) +- + ;; Advanced SIMD 64-bit 2-vector structure modes. + (define_mode_iterator VSTRUCT_2D V2x8QI V2x4HI V2x2SI V2x1DI + V2x4HF V2x2SF V2x1DF V2x4BF) +@@ -341,6 +333,9 @@ + (define_mode_iterator VSTRUCT_4D V4x8QI V4x4HI V4x2SI V4x1DI + V4x4HF V4x2SF V4x1DF V4x4BF) + ++;; Advanced SIMD 64-bit vector structure modes. ++(define_mode_iterator VSTRUCT_D VSTRUCT_2D VSTRUCT_3D VSTRUCT_4D) ++ + ;; Advanced SIMD 64-bit 2-vector structure modes minus V2x1DI and V2x1DF. + (define_mode_iterator VSTRUCT_2DNX V2x8QI V2x4HI V2x2SI V2x4HF + V2x2SF V2x4BF) +@@ -365,14 +360,6 @@ + ;; Advanced SIMD 64-bit 4-vector structure modes with 64-bit elements. + (define_mode_iterator VSTRUCT_4DX V4x1DI V4x1DF) + +-;; Advanced SIMD 128-bit vector structure modes. 
+-(define_mode_iterator VSTRUCT_Q V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) +- + ;; Advanced SIMD 128-bit 2-vector structure modes. + (define_mode_iterator VSTRUCT_2Q V2x16QI V2x8HI V2x4SI V2x2DI + V2x8HF V2x4SF V2x2DF V2x8BF) +@@ -385,49 +372,32 @@ + (define_mode_iterator VSTRUCT_4Q V4x16QI V4x8HI V4x4SI V4x2DI + V4x8HF V4x4SF V4x2DF V4x8BF) + ++;; Advanced SIMD 128-bit vector structure modes. ++(define_mode_iterator VSTRUCT_Q VSTRUCT_2Q VSTRUCT_3Q VSTRUCT_4Q) ++ + ;; Advanced SIMD 2-vector structure modes. +-(define_mode_iterator VSTRUCT_2QD V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF) ++(define_mode_iterator VSTRUCT_2QD VSTRUCT_2D VSTRUCT_2Q) + + ;; Advanced SIMD 3-vector structure modes. +-(define_mode_iterator VSTRUCT_3QD V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF) ++(define_mode_iterator VSTRUCT_3QD VSTRUCT_3D VSTRUCT_3Q) + + ;; Advanced SIMD 4-vector structure modes. +-(define_mode_iterator VSTRUCT_4QD V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) ++(define_mode_iterator VSTRUCT_4QD VSTRUCT_4D VSTRUCT_4Q) + + ;; Advanced SIMD vector structure modes. +-(define_mode_iterator VSTRUCT_QD V2x8QI V2x4HI V2x2SI V2x1DI +- V2x4HF V2x2SF V2x1DF V2x4BF +- V3x8QI V3x4HI V3x2SI V3x1DI +- V3x4HF V3x2SF V3x1DF V3x4BF +- V4x8QI V4x4HI V4x2SI V4x1DI +- V4x4HF V4x2SF V4x1DF V4x4BF +- V2x16QI V2x8HI V2x4SI V2x2DI +- V2x8HF V2x4SF V2x2DF V2x8BF +- V3x16QI V3x8HI V3x4SI V3x2DI +- V3x8HF V3x4SF V3x2DF V3x8BF +- V4x16QI V4x8HI V4x4SI V4x2DI +- V4x8HF V4x4SF V4x2DF V4x8BF) ++(define_mode_iterator VSTRUCT_QD VSTRUCT_D VSTRUCT_Q) + + ;; Double scalar modes + (define_mode_iterator DX DI DF DD) + + ;; Duplicate of the above +-(define_mode_iterator DX2 DI DF DD) ++(define_mode_iterator DX2 DX) + + ;; Single scalar modes + (define_mode_iterator SX SI SF) + + ;; Duplicate of the above +-(define_mode_iterator SX2 SI SF) ++(define_mode_iterator SX2 SX) + + ;; Single and double integer and float modes + (define_mode_iterator DSX DF DI SF SI) +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 04ace8f7f..c0cf0ec64 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -11561,6 +11561,19 @@ to @code{@var{mode}}. For example: + means that the @code{:DI} expansion only applies if @code{TARGET_64BIT} + but that the @code{:SI} expansion has no such constraint. + ++It is also possible to include iterators in other iterators. For example: ++ ++@smallexample ++(define_mode_iterator VI V16QI V8HI V4SI V2DI) ++(define_mode_iterator VF V8HF V4SF (V2DF "TARGET_DOUBLE")) ++(define_mode_iterator V VI (VF "TARGET_FLOAT")) ++@end smallexample ++ ++makes @samp{:V} iterate over the modes in @code{VI} and the modes ++in @code{VF}. When a construct uses @samp{:V}, the @code{V8HF} and ++@code{V4SF} expansions require @samp{TARGET_FLOAT} while the @code{V2DF} ++expansion requires @samp{TARGET_DOUBLE && TARGET_FLOAT}. ++ + Iterators are applied in the order they are defined. This can be + significant if two iterators are used in a construct that requires + substitutions. @xref{Substitutions}. 
+diff --git a/gcc/read-rtl.cc b/gcc/read-rtl.cc +index 798d24859..cdfa9e7b8 100644 +--- a/gcc/read-rtl.cc ++++ b/gcc/read-rtl.cc +@@ -1261,8 +1261,25 @@ md_reader::read_mapping (struct iterator_group *group, htab_t table) + string = read_string (false); + require_char_ws (')'); + } +- number = group->find_builtin (name.string); +- end_ptr = add_map_value (end_ptr, number, string); ++ auto *subm = (struct mapping *) htab_find (group->iterators, ++ &name.string); ++ if (subm) ++ { ++ if (m == subm) ++ fatal_with_file_and_line ("recursive definition of `%s'", ++ name.string); ++ for (map_value *v = subm->values; v; v = v->next) ++ { ++ auto *joined = rtx_reader_ptr->join_c_conditions (v->string, ++ string); ++ end_ptr = add_map_value (end_ptr, v->number, joined); ++ } ++ } ++ else ++ { ++ number = group->find_builtin (name.string); ++ end_ptr = add_map_value (end_ptr, number, string); ++ } + c = read_skip_spaces (); + } + while (c != ''); +-- +2.33.0 +
View file
_service:tar_scm:0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch
Added
@@ -0,0 +1,142 @@ +From 637e6469f2225b6f6f6b0c84b4e7abcd8dfd7ca4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christoph=20M=C3=BCllner?= <christoph.muellner@vrull.eu> +Date: Wed, 28 Sep 2022 11:19:06 +0200 +Subject: PATCH 127/157 BackportSME riscv: Add support for strlen inline + expansion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df48285b2484eb4f8e0570c566677114eb0e553a + +Note: Only introduce the definitions of function +emit_likely_jump_insn and emit_unlikely_jump_insn, +and drop others. + +This patch implements the expansion of the strlen builtin for RV32/RV64 +for xlen-aligned aligned strings if Zbb or XTheadBb instructions are available. +The inserted sequences are: + +rv32gc_zbb (RV64 is similar): + add a3,a0,4 + li a4,-1 +.L1: lw a5,0(a0) + add a0,a0,4 + orc.b a5,a5 + beq a5,a4,.L1 + not a5,a5 + ctz a5,a5 + srl a5,a5,0x3 + add a0,a0,a5 + sub a0,a0,a3 + +rv64gc_xtheadbb (RV32 is similar): + add a4,a0,8 +.L2: ld a5,0(a0) + add a0,a0,8 + th.tstnbz a5,a5 + beqz a5,.L2 + th.rev a5,a5 + th.ff1 a5,a5 + srl a5,a5,0x3 + add a0,a0,a5 + sub a0,a0,a4 + +This allows to inline calls to strlen(), with optimized code for +xlen-aligned strings, resulting in the following benefits over +a call to libc: +* no call/ret instructions +* no stack frame allocation +* no register saving/restoring +* no alignment test + +The inlining mechanism is gated by a new switch ('-minline-strlen') +and by the variable 'optimize_size'. + +Tested using the glibc string tests. + +Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu> + +gcc/ChangeLog: + + * config.gcc: Add new object riscv-string.o. + riscv-string.cc. + * config/riscv/riscv-protos.h (riscv_expand_strlen): + New function. + * config/riscv/riscv.md (strlen<mode>): New expand INSN. + * config/riscv/riscv.opt: New flag 'minline-strlen'. + * config/riscv/t-riscv: Add new object riscv-string.o. + * config/riscv/thead.md (th_rev<mode>2): Export INSN name. + (th_rev<mode>2): Likewise. + (th_tstnbz<mode>2): New INSN. + * doc/invoke.texi: Document '-minline-strlen'. + * emit-rtl.cc (emit_likely_jump_insn): New helper function. + (emit_unlikely_jump_insn): Likewise. + * rtl.h (emit_likely_jump_insn): New prototype. + (emit_unlikely_jump_insn): Likewise. + * config/riscv/riscv-string.cc: New file. + +gcc/testsuite/ChangeLog: + + * gcc.target/riscv/xtheadbb-strlen-unaligned.c: New test. + * gcc.target/riscv/xtheadbb-strlen.c: New test. + * gcc.target/riscv/zbb-strlen-disabled-2.c: New test. + * gcc.target/riscv/zbb-strlen-disabled.c: New test. + * gcc.target/riscv/zbb-strlen-unaligned.c: New test. + * gcc.target/riscv/zbb-strlen.c: New test. +--- + gcc/emit-rtl.cc | 24 ++++++++++++++++++++++++ + gcc/rtl.h | 2 ++ + 2 files changed, 26 insertions(+) + +diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc +index 1e02ae254..2df5ff414 100644 +--- a/gcc/emit-rtl.cc ++++ b/gcc/emit-rtl.cc +@@ -5163,6 +5163,30 @@ emit_jump_insn (rtx x) + return last; + } + ++/* Make an insn of code JUMP_INSN with pattern X, ++ add a REG_BR_PROB note that indicates very likely probability, ++ and add it to the end of the doubly-linked list. 
*/ ++ ++rtx_insn * ++emit_likely_jump_insn (rtx x) ++{ ++ rtx_insn *jump = emit_jump_insn (x); ++ add_reg_br_prob_note (jump, profile_probability::very_likely ()); ++ return jump; ++} ++ ++/* Make an insn of code JUMP_INSN with pattern X, ++ add a REG_BR_PROB note that indicates very unlikely probability, ++ and add it to the end of the doubly-linked list. */ ++ ++rtx_insn * ++emit_unlikely_jump_insn (rtx x) ++{ ++ rtx_insn *jump = emit_jump_insn (x); ++ add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); ++ return jump; ++} ++ + /* Make an insn of code CALL_INSN with pattern X + and add it to the end of the doubly-linked list. */ + +diff --git a/gcc/rtl.h b/gcc/rtl.h +index 488016bb4..a0db225cb 100644 +--- a/gcc/rtl.h ++++ b/gcc/rtl.h +@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *); + extern rtx_insn *emit_insn (rtx); + extern rtx_insn *emit_debug_insn (rtx); + extern rtx_insn *emit_jump_insn (rtx); ++extern rtx_insn *emit_likely_jump_insn (rtx); ++extern rtx_insn *emit_unlikely_jump_insn (rtx); + extern rtx_insn *emit_call_insn (rtx); + extern rtx_code_label *emit_label (rtx); + extern rtx_jump_table_data *emit_jump_table_data (rtx); +-- +2.33.0 +
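The two helpers exist because hand-expanded loops need branch probabilities for sensible block layout. A hedged sketch of typical use inside a target expander (chunk_reg and loop_label are illustrative names; the RISC-V strlen expander itself is dropped from this backport, as the note above says):

  /* Branch back to the loop head while the loaded chunk is still
     all-ones (no NUL byte seen); mark the branch as very likely
     taken so the loop body stays hot in the block layout.  */
  rtx cond = gen_rtx_NE (VOIDmode, chunk_reg, const0_rtx);
  rtx_insn *jump
    = emit_likely_jump_insn (gen_rtx_SET (pc_rtx,
					  gen_rtx_IF_THEN_ELSE (VOIDmode, cond,
								gen_rtx_LABEL_REF (Pmode, loop_label),
								pc_rtx)));
  JUMP_LABEL (jump) = loop_label;
  LABEL_NUSES (loop_label) += 1;

emit_unlikely_jump_insn is used the same way for the slow-path exits, so the REG_BR_PROB notes steer those blocks out of the fall-through path.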
View file
_service:tar_scm:0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch
Added
@@ -0,0 +1,189 @@ +From 8c6ffb4c6f86231eee318ceeb8546a53037edfe9 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Tue, 4 Oct 2022 23:13:15 +0200 +Subject: PATCH 128/157 BackportSME attribs: Add overloads with namespace + name + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0764dc8537a4f87089ecd32391cb5f8803b43c96 + +I've discovered a problem with the way we handle scoped attributes. For +declaration or type attributes for attributes we don't know anything about +we just don't add them to the declarations or types, so later in the FEs and +middle-end it is fine to use lookup_attribute etc. which just check the +attribute name and not namespace because non-standard non-GNU attributes +just won't show there. But in the case of attributes on statements, nothing +has filtered out the unknown attributes, so with my earlier assume +attribute patch e.g. c-c++-common/Wno-attributes-6.c test failed because +it uses: +vendor::assume(1 + 1 == 2); +with -Wno-attributes=vendor::assume and lookup_attribute ("assume", ) +finds such attribute and handled it that way. +So, for those cases, this patch introduces lookup_attribute and +remove_attribute overloads which specify also the namespace. +I think the fallthrough, hot, cold, likely, unlikely attribute handling +will need to use the new APIs too, so that we don't handle +msft::fallthrough attribute as something we'd know. + +2022-10-04 Jakub Jelinek <jakub@redhat.com> + + * attribs.h (remove_attribute): Declare overload with additional + attr_ns argument. + (private_lookup_attribute): Declare overload with additional + attr_ns and attr_ns_len arguments. + (lookup_attribute): New overload with additional attr_ns argument. + * attribs.cc (remove_attribute): New overload with additional + attr_ns argument. + (private_lookup_attribute): New overload with additional + attr_ns and attr_ns_len arguments. +--- + gcc/attribs.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ + gcc/attribs.h | 38 ++++++++++++++++++++++++++++++ + 2 files changed, 101 insertions(+) + +diff --git a/gcc/attribs.cc b/gcc/attribs.cc +index 656ea739e..8e2696bc5 100644 +--- a/gcc/attribs.cc ++++ b/gcc/attribs.cc +@@ -1598,6 +1598,36 @@ remove_attribute (const char *attr_name, tree list) + return list; + } + ++/* Similarly but also match namespace on the removed attributes. */ ++ ++tree ++remove_attribute (const char *attr_ns, const char *attr_name, tree list) ++{ ++ tree *p; ++ gcc_checking_assert (attr_name0 != '_'); ++ gcc_checking_assert (attr_ns == NULL || attr_ns0 != '_'); ++ ++ for (p = &list; *p;) ++ { ++ tree l = *p; ++ ++ tree attr = get_attribute_name (l); ++ if (is_attribute_p (attr_name, attr)) ++ { ++ tree ns = get_attribute_namespace (l); ++ if ((ns == NULL_TREE && attr_ns == NULL) ++ || (ns && attr_ns && is_attribute_p (attr_ns, ns))) ++ { ++ *p = TREE_CHAIN (l); ++ continue; ++ } ++ } ++ p = &TREE_CHAIN (l); ++ } ++ ++ return list; ++} ++ + /* Return an attribute list that is the union of a1 and a2. */ + + tree +@@ -1995,6 +2025,39 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list) + return list; + } + ++/* Similarly but with also attribute namespace. 
*/ ++ ++tree ++private_lookup_attribute (const char *attr_ns, const char *attr_name, ++ size_t attr_ns_len, size_t attr_len, tree list) ++{ ++ while (list) ++ { ++ tree attr = get_attribute_name (list); ++ size_t ident_len = IDENTIFIER_LENGTH (attr); ++ if (cmp_attribs (attr_name, attr_len, IDENTIFIER_POINTER (attr), ++ ident_len)) ++ { ++ tree ns = get_attribute_namespace (list); ++ if (ns == NULL_TREE) ++ { ++ if (attr_ns == NULL) ++ break; ++ } ++ else if (attr_ns) ++ { ++ ident_len = IDENTIFIER_LENGTH (ns); ++ if (cmp_attribs (attr_ns, attr_ns_len, IDENTIFIER_POINTER (ns), ++ ident_len)) ++ break; ++ } ++ } ++ list = TREE_CHAIN (list); ++ } ++ ++ return list; ++} ++ + /* Return true if the function decl or type NODE has been declared + with attribute ANAME among attributes ATTRS. */ + +diff --git a/gcc/attribs.h b/gcc/attribs.h +index 0856f98fb..9ad530fcb 100644 +--- a/gcc/attribs.h ++++ b/gcc/attribs.h +@@ -88,6 +88,10 @@ extern tree merge_type_attributes (tree, tree); + + extern tree remove_attribute (const char *, tree); + ++/* Similarly but also with specific attribute namespace. */ ++ ++extern tree remove_attribute (const char *, const char *, tree); ++ + /* Given two attributes lists, return a list of their union. */ + + extern tree merge_attributes (tree, tree); +@@ -119,6 +123,10 @@ extern int attribute_list_contained (const_tree, const_tree); + for size. */ + extern tree private_lookup_attribute (const char *attr_name, size_t attr_len, + tree list); ++extern tree private_lookup_attribute (const char *attr_ns, ++ const char *attr_name, ++ size_t attr_ns_len, size_t attr_len, ++ tree list); + + extern unsigned decls_mismatched_attributes (tree, tree, tree, + const char* const, +@@ -215,6 +223,36 @@ lookup_attribute (const char *attr_name, tree list) + } + } + ++/* Similar to lookup_attribute, but also match the attribute namespace. */ ++ ++static inline tree ++lookup_attribute (const char *attr_ns, const char *attr_name, tree list) ++{ ++ if (CHECKING_P && attr_name0 != '_') ++ { ++ size_t attr_len = strlen (attr_name); ++ gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len)); ++ } ++ if (CHECKING_P && attr_ns && attr_ns0 != '_') ++ { ++ size_t attr_ns_len = strlen (attr_ns); ++ gcc_checking_assert (!canonicalize_attr_name (attr_ns, attr_ns_len)); ++ } ++ /* In most cases, list is NULL_TREE. */ ++ if (list == NULL_TREE) ++ return NULL_TREE; ++ else ++ { ++ size_t attr_ns_len = attr_ns ? strlen (attr_ns) : 0; ++ size_t attr_len = strlen (attr_name); ++ /* Do the strlen() before calling the out-of-line implementation. ++ In most cases attr_name is a string constant, and the compiler ++ will optimize the strlen() away. */ ++ return private_lookup_attribute (attr_ns, attr_name, ++ attr_ns_len, attr_len, list); ++ } ++} ++ + /* Given an attribute name ATTR_NAME and a list of attributes LIST, + return a pointer to the attribute's list first element if the attribute + starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not +-- +2.33.0 +
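A hedged sketch of the new overloads in action, following the vendor::assume example from the commit message (attrs stands for a statement's attribute list and is assumed, not taken from the patch):

  /* Match only the scoped gnu::assume, or only the unscoped assume;
     vendor::assume matches neither.  */
  tree gnu_assume = lookup_attribute ("gnu", "assume", attrs);
  tree plain_assume = lookup_attribute (NULL, "assume", attrs);

  /* vendor::assume is somebody else's attribute: strip it instead of
     handling it as if it were ours.  */
  attrs = remove_attribute ("vendor", "assume", attrs);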
View file
_service:tar_scm:0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch
Added
@@ -0,0 +1,47 @@
+From 044dc671f7eb723df5b6ce2364d6ae579c0cc984 Mon Sep 17 00:00:00 2001
+From: Martin Jambor <mjambor@suse.cz>
+Date: Tue, 30 Aug 2022 18:50:35 +0200
+Subject: [PATCH 129/157] [Backport][SME] vec: Add array_slice constructors
+ from non-const and gc vectors
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=15433c214df295f2281a90fcf283355b21beca0e
+
+This patch adds constructors of array_slice that are required to
+create them from non-const (heap or auto) vectors or from GC vectors.
+
+gcc/ChangeLog:
+
+2022-08-08  Martin Jambor  <mjambor@suse.cz>
+
+	* vec.h (array_slice): Add constructors for non-const reference to
+	heap vector and pointers to heap vectors.
+---
+ gcc/vec.h | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/gcc/vec.h b/gcc/vec.h
+index 3ba7ea7ed..fc3b10c85 100644
+--- a/gcc/vec.h
++++ b/gcc/vec.h
+@@ -2264,6 +2264,18 @@ public:
+   array_slice (const vec<OtherT> &v)
+     : m_base (v.address ()), m_size (v.length ()) {}
+ 
++  template<typename OtherT>
++  array_slice (vec<OtherT> &v)
++    : m_base (v.address ()), m_size (v.length ()) {}
++
++  template<typename OtherT>
++  array_slice (const vec<OtherT, va_gc> *v)
++    : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
++
++  template<typename OtherT>
++  array_slice (vec<OtherT, va_gc> *v)
++    : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
++
+   iterator begin () { return m_base; }
+   iterator end () { return m_base + m_size; }
+ 
+--
+2.33.0
+
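A hedged sketch of what the new constructors permit (variable names illustrative only):

  vec<tree> heap_v = vNULL;       /* non-const heap/auto vector */
  vec<tree, va_gc> *gc_v = NULL;  /* GC-allocated vector */

  array_slice<tree> s1 (heap_v);  /* previously only const vec<T> & worked */
  array_slice<tree> s2 (gc_v);    /* a null GC vector yields an empty slice */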
View file
_service:tar_scm:0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch
Added
@@ -0,0 +1,140 @@
+From 12dd36f06e13ee9cd684c00732caa684f49b3610 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 29 Jun 2023 08:48:17 +0100
+Subject: [PATCH 130/157] [Backport][SME] A couple of va_gc_atomic tweaks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4e9f6c14280699997a633cefd3fb315b2bd4762c
+
+The only current user of va_gc_atomic is Ada's:
+
+    vec<Entity_Id, va_gc_atomic>
+
+It uses the generic gt_pch_nx routines (with gt_pch_nx being the
+“note pointers” hooks), such as:
+
+    template<typename T, typename A>
+    void
+    gt_pch_nx (vec<T, A, vl_embed> *v)
+    {
+      extern void gt_pch_nx (T &);
+      for (unsigned i = 0; i < v->length (); i++)
+	gt_pch_nx ((*v)[i]);
+    }
+
+It then defines gt_pch_nx routines for Entity_Id &.
+
+The problem is that if we wanted to take the same approach for
+an array of unsigned ints, we'd need to define:
+
+    inline void gt_pch_nx (unsigned int &) { }
+
+which would then be ambiguous with:
+
+    inline void gt_pch_nx (unsigned int) { }
+
+The point of va_gc_atomic is that the elements don't need to be GCed,
+and so we have:
+
+    template<typename T>
+    void
+    gt_ggc_mx (vec<T, va_gc_atomic, vl_embed> *v ATTRIBUTE_UNUSED)
+    {
+      /* Nothing to do.  Vectors of atomic types wrt GC do not need to
+	 be traversed.  */
+    }
+
+I think it's therefore reasonable to assume that no pointers will
+need to be processed for PCH either.
+
+The patch also relaxes the array_slice constructor for vec<T, va_gc> *
+so that it handles all embedded vectors.
+
+gcc/
+	* vec.h (gt_pch_nx): Add overloads for va_gc_atomic.
+	(array_slice): Relax va_gc constructor to handle all vectors
+	with a vl_embed layout.
+
+gcc/ada/
+	* gcc-interface/decl.cc (gt_pch_nx): Remove overloads for Entity_Id.
+---
+ gcc/ada/gcc-interface/decl.cc | 11 -----------
+ gcc/vec.h | 22 ++++++++++++++++++----
+ 2 files changed, 18 insertions(+), 15 deletions(-)
+
+diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
+index 1c7a71684..7193b55c7 100644
+--- a/gcc/ada/gcc-interface/decl.cc
++++ b/gcc/ada/gcc-interface/decl.cc
+@@ -163,17 +163,6 @@ struct GTY((for_user)) tree_entity_vec_map
+   vec<Entity_Id, va_gc_atomic> *to;
+ };
+
+-void
+-gt_pch_nx (Entity_Id &)
+-{
+-}
+-
+-void
+-gt_pch_nx (Entity_Id *x, gt_pointer_operator op, void *cookie)
+-{
+-  op (x, NULL, cookie);
+-}
+-
+ struct dummy_type_hasher : ggc_cache_ptr_hash<tree_entity_vec_map>
+ {
+   static inline hashval_t
+diff --git a/gcc/vec.h b/gcc/vec.h
+index fc3b10c85..592d3f7e0 100644
+--- a/gcc/vec.h
++++ b/gcc/vec.h
+@@ -1383,6 +1383,13 @@ gt_pch_nx (vec<T, A, vl_embed> *v)
+     gt_pch_nx ((*v)[i]);
+ }
+
++template<typename T>
++void
++gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *)
++{
++  /* No pointers to note.  */
++}
++
+ template<typename T, typename A>
+ void
+ gt_pch_nx (vec<T *, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+@@ -1400,6 +1407,13 @@ gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+     gt_pch_nx (&((*v)[i]), op, cookie);
+ }
+
++template<typename T>
++void
++gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *, gt_pointer_operator, void *)
++{
++  /* No pointers to note.  */
++}
++
+
+ /* Space efficient vector.  These vectors can grow dynamically and are
+    allocated together with their control data.  They are suited to be
+@@ -2268,12 +2282,12 @@ public:
+   array_slice (vec<OtherT> &v)
+     : m_base (v.address ()), m_size (v.length ()) {}
+
+-  template<typename OtherT>
+-  array_slice (const vec<OtherT, va_gc> *v)
++  template<typename OtherT, typename A>
++  array_slice (const vec<OtherT, A, vl_embed> *v)
+     : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
+
+-  template<typename OtherT>
+-  array_slice (vec<OtherT, va_gc> *v)
++  template<typename OtherT, typename A>
++  array_slice (vec<OtherT, A, vl_embed> *v)
+     : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
+
+   iterator begin () { return m_base; }
+   iterator end () { return m_base + m_size; }
+
+--
+2.33.0
+
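Editor's note: the overload ambiguity the commit message describes is easy to reproduce in isolation; `note` below is a stand-in name for gt_pch_nx, not a GCC function.

// With both overloads present, calling note on an unsigned int lvalue is
// ambiguous: the identity conversion matches each overload equally well.
inline void note (unsigned int &) {}
inline void note (unsigned int) {}

int main ()
{
  unsigned int x = 0;
  // note (x);   // would not compile: both overloads match the lvalue exactly
  (void) x;
  return 0;
}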
View file
_service:tar_scm:0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch
Added
@@ -0,0 +1,34 @@
+From bb15d4c4476e3ba303c5afe0adae0d86ab5f0a9b Mon Sep 17 00:00:00 2001
+From: zhongjuzhe <juzhe.zhong@rivai.ai>
+Date: Mon, 22 Aug 2022 10:15:31 +0100
+Subject: [PATCH 131/157] [Backport][SME] middle-end: Fix issue of poly_uint16
+ (1, 1) in self test
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=21e7d87a901d45f0cb5e5510d22bfbdb0d0ac6a1
+
+This patch fix issue of poly_uint16 (1, 1) in machine mode self test.
+
+gcc/ChangeLog:
+
+	* simplify-rtx.cc (test_vector_subregs_fore_back): Make first value
+	and repeat value different.
+---
+ gcc/simplify-rtx.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
+index e152918b0..fc79a2e2e 100644
+--- a/gcc/simplify-rtx.cc
++++ b/gcc/simplify-rtx.cc
+@@ -8380,7 +8380,7 @@ test_vector_subregs_fore_back (machine_mode inner_mode)
+   for (unsigned int i = 0; i < count; ++i)
+     builder.quick_push (gen_int_mode (i, int_mode));
+   for (unsigned int i = 0; i < count; ++i)
+-    builder.quick_push (gen_int_mode (-(int) i, int_mode));
++    builder.quick_push (gen_int_mode (-1 - (int) i, int_mode));
+   rtx x = builder.build ();
+
+   test_vector_subregs_modes (x);
+--
+2.33.0
+
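Editor's note: a sketch of what the one-line change does to the self-test's vector contents. This only prints the two index formulas; the real test builds an rtx vector through a builder, and the single-element-per-half case corresponds to modes whose element count is poly_uint16 (1, 1).

#include <cstdio>

int main ()
{
  const unsigned count = 1;  // one element per half
  // First half counts up from 0: old and new behaviour agree here.
  for (unsigned i = 0; i < count; ++i)
    std::printf ("fore: %d\n", (int) i);        // 0
  // Second half counts down.  The old formula -(int) i also started at 0,
  // making the two halves indistinguishable; -1 - (int) i starts at -1.
  for (unsigned i = 0; i < count; ++i)
    std::printf ("back: %d\n", -1 - (int) i);   // -1
  return 0;
}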
View file
_service:tar_scm:0231-SME-Add-missing-header-file-in-aarch64.cc.patch
Added
@@ -0,0 +1,24 @@
+From cce05b3365c3986ca74c04f442662a21b4f03a61 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Mon, 4 Mar 2024 14:39:36 +0800
+Subject: [PATCH 132/157] [SME] Add missing header file in `aarch64.cc`
+
+---
+ gcc/config/aarch64/aarch64.cc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index f7285555b..0117a3e12 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -86,6 +86,7 @@
+ #include "tree-pass.h"
+ #include "cfgbuild.h"
+ #include "symbol-summary.h"
++#include "value-range.h"
+ #include "ipa-prop.h"
+ #include "ipa-fnsummary.h"
+
+--
+2.33.0
+
View file
_service:tar_scm:0232-Backport-SME-c-Add-support-for-__extension__.patch
Added
@@ -0,0 +1,327 @@
+From 3714cfb47fafef884aa2ff330935fb44b7966909 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 24 Aug 2023 11:49:58 +0100
+Subject: [PATCH 133/157] [Backport][SME] c: Add support for [[__extension__
+ ...]]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=207a5daa9dcf31e367152163ad2a2ab4a0858967
+
+[[]] attributes are a recent addition to C, but as a GNU extension,
+GCC allows them to be used in C11 and earlier.  Normally this use
+would trigger a pedwarn (for -pedantic, -Wc11-c2x-compat, etc.).
+
+This patch allows the pedwarn to be suppressed by starting the
+attribute-list with __extension__.
+
+Also, :: is not a single lexing token prior to C2X, so it wasn't
+possible to use scoped attributes in C11, even as a GNU extension.
+The patch allows two colons to be used in place of :: when
+__extension__ is used.  No attempt is made to check whether the
+two colons are immediately adjacent.
+
+gcc/
+	* doc/extend.texi: Document the C [[__extension__ ...]] construct.
+
+gcc/c/
+	* c-parser.cc (c_parser_std_attribute): Conditionally allow
+	two colons to be used in place of ::.
+	(c_parser_std_attribute_list): New function, split out from...
+	(c_parser_std_attribute_specifier): ...here.  Allow the attribute-list
+	to start with __extension__.  When it does, also allow two colons
+	to be used in place of ::.
+
+gcc/testsuite/
+	* gcc.dg/c2x-attr-syntax-6.c: New test.
+	* gcc.dg/c2x-attr-syntax-7.c: Likewise.
+---
+ gcc/c/c-parser.cc | 64 ++++++++++++++++++------
+ gcc/doc/extend.texi | 27 ++++++++--
+ gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 62 +++++++++++++++++++++++
+ gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 60 ++++++++++++++++++++++
+ 4 files changed, 193 insertions(+), 20 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c
+
+diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
+index 78a313fe3..486f46e1c 100644
+--- a/gcc/c/c-parser.cc
++++ b/gcc/c/c-parser.cc
+@@ -4894,10 +4894,18 @@ c_parser_balanced_token_sequence (c_parser *parser)
+      ( balanced-token-sequence[opt] )
+
+    Keywords are accepted as identifiers for this purpose.
+-*/
++
++   As an extension, we permit an attribute-specifier to be:
++
++     [ [ __extension__ attribute-list ] ]
++
++   Two colons are then accepted as a synonym for ::.  No attempt is made
++   to check whether the colons are immediately adjacent.  LOOSE_SCOPE_P
++   indicates whether this relaxation is in effect.  */
+
+ static tree
+-c_parser_std_attribute (c_parser *parser, bool for_tm)
++c_parser_std_attribute (c_parser *parser, bool for_tm,
++			bool loose_scope_p = false)
+ {
+   c_token *token = c_parser_peek_token (parser);
+   tree ns, name, attribute;
+@@ -4910,9 +4918,14 @@ c_parser_std_attribute (c_parser *parser, bool for_tm)
+     }
+   name = canonicalize_attr_name (token->value);
+   c_parser_consume_token (parser);
+-  if (c_parser_next_token_is (parser, CPP_SCOPE))
++  if (c_parser_next_token_is (parser, CPP_SCOPE)
++      || (loose_scope_p
++	  && c_parser_next_token_is (parser, CPP_COLON)
++	  && c_parser_peek_2nd_token (parser)->type == CPP_COLON))
+     {
+       ns = name;
++      if (c_parser_next_token_is (parser, CPP_COLON))
++	c_parser_consume_token (parser);
+       c_parser_consume_token (parser);
+       token = c_parser_peek_token (parser);
+       if (token->type != CPP_NAME && token->type != CPP_KEYWORD)
+@@ -4981,19 +4994,9 @@ c_parser_std_attribute (c_parser *parser, bool for_tm)
+ }
+
+ static tree
+-c_parser_std_attribute_specifier (c_parser *parser, bool for_tm)
++c_parser_std_attribute_list (c_parser *parser, bool for_tm,
++			     bool loose_scope_p = false)
+ {
+-  location_t loc = c_parser_peek_token (parser)->location;
+-  if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>"))
+-    return NULL_TREE;
+-  if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>"))
+-    {
+-      c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>");
+-      return NULL_TREE;
+-    }
+-  if (!for_tm)
+-    pedwarn_c11 (loc, OPT_Wpedantic,
+-		 "ISO C does not support %<[[]]%> attributes before C2X");
+   tree attributes = NULL_TREE;
+   while (true)
+     {
+@@ -5005,7 +5008,7 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm)
+	  c_parser_consume_token (parser);
+	  continue;
+	}
+-      tree attribute = c_parser_std_attribute (parser, for_tm);
++      tree attribute = c_parser_std_attribute (parser, for_tm, loose_scope_p);
+       if (attribute != error_mark_node)
+	{
+	  TREE_CHAIN (attribute) = attributes;
+@@ -5014,6 +5017,35 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm)
+       if (c_parser_next_token_is_not (parser, CPP_COMMA))
+	break;
+     }
++  return attributes;
++}
++
++static tree
++c_parser_std_attribute_specifier (c_parser *parser, bool for_tm)
++{
++  location_t loc = c_parser_peek_token (parser)->location;
++  if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>"))
++    return NULL_TREE;
++  if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>"))
++    {
++      c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>");
++      return NULL_TREE;
++    }
++  tree attributes;
++  if (c_parser_next_token_is_keyword (parser, RID_EXTENSION))
++    {
++      auto ext = disable_extension_diagnostics ();
++      c_parser_consume_token (parser);
++      attributes = c_parser_std_attribute_list (parser, for_tm, true);
++      restore_extension_diagnostics (ext);
++    }
++  else
++    {
++      if (!for_tm)
++	pedwarn_c11 (loc, OPT_Wpedantic,
++		     "ISO C does not support %<[[]]%> attributes before C2X");
++      attributes = c_parser_std_attribute_list (parser, for_tm);
++    }
+   c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>");
+   c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>");
+   return nreverse (attributes);
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 674db2f1a..3cfecee53 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -11726,10 +11726,29 @@ macros to replace them with the customary keywords.  It looks like this:
+ @findex __extension__
+ @opindex pedantic
+ @option{-pedantic} and other options cause warnings for many GNU C extensions.
+-You can
+-prevent such warnings within one expression by writing
+-@code{__extension__} before the expression.  @code{__extension__} has no
+-effect aside from this.
++You can suppress such warnings using the keyword @code{__extension__}.
++Specifically:
++
++@itemize @bullet
++@item
++Writing @code{__extension__} before an expression prevents warnings
++about extensions within that expression.
++
++@item
++In C, writing:
++
++@smallexample
++[ [ __extension__ @dots{} ] ]
++@end smallexample
++
++suppresses warnings about using @samp{[[]]} attributes in C versions
++that predate C2X@.  Since the scope token @samp{::} is not a single
++lexing token in earlier versions of C, this construct also allows two colons
++to be used in place of @code{::}.  GCC does not check whether the two
++colons are immediately adjacent.
++@end itemize
++
++@code{__extension__} has no effect aside from this.
+
+ @node Incomplete Enums
+ @section Incomplete @code{enum} Types
+diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c
+new file mode 100644
+index 000000000..9e5f65ce4
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c
+@@ -0,0 +1,62 @@
++/* Test C2x attribute syntax: use of __extension__ in C11 mode.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c11 -pedantic-errors" } */
++
++#define FOO ::
++#define BAR :
++#define JOIN(A, B) A/**/B
++#define JOIN2(A, B) A##B
++
++typedef int [[__extension__ gnu::vector_size (4)]] g1;
++typedef int [[__extension__ gnu :: vector_size (4)]] g2;
++typedef int [[__extension__ gnu : : vector_size (4)]] g3;
++typedef int [[__extension__ gnu: :vector_size (4)]] g4;
++typedef int [[__extension__ gnu FOO vector_size (4)]] g5;
++typedef int [[__extension__ gnu BAR BAR vector_size (4)]] g6;
++typedef int [[__extension__ gnu :/**/: vector_size (4)]] g7;
++typedef int [[__extension__ gnu JOIN(:,:) vector_size (4)]] g8;
++typedef int [[__extension__ gnu :: vector_size (sizeof (void (*)(...)))]] g10;
++typedef int [[__extension__]] g11;
++typedef int [[__extension__,]] g12;
++typedef int [[__extension__, ,,,, ,, ,]] g13;
++[[__extension__ deprecated]] int g14 ();
++[[__extension__ nodiscard]] int g15 ();
++[[__extension__ noreturn]] void g16 ();
++
++int
++cases (int x)
++{
++  switch (x)
++    {
++    case 1:
++    case 2:
++    case 4:
++      x += 1;
++      [[__extension__ fallthrough]];
++    case 19:
++    case 33:
++      x *= 2;
++      [[fallthrough]]; /* { dg-error {attributes before C2X} } */
++    case 99:
++      return x;
++
++    default:
++      return 0;
++    }
++}
++
++typedef int [[__extension__ vector_size (4)]] b1; /* { dg-error {'vector_size' attribute ignored} } */
++typedef int [[__extension__ __extension__]] b2; /* { dg-error {'extension' attribute ignored} } */
++typedef int [[__extension__ unknown_attribute]] b3; /* { dg-error {'unknown_attribute' attribute ignored} } */
++typedef int [[__extension__ gnu:vector_size(4)]] b4; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++typedef int [[__extension__ gnu JOIN2(:,:) vector_size (4)]] b5; /* { dg-error {pasting ":" and ":" does not give a valid preprocessing token} } */
++typedef int [[gnu::vector_size(4)]] b6; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++/* { dg-error {attributes before C2X} "" { target *-*-* } .-2 } */
++typedef int [[gnu : : vector_size(4)]] b7; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++/* { dg-error {attributes before C2X} "" { target *-*-* } .-2 } */
++typedef int [[gnu : vector_size(4)]] b8; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++/* { dg-error {attributes before C2X} "" { target *-*-* } .-2 } */
+diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c
+new file mode 100644
+index 000000000..702f733b1
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c
+@@ -0,0 +1,60 @@
++/* Test C2x attribute syntax: use of __extension__ in C11 mode.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c2x -pedantic-errors -Wc11-c2x-compat" } */
++
++#define FOO ::
++#define BAR :
++#define JOIN(A, B) A/**/B
++#define JOIN2(A, B) A##B
++
++typedef int [[__extension__ gnu::vector_size (4)]] g1;
++typedef int [[__extension__ gnu :: vector_size (4)]] g2;
++typedef int [[__extension__ gnu : : vector_size (4)]] g3;
++typedef int [[__extension__ gnu: :vector_size (4)]] g4;
++typedef int [[__extension__ gnu FOO vector_size (4)]] g5;
++typedef int [[__extension__ gnu BAR BAR vector_size (4)]] g6;
++typedef int [[__extension__ gnu :/**/: vector_size (4)]] g7;
++typedef int [[__extension__ gnu JOIN(:,:) vector_size (4)]] g8;
++typedef int [[__extension__ gnu :: vector_size (sizeof (void (*)(...)))]] g10;
++typedef int [[__extension__]] g11;
++typedef int [[__extension__,]] g12;
++typedef int [[__extension__, ,,,, ,, ,]] g13;
++[[__extension__ deprecated]] int g14 ();
++[[__extension__ nodiscard]] int g15 ();
++[[__extension__ noreturn]] void g16 ();
++
++int
++cases (int x)
++{
++  switch (x)
++    {
++    case 1:
++    case 2:
++    case 4:
++      x += 1;
++      [[__extension__ fallthrough]];
++    case 19:
++    case 33:
++      x *= 2;
++      [[fallthrough]]; /* { dg-warning {attributes before C2X} } */
++    case 99:
++      return x;
++
++    default:
++      return 0;
++    }
++}
++
++typedef int [[__extension__ vector_size (4)]] b1; /* { dg-error {'vector_size' attribute ignored} } */
++typedef int [[__extension__ __extension__]] b2; /* { dg-error {'extension' attribute ignored} } */
++typedef int [[__extension__ unknown_attribute]] b3; /* { dg-error {'unknown_attribute' attribute ignored} } */
++typedef int [[__extension__ gnu:vector_size(4)]] b4; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++typedef int [[__extension__ gnu JOIN2(:,:) vector_size (4)]] b5;
++typedef int [[gnu::vector_size(4)]] b6; /* { dg-warning {attributes before C2X} } */
++typedef int [[gnu : : vector_size(4)]] b7; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++/* { dg-warning {attributes before C2X} "" { target *-*-* } .-2 } */
++typedef int [[gnu : vector_size(4)]] b8; /* { dg-error {expected '\]' before ':'} } */
++/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */
++/* { dg-warning {attributes before C2X} "" { target *-*-* } .-2 } */
+--
+2.33.0
+
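Editor's note: taken together, the construct this patch accepts looks like the following (lines adapted from the new c2x-attr-syntax-6.c test; compiled as C with -std=c11 -pedantic-errors).

/* Without __extension__, both lines would draw "ISO C does not support
   [[]] attributes before C2X"; the second also relies on the two-colon
   relaxation, since :: is not a single lexing token before C2X.  */
typedef int [[__extension__ gnu::vector_size (4)]] v1;
typedef int [[__extension__ gnu : : vector_size (4)]] v2;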
View file
_service:tar_scm:0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch
Added
@@ -0,0 +1,140 @@
+From 29a71fc5cbfc3b5e4649abf51740daed5ea243bd Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 5 Dec 2023 09:20:55 +0000
+Subject: [PATCH 134/157] [Backport][SME] lra: Updates of biggest mode for hard
+ regs [PR112278]
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=6e2e0ce6795c863e295eb33559f8dc0500297da3
+
+LRA keeps track of the biggest mode for both hard registers and
+pseudos.  The updates assume that the modes are ordered, i.e. that
+we can tell whether one is no bigger than the other at compile time.
+
+That is (or at least seemed to be) a reasonable restriction for pseudos.
+But it isn't necessarily so for hard registers, since the uses of hard
+registers can be logically distinct.  The testcase is an example of this.
+
+The biggest mode of hard registers is also special for other reasons.
+As the existing comment says:
+
+  /* A reg can have a biggest_mode of VOIDmode if it was only ever seen as
+     part of a multi-word register.  In that case, just use the reg_rtx
+     mode.  Do the same also if the biggest mode was larger than a register
+     or we can not compare the modes.  Otherwise, limit the size to that of
+     the biggest access in the function or to the natural mode at least.  */
+
+This patch applies the same approach to the updates.
+
+gcc/
+	PR rtl-optimization/112278
+	* lra-int.h (lra_update_biggest_mode): New function.
+	* lra-coalesce.cc (merge_pseudos): Use it.
+	* lra-lives.cc (process_bb_lives): Likewise.
+	* lra.cc (new_insn_reg): Likewise.
+
+gcc/testsuite/
+	PR rtl-optimization/112278
+	* gcc.target/aarch64/sve/pr112278.c: New test.
+---
+ gcc/lra-coalesce.cc | 4 +---
+ gcc/lra-int.h | 15 +++++++++++++++
+ gcc/lra-lives.cc | 4 +---
+ gcc/lra.cc | 5 ++---
+ gcc/testsuite/gcc.target/aarch64/sve/pr112278.c | 15 +++++++++++++++
+ 5 files changed, 34 insertions(+), 9 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr112278.c
+
+diff --git a/gcc/lra-coalesce.cc b/gcc/lra-coalesce.cc
+index c82934569..901a44663 100644
+--- a/gcc/lra-coalesce.cc
++++ b/gcc/lra-coalesce.cc
+@@ -112,9 +112,7 @@ merge_pseudos (int regno1, int regno2)
+     = (lra_merge_live_ranges
+        (lra_reg_info[first].live_ranges,
+	lra_copy_live_range_list (lra_reg_info[first2].live_ranges)));
+-  if (partial_subreg_p (lra_reg_info[first].biggest_mode,
+-			lra_reg_info[first2].biggest_mode))
+-    lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode;
++  lra_update_biggest_mode (first, lra_reg_info[first2].biggest_mode);
+ }
+
+ /* Change pseudos in *LOC on their coalescing group
+diff --git a/gcc/lra-int.h b/gcc/lra-int.h
+index 04baefef3..040e87d11 100644
+--- a/gcc/lra-int.h
++++ b/gcc/lra-int.h
+@@ -525,4 +525,19 @@ lra_assign_reg_val (int from, int to)
+   lra_reg_info[to].offset = lra_reg_info[from].offset;
+ }
+
++/* Update REGNO's biggest recorded mode so that it includes a reference
++   in mode MODE.  */
++inline void
++lra_update_biggest_mode (int regno, machine_mode mode)
++{
++  if (!ordered_p (GET_MODE_SIZE (lra_reg_info[regno].biggest_mode),
++		  GET_MODE_SIZE (mode)))
++    {
++      gcc_checking_assert (HARD_REGISTER_NUM_P (regno));
++      lra_reg_info[regno].biggest_mode = reg_raw_mode[regno];
++    }
++  else if (partial_subreg_p (lra_reg_info[regno].biggest_mode, mode))
++    lra_reg_info[regno].biggest_mode = mode;
++}
++
+ #endif /* GCC_LRA_INT_H */
+diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc
+index a755464ee..fb4a12304 100644
+--- a/gcc/lra-lives.cc
++++ b/gcc/lra-lives.cc
+@@ -770,9 +770,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
+	{
+	  int regno = reg->regno;
+
+-	  if (partial_subreg_p (lra_reg_info[regno].biggest_mode,
+-				reg->biggest_mode))
+-	    lra_reg_info[regno].biggest_mode = reg->biggest_mode;
++	  lra_update_biggest_mode (regno, reg->biggest_mode);
+	  if (HARD_REGISTER_NUM_P (regno))
+	    lra_hard_reg_usage[regno] += freq;
+	}
+diff --git a/gcc/lra.cc b/gcc/lra.cc
+index 1444cb759..8fda432f1 100644
+--- a/gcc/lra.cc
++++ b/gcc/lra.cc
+@@ -559,9 +559,8 @@ new_insn_reg (rtx_insn *insn, int regno, enum op_type type,
+   lra_insn_reg *ir = lra_insn_reg_pool.allocate ();
+   ir->type = type;
+   ir->biggest_mode = mode;
+-  if (NONDEBUG_INSN_P (insn)
+-      && partial_subreg_p (lra_reg_info[regno].biggest_mode, mode))
+-    lra_reg_info[regno].biggest_mode = mode;
++  if (NONDEBUG_INSN_P (insn))
++    lra_update_biggest_mode (regno, mode);
+   ir->subreg_p = subreg_p;
+   ir->early_clobber_alts = early_clobber_alts;
+   ir->regno = regno;
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c
+new file mode 100644
+index 000000000..4f56add2b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c
+@@ -0,0 +1,15 @@
++#include <arm_neon.h>
++#include <arm_sve.h>
++
++void
++f (void)
++{
++  {
++    register svint8_t v0 asm ("z0");
++    asm volatile ("" : "=w" (v0));
++  }
++  {
++    register int8x8x4_t v0 asm ("v0");
++    asm volatile ("" : "=w" (v0));
++  }
++}
+--
+2.33.0
+
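Editor's note: a toy model of why the two accesses in pr112278.c produce sizes that cannot be ordered at compile time. GCC's real machinery is poly_int with ordered_p; the struct below is only an illustration of the idea.

// An SVE z-register access occupies 16 + 16*x bytes, where x >= 0 encodes
// how far the runtime vector length exceeds 128 bits; the Neon quad-tuple
// access is a fixed 32 bytes.  Neither size dominates for every x
// (smaller at x = 0, larger at x >= 2), so the sizes are unordered.
struct model_size { int a, b; };   // models size = a + b * x

static bool model_ordered_p (model_size p, model_size q)
{
  // Comparable for every x >= 0 only if one side dominates in both terms.
  bool p_le_q = p.a <= q.a && p.b <= q.b;
  bool q_le_p = q.a <= p.a && q.b <= p.b;
  return p_le_q || q_le_p;
}

int main ()
{
  model_size sve = {16, 16};   // svint8_t in z0
  model_size neon = {32, 0};   // int8x8x4_t in v0
  // Unordered, so the new code falls back to the register's natural mode.
  return model_ordered_p (sve, neon) ? 1 : 0;
}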
View file
_service:tar_scm:0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch
Added
@@ -0,0 +1,672 @@ +From 0a34bb6b18cdf34cb9d4f34b1697e1bcfcff139b Mon Sep 17 00:00:00 2001 +From: Joseph Myers <joseph@codesourcery.com> +Date: Thu, 25 Aug 2022 21:02:57 +0000 +Subject: PATCH 135/157 BackportSME c: Support C2x empty initializer + braces + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=14cfa01755a66afbae2539f8b5796c960ddcecc6 + +ISO C2x standardizes empty initializer braces {}. Implement this +feature accordingly. The basic case was already supported and so just +needed diagnostic adjustments. However, the standard feature also +includes two cases that were not previously supported: empty +initializer braces for scalars, and empty initializer braces for +VLAs. Thus, add support for those features as well, updating existing +tests that expected them to be diagnosed. + +There was already some gimplifier support for converting +variable-sized initializations with empty CONSTRUCTORs to memset. +However, it didn't apply here; code earlier in gimplify_modify_expr +ended up calling gimplify_init_constructor via +gimplify_modify_expr_rhs, which ended up handling the CONSTRUCTOR in a +way that generated an ICE later. Add a check for this case earlier in +gimplify_modify_expr to avoid that issue. + +Bootstrapped with no regressions for x86_64-pc-linux-gnu. + +gcc/ + * gimplify.cc (gimplify_modify_expr): Convert initialization from + a variable-size CONSTRUCTOR to memset before call to + gimplify_modify_expr_rhs. + +gcc/c/ + * c-decl.cc (start_decl): Do not diagnose initialization of + variable-sized objects here. + * c-parser.cc (c_parser_braced_init): Add argument DECL. All + callers changed. + (c_parser_initializer): Diagnose initialization of variable-sized + objects other than with braced initializer. + (c_parser_braced_init): Use pedwarn_c11 for empty initializer + braces and update diagnostic text. Diagnose initialization of + variable-sized objects with nonempty braces. + * c-typeck.cc (digest_init): Update diagnostic for initialization + of variable-sized objects. + (really_start_incremental_init, set_designator) + (process_init_element): Update comments. + (pop_init_level): Allow scalar empty initializers. + +gcc/testsuite/ + * gcc.dg/c11-empty-init-1.c, gcc.dg/c11-empty-init-2.c, + gcc.dg/c11-empty-init-3.c, gcc.dg/c2x-empty-init-1.c, + gcc.dg/c2x-empty-init-2.c, gcc.dg/c2x-empty-init-3.c, + gcc.dg/gnu2x-empty-init-1.c, gcc.dg/gnu2x-empty-init-2.c: New + tests. + * gcc.dg/torture/dfp-default-init-1.c: Also test empty + initializers. + * gcc.dg/init-bad-1.c, gcc.dg/noncompile/pr71583.c, + gcc.dg/pr61096-1.c, gcc.dg/vla-init-2.c, gcc.dg/vla-init-3.c, + gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Update expected + diagnostics. + * gcc.dg/ubsan/c-shift-1.c: Use nonempty initializers for VLA + initializations expected to be diagnosed. 
+--- + gcc/c/c-decl.cc | 20 +----- + gcc/c/c-parser.cc | 24 +++++-- + gcc/c/c-typeck.cc | 23 ++++--- + gcc/gimplify.cc | 15 +++++ + gcc/testsuite/gcc.dg/c11-empty-init-1.c | 25 +++++++ + gcc/testsuite/gcc.dg/c11-empty-init-2.c | 25 +++++++ + gcc/testsuite/gcc.dg/c11-empty-init-3.c | 25 +++++++ + gcc/testsuite/gcc.dg/c2x-empty-init-1.c | 80 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/c2x-empty-init-2.c | 18 +++++ + gcc/testsuite/gcc.dg/c2x-empty-init-3.c | 25 +++++++ + gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c | 29 ++++++++ + gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c | 16 +++++ + gcc/testsuite/gcc.dg/init-bad-1.c | 3 +- + gcc/testsuite/gcc.dg/noncompile/pr71583.c | 2 +- + gcc/testsuite/gcc.dg/pr61096-1.c | 2 +- + gcc/testsuite/gcc.dg/ubsan/c-shift-1.c | 12 ++-- + gcc/testsuite/gcc.dg/vla-init-2.c | 1 - + gcc/testsuite/gcc.dg/vla-init-3.c | 1 - + 18 files changed, 301 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-2.c + create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-3.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-2.c + create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-3.c + create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c + create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c + +diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc +index 9d87a8cdb..685bb1757 100644 +--- a/gcc/c/c-decl.cc ++++ b/gcc/c/c-decl.cc +@@ -5166,29 +5166,15 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, + initialized = false; + else if (COMPLETE_TYPE_P (TREE_TYPE (decl))) + { +- /* A complete type is ok if size is fixed. */ +- +- if (!poly_int_tree_p (TYPE_SIZE (TREE_TYPE (decl))) +- || C_DECL_VARIABLE_SIZE (decl)) +- { +- error ("variable-sized object may not be initialized"); +- initialized = false; +- } ++ /* A complete type is ok if size is fixed. If the size is ++ variable, an empty initializer is OK and nonempty ++ initializers will be diagnosed in the parser. */ + } + else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) + { + error ("variable %qD has initializer but incomplete type", decl); + initialized = false; + } +- else if (C_DECL_VARIABLE_SIZE (decl)) +- { +- /* Although C99 is unclear about whether incomplete arrays +- of VLAs themselves count as VLAs, it does not make +- sense to permit them to be initialized given that +- ordinary VLAs may not be initialized. 
*/ +- error ("variable-sized object may not be initialized"); +- initialized = false; +- } + } + + if (initialized) +diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc +index 486f46e1c..6db535d11 100644 +--- a/gcc/c/c-parser.cc ++++ b/gcc/c/c-parser.cc +@@ -1515,7 +1515,7 @@ static tree c_parser_simple_asm_expr (c_parser *); + static tree c_parser_gnu_attributes (c_parser *); + static struct c_expr c_parser_initializer (c_parser *, tree); + static struct c_expr c_parser_braced_init (c_parser *, tree, bool, +- struct obstack *); ++ struct obstack *, tree); + static void c_parser_initelt (c_parser *, struct obstack *); + static void c_parser_initval (c_parser *, struct c_expr *, + struct obstack *); +@@ -5247,11 +5247,15 @@ static struct c_expr + c_parser_initializer (c_parser *parser, tree decl) + { + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE)) +- return c_parser_braced_init (parser, NULL_TREE, false, NULL); ++ return c_parser_braced_init (parser, NULL_TREE, false, NULL, decl); + else + { + struct c_expr ret; + location_t loc = c_parser_peek_token (parser)->location; ++ if (decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) ++ error_at (loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + ret = c_parser_expr_no_commas (parser, NULL); + /* This is handled mostly by gimplify.cc, but we have to deal with + not warning about int x = x; as it is a GCC extension to turn off +@@ -5278,11 +5282,12 @@ location_t last_init_list_comma; + compound literal, and NULL_TREE for other initializers and for + nested braced lists. NESTED_P is true for nested braced lists, + false for the list of a compound literal or the list that is the +- top-level initializer in a declaration. */ ++ top-level initializer in a declaration. DECL is the declaration for ++ the top-level initializer for a declaration, otherwise NULL_TREE. */ + + static struct c_expr + c_parser_braced_init (c_parser *parser, tree type, bool nested_p, +- struct obstack *outer_obstack) ++ struct obstack *outer_obstack, tree decl) + { + struct c_expr ret; + struct obstack braced_init_obstack; +@@ -5300,10 +5305,15 @@ c_parser_braced_init (c_parser *parser, tree type, bool nested_p, + really_start_incremental_init (type); + if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE)) + { +- pedwarn (brace_loc, OPT_Wpedantic, "ISO C forbids empty initializer braces"); ++ pedwarn_c11 (brace_loc, OPT_Wpedantic, ++ "ISO C forbids empty initializer braces before C2X"); + } + else + { ++ if (decl && decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) ++ error_at (brace_loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + /* Parse a non-empty initializer list, possibly with a trailing + comma. 
*/ + while (true) +@@ -5559,7 +5569,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, + + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) + init = c_parser_braced_init (parser, NULL_TREE, true, +- braced_init_obstack); ++ braced_init_obstack, NULL_TREE); + else + { + init = c_parser_expr_no_commas (parser, after); +@@ -10312,7 +10322,7 @@ c_parser_postfix_expression_after_paren_type (c_parser *parser, + error_at (type_loc, "compound literal has variable size"); + type = error_mark_node; + } +- init = c_parser_braced_init (parser, type, false, NULL); ++ init = c_parser_braced_init (parser, type, false, NULL, NULL_TREE); + finish_init (); + maybe_warn_string_init (type_loc, type, init); + +diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc +index 603b03fe1..0889dd4cb 100644 +--- a/gcc/c/c-typeck.cc ++++ b/gcc/c/c-typeck.cc +@@ -8267,7 +8267,9 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, + + if (COMPLETE_TYPE_P (type) && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + { +- error_init (init_loc, "variable-sized object may not be initialized"); ++ error_init (init_loc, ++ "variable-sized object may not be initialized except " ++ "with an empty initializer"); + return error_mark_node; + } + +@@ -8617,8 +8619,9 @@ really_start_incremental_init (tree type) + constructor_max_index = integer_minus_one_node; + + /* constructor_max_index needs to be an INTEGER_CST. Attempts +- to initialize VLAs will cause a proper error; avoid tree +- checking errors as well by setting a safe value. */ ++ to initialize VLAs with a nonempty initializer will cause a ++ proper error; avoid tree checking errors as well by setting a ++ safe value. */ + if (constructor_max_index + && TREE_CODE (constructor_max_index) != INTEGER_CST) + constructor_max_index = integer_minus_one_node; +@@ -9000,12 +9003,14 @@ pop_init_level (location_t loc, int implicit, + && !gnu_vector_type_p (constructor_type)) + { + /* A nonincremental scalar initializer--just return +- the element, after verifying there is just one. */ ++ the element, after verifying there is just one. ++ Empty scalar initializers are supported in C2X. */ + if (vec_safe_is_empty (constructor_elements)) + { +- if (!constructor_erroneous && constructor_type != error_mark_node) +- error_init (loc, "empty scalar initializer"); +- ret.value = error_mark_node; ++ if (constructor_erroneous || constructor_type == error_mark_node) ++ ret.value = error_mark_node; ++ else ++ ret.value = build_zero_cst (constructor_type); + } + else if (vec_safe_length (constructor_elements) != 1) + { +@@ -9090,7 +9095,7 @@ set_designator (location_t loc, bool array, + return true; + + /* Likewise for an initializer for a variable-size type. Those are +- diagnosed in digest_init. */ ++ diagnosed in the parser, except for empty initializer braces. */ + if (COMPLETE_TYPE_P (constructor_type) + && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST) + return true; +@@ -10251,7 +10256,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, + return; + + /* Ignore elements of an initializer for a variable-size type. +- Those are diagnosed in digest_init. */ ++ Those are diagnosed in the parser (empty initializer braces are OK). 
*/
+   if (COMPLETE_TYPE_P (constructor_type)
+       && !poly_int_tree_p (TYPE_SIZE (constructor_type)))
+     return;
+diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
+index a551c574a..91500e2fb 100644
+--- a/gcc/gimplify.cc
++++ b/gcc/gimplify.cc
+@@ -6026,6 +6026,21 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
+       return GS_ALL_DONE;
+     }
+
++  /* Convert initialization from an empty variable-size CONSTRUCTOR to
++     memset.  */
++  if (TREE_TYPE (*from_p) != error_mark_node
++      && TYPE_SIZE_UNIT (TREE_TYPE (*from_p))
++      && !poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (*from_p)))
++      && TREE_CODE (*from_p) == CONSTRUCTOR
++      && CONSTRUCTOR_NELTS (*from_p) == 0)
++    {
++      maybe_with_size_expr (from_p);
++      gcc_assert (TREE_CODE (*from_p) == WITH_SIZE_EXPR);
++      return gimplify_modify_expr_to_memset (expr_p,
++					     TREE_OPERAND (*from_p, 1),
++					     want_value, pre_p);
++    }
++
+   /* Insert pointer conversions required by the middle-end that are not
+      required by the frontend.  This fixes middle-end type checking for
+      for example gcc.dg/redecl-6.c.  */
+diff --git a/gcc/testsuite/gcc.dg/c11-empty-init-1.c b/gcc/testsuite/gcc.dg/c11-empty-init-1.c
+new file mode 100644
+index 000000000..120c28225
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c11-empty-init-1.c
+@@ -0,0 +1,25 @@
++/* Test C11 does not support empty initializers.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c11 -pedantic-errors" } */
++
++struct s { int a; };
++struct s s = {}; /* { dg-error "empty initializer" } */
++int x = {}; /* { dg-error "empty initializer" } */
++float y = {}; /* { dg-error "empty initializer" } */
++void *p = {}; /* { dg-error "empty initializer" } */
++union u { int a; long b; };
++union u z = {}; /* { dg-error "empty initializer" } */
++int aa[2] = {}; /* { dg-error "empty initializer" } */
++
++void
++f (int a)
++{
++  int vla[a] = {}; /* { dg-error "empty initializer" } */
++  struct s as = {}; /* { dg-error "empty initializer" } */
++  int ax = {}; /* { dg-error "empty initializer" } */
++  float ay = {}; /* { dg-error "empty initializer" } */
++  void *ap = {}; /* { dg-error "empty initializer" } */
++  union u az = {}; /* { dg-error "empty initializer" } */
++  int aaa[2] = {}; /* { dg-error "empty initializer" } */
++  int t = (int) {}; /* { dg-error "empty initializer" } */
++}
+diff --git a/gcc/testsuite/gcc.dg/c11-empty-init-2.c b/gcc/testsuite/gcc.dg/c11-empty-init-2.c
+new file mode 100644
+index 000000000..3ec7c512a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c11-empty-init-2.c
+@@ -0,0 +1,25 @@
++/* Test C11 does not support empty initializers.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c11 -pedantic" } */
++
++struct s { int a; };
++struct s s = {}; /* { dg-warning "empty initializer" } */
++int x = {}; /* { dg-warning "empty initializer" } */
++float y = {}; /* { dg-warning "empty initializer" } */
++void *p = {}; /* { dg-warning "empty initializer" } */
++union u { int a; long b; };
++union u z = {}; /* { dg-warning "empty initializer" } */
++int aa[2] = {}; /* { dg-warning "empty initializer" } */
++
++void
++f (int a)
++{
++  int vla[a] = {}; /* { dg-warning "empty initializer" } */
++  struct s as = {}; /* { dg-warning "empty initializer" } */
++  int ax = {}; /* { dg-warning "empty initializer" } */
++  float ay = {}; /* { dg-warning "empty initializer" } */
++  void *ap = {}; /* { dg-warning "empty initializer" } */
++  union u az = {}; /* { dg-warning "empty initializer" } */
++  int aaa[2] = {}; /* { dg-warning "empty initializer" } */
++  int t = (int) {}; /* { dg-warning "empty initializer" } */
++}
+diff --git a/gcc/testsuite/gcc.dg/c11-empty-init-3.c b/gcc/testsuite/gcc.dg/c11-empty-init-3.c
+new file mode 100644
+index 000000000..fd43fa789
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c11-empty-init-3.c
+@@ -0,0 +1,25 @@
++/* Test C11 does not support empty initializers.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c11 -Wc11-c2x-compat" } */
++
++struct s { int a; };
++struct s s = {}; /* { dg-warning "empty initializer" } */
++int x = {}; /* { dg-warning "empty initializer" } */
++float y = {}; /* { dg-warning "empty initializer" } */
++void *p = {}; /* { dg-warning "empty initializer" } */
++union u { int a; long b; };
++union u z = {}; /* { dg-warning "empty initializer" } */
++int aa[2] = {}; /* { dg-warning "empty initializer" } */
++
++void
++f (int a)
++{
++  int vla[a] = {}; /* { dg-warning "empty initializer" } */
++  struct s as = {}; /* { dg-warning "empty initializer" } */
++  int ax = {}; /* { dg-warning "empty initializer" } */
++  float ay = {}; /* { dg-warning "empty initializer" } */
++  void *ap = {}; /* { dg-warning "empty initializer" } */
++  union u az = {}; /* { dg-warning "empty initializer" } */
++  int aaa[2] = {}; /* { dg-warning "empty initializer" } */
++  int t = (int) {}; /* { dg-warning "empty initializer" } */
++}
+diff --git a/gcc/testsuite/gcc.dg/c2x-empty-init-1.c b/gcc/testsuite/gcc.dg/c2x-empty-init-1.c
+new file mode 100644
+index 000000000..1487a2b23
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c2x-empty-init-1.c
+@@ -0,0 +1,80 @@
++/* Test C2X support for empty initializers: valid use cases.  */
++/* { dg-do run } */
++/* { dg-options "-std=c2x -pedantic-errors" } */
++
++extern void exit (int);
++extern void abort (void);
++
++struct s { int a; };
++struct s s = {};
++int x = {};
++float y = {};
++void *p = {};
++union u { int a; long b; };
++union u z = {};
++int aa[2] = {};
++
++void
++f (int a)
++{
++  volatile int vla[a] = {};
++  struct s as = {};
++  int ax = {};
++  float ay = {};
++  void *ap = {};
++  union u az = {};
++  int aaa[2] = {};
++  for (int i = 0; i < a; i++)
++    if (vla[i] != 0)
++      abort ();
++  if (as.a != 0)
++    abort ();
++  if (ax != 0)
++    abort ();
++  if (ay != 0)
++    abort ();
++  if (ap != 0)
++    abort ();
++  if (az.a != 0)
++    abort ();
++  if (aaa[0] != 0)
++    abort ();
++  if (aaa[1] != 0)
++    abort ();
++  if ((int) {} != 0)
++    abort ();
++  if ((float) {} != 0)
++    abort ();
++  if ((struct s) {}.a != 0)
++    abort ();
++  if ((union u) {}.a != 0)
++    abort ();
++  if ((int [5]) {}[2] != 0)
++    abort ();
++  /* Overwrite contents of vla before second call to make it more likely stack
++     contents are nonzero if proper initialization did not occur.  */
++  for (int i = 0; i < a; i++)
++    vla[i] = -1;
++}
++
++int
++main (void)
++{
++  f (100);
++  f (100);
++  if (s.a != 0)
++    abort ();
++  if (x != 0)
++    abort ();
++  if (y != 0)
++    abort ();
++  if (p != 0)
++    abort ();
++  if (z.a != 0)
++    abort ();
++  if (aa[0] != 0)
++    abort ();
++  if (aa[1] != 0)
++    abort ();
++  exit (0);
++}
+diff --git a/gcc/testsuite/gcc.dg/c2x-empty-init-2.c b/gcc/testsuite/gcc.dg/c2x-empty-init-2.c
+new file mode 100644
+index 000000000..0dc81ce5b
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c2x-empty-init-2.c
+@@ -0,0 +1,18 @@
++/* Test C2X support for empty initializers: invalid use cases.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c2x -pedantic-errors" } */
++
++/* Empty initialization is invalid for arrays of unknown size.  This is
++   diagnosed via the diagnostic for zero-size arrays.  */
++int x[] = {}; /* { dg-error "zero or negative size array" } */
++
++void
++f (int a)
++{
++  int x1[] = {}; /* { dg-error "zero or negative size array" } */
++  int x2[][a] = {}; /* { dg-error "zero or negative size array" } */
++  /* Nonempty VLA initializers are still invalid.  */
++  int x3[a] = { 0 }; /* { dg-error "variable-sized object may not be initialized except with an empty initializer" } */
++  /* Variable-size compound literals are still invalid.  */
++  (void) (int [a]) {}; /* { dg-error "compound literal has variable size" } */
++}
+diff --git a/gcc/testsuite/gcc.dg/c2x-empty-init-3.c b/gcc/testsuite/gcc.dg/c2x-empty-init-3.c
+new file mode 100644
+index 000000000..472f8169c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/c2x-empty-init-3.c
+@@ -0,0 +1,25 @@
++/* Test empty initializers diagnosed in C2X mode with -Wc11-c2x-compat.  */
++/* { dg-do compile } */
++/* { dg-options "-std=c2x -Wc11-c2x-compat" } */
++
++struct s { int a; };
++struct s s = {}; /* { dg-warning "empty initializer" } */
++int x = {}; /* { dg-warning "empty initializer" } */
++float y = {}; /* { dg-warning "empty initializer" } */
++void *p = {}; /* { dg-warning "empty initializer" } */
++union u { int a; long b; };
++union u z = {}; /* { dg-warning "empty initializer" } */
++int aa[2] = {}; /* { dg-warning "empty initializer" } */
++
++void
++f (int a)
++{
++  int vla[a] = {}; /* { dg-warning "empty initializer" } */
++  struct s as = {}; /* { dg-warning "empty initializer" } */
++  int ax = {}; /* { dg-warning "empty initializer" } */
++  float ay = {}; /* { dg-warning "empty initializer" } */
++  void *ap = {}; /* { dg-warning "empty initializer" } */
++  union u az = {}; /* { dg-warning "empty initializer" } */
++  int aaa[2] = {}; /* { dg-warning "empty initializer" } */
++  int t = (int) {}; /* { dg-warning "empty initializer" } */
++}
+diff --git a/gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c b/gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c
+new file mode 100644
+index 000000000..e7dc9dfde
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c
+@@ -0,0 +1,29 @@
++/* Test C2X support for empty initializers: valid use cases with GNU
++   extensions.  */
++/* { dg-do run } */
++/* { dg-options "-std=gnu2x" } */
++
++extern void exit (int);
++extern void abort (void);
++
++void
++f (int a)
++{
++  struct s { volatile int x[a]; };
++  struct s b = {};
++  for (int i = 0; i < a; i++)
++    if (b.x[i] != 0)
++      abort ();
++  /* Overwrite contents of b.x before second call to make it more likely stack
++     contents are nonzero if proper initialization did not occur.  */
++  for (int i = 0; i < a; i++)
++    b.x[i] = -1;
++}
++
++int
++main (void)
++{
++  f (100);
++  f (100);
++  exit (0);
++}
+diff --git a/gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c b/gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c
+new file mode 100644
+index 000000000..69ee4e36b
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c
+@@ -0,0 +1,16 @@
++/* Test C2X support for empty initializers: invalid use cases with GNU
++   extensions.  */
++/* { dg-do compile } */
++/* { dg-options "-std=gnu2x" } */
++
++void
++f (int a)
++{
++  /* Make sure a non-braced initializer for a VLA-in-struct is still not
++     allowed.  */
++  struct s { int x[a]; };
++  struct s b;
++  for (int i = 0; i < a; i++)
++    b.x[i] = 0;
++  struct s c = b; /* { dg-error "variable-sized object may not be initialized except with an empty initializer" } */
++}
+diff --git a/gcc/testsuite/gcc.dg/init-bad-1.c b/gcc/testsuite/gcc.dg/init-bad-1.c
+index 61734045f..0da10c315 100644
+--- a/gcc/testsuite/gcc.dg/init-bad-1.c
++++ b/gcc/testsuite/gcc.dg/init-bad-1.c
+@@ -21,8 +21,7 @@ char t1[1] = { "xy" }; /* { dg-warning "initializer-string for array of 'char' i
+ char u[1] = { "x", "x" }; /* { dg-error "excess elements in 'char' array initializer" } */
+ /* { dg-message "near init" "near" { target *-*-* } .-1 } */
+
+-int i = { }; /* { dg-error "empty scalar initializer" } */
+-/* { dg-message "near init" "near" { target *-*-* } .-1 } */
++int i = { };
+
+ int j = { 1 };
+
+diff --git a/gcc/testsuite/gcc.dg/noncompile/pr71583.c b/gcc/testsuite/gcc.dg/noncompile/pr71583.c
+index 5045b88b6..fe6e556ad 100644
+--- a/gcc/testsuite/gcc.dg/noncompile/pr71583.c
++++ b/gcc/testsuite/gcc.dg/noncompile/pr71583.c
+@@ -5,7 +5,7 @@ void
+ f (int i)
+ {
+   (int (*)[++i]) { int }; /* { dg-error "expected" } */
+-  (int (*)[++i]) { }; /* { dg-error "empty" } */
++  (int (*)[++i]) { };
+   (int (*)[++i]) { , }; /* { dg-error "expected" } */
+   (int (*)[++i]) { f () }; /* { dg-error "too few" } */
+ }
+diff --git a/gcc/testsuite/gcc.dg/pr61096-1.c b/gcc/testsuite/gcc.dg/pr61096-1.c
+index e707904c0..f41789c5f 100644
+--- a/gcc/testsuite/gcc.dg/pr61096-1.c
++++ b/gcc/testsuite/gcc.dg/pr61096-1.c
+@@ -36,7 +36,7 @@ struct S s = { { 1 }, { 3 } }; /* { dg-error "23:extra brace group at end of ini
+ struct g g1 = { {0, { 1 } } }; /* { dg-error "21:initialization of flexible array member in a nested context" } */
+ struct g g2 = { .f[0] = 1 }; /* { dg-error "20:array index in non-array initializer" } */
+
+-__extension__ int a8 = { }; /* { dg-error "24:empty scalar initializer" } */
++__extension__ int a8 = { };
+ int a9[10] = {[1.2] = 2 }; /* { dg-error "16:array index in initializer not of integer type" } */
+ int a10[10] = {[e] = 2 }; /* { dg-error "17:nonconstant array index in initializer" } */
+ __extension__ int a11[10] = {[1 ... e] = 1 }; /* { dg-error "31:nonconstant array index in initializer" } */
+diff --git a/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c b/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c
+index 9d561016f..f88ee2de3 100644
+--- a/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c
++++ b/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c
+@@ -7,12 +7,12 @@ int
+ main (void)
+ {
+   /* None of the following should pass.  */
+-  int A[1 >> -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */
+-  int B[-1 >> -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */
+-  int D[1 << -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */
+-  int E[-1 << -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */
+-  int F[-1 >> 200] = {}; /* { dg-error "variable-sized object may not be initialized" } */
+-  int G[1 << 200] = {}; /* { dg-error "variable-sized object may not be initialized" } */
++  int A[1 >> -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */
++  int B[-1 >> -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */
++  int D[1 << -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */
++  int E[-1 << -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */
++  int F[-1 >> 200] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */
++  int G[1 << 200] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */
+
+   return 0;
+ }
+diff --git a/gcc/testsuite/gcc.dg/vla-init-2.c b/gcc/testsuite/gcc.dg/vla-init-2.c
+index 19fbffc26..f23630a36 100644
+--- a/gcc/testsuite/gcc.dg/vla-init-2.c
++++ b/gcc/testsuite/gcc.dg/vla-init-2.c
+@@ -7,4 +7,3 @@
+
+ const int i = 1;
+ void foo() { char a[i] = {""}; } /* { dg-error "variable-sized object may not be initialized" } */
+-/* { dg-error "array size missing in 'a'" "extra error" { target *-*-* } .-1 } */
+diff --git a/gcc/testsuite/gcc.dg/vla-init-3.c b/gcc/testsuite/gcc.dg/vla-init-3.c
+index 55e1de69c..a854f1268 100644
+--- a/gcc/testsuite/gcc.dg/vla-init-3.c
++++ b/gcc/testsuite/gcc.dg/vla-init-3.c
+@@ -6,4 +6,3 @@
+ /* { dg-options "" } */
+
+ void foo(int i) { char a[i] = {""}; } /* { dg-error "variable-sized object may not be initialized" } */
+-/* { dg-error "array size missing in 'a'" "extra error" { target *-*-* } .-1 } */
+--
+2.33.0
+
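Editor's note: in summary, what this patch legalises (C, compiled with -std=c2x; the VLA case is the one the new gimplify.cc hunk lowers to a memset).

struct s { int a; };

struct s v = {};   /* empty braces now valid for any complete object type */
int scalar = {};   /* scalar: initialized to zero */

void
g (int n)
{
  int vla[n] = {}; /* VLA: previously an error, now zero-filled */
}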
View file
_service:tar_scm:0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch
Added
@@ -0,0 +1,115 @@ +From 67001778883e10110c505dd8876a447a19d1ac5e Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 31 Aug 2022 15:39:27 +0100 +Subject: PATCH 136/157 BackportSME aarch64: Update sizeless tests for + recent GNU C changes + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de9805c08121a84ce368dccfe043a3f44c3ff13b + +The tests for sizeless SVE types include checks that the types +are handled for initialisation purposes in the same way as scalars. +GNU C and C2x now allow scalars to be initialised using empty braces, +so this patch updates the SVE tests to match. + +gcc/testsuite/ + * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c: Update + tests for empty initializers. + * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/sizeless-1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/sizeless-2.c: Likewise. +--- + .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/sizeless-1.c | 4 ++-- + .../gcc.target/aarch64/sve/acle/general-c/sizeless-2.c | 4 ++-- + 4 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +index 285751eeb..9db953583 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + /* Initialization. */ + + svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ +- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u2 = {}; + svuint8_t init_sve_u3 = { sve_u1 }; + svuint8_t init_sve_u4 = { gnu_u1 }; + svuint8_t init_sve_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ +@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + + /* Compound literals. */ + +- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) {}; + (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ + (svuint8_t) { sve_u1 }; + (svuint8_t) { gnu_u1 }; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +index 306fd4780..c05b16406 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + /* Initialization. */ + + svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ +- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u2 = {}; + svuint8_t init_sve_u3 = { sve_u1 }; + svuint8_t init_sve_u4 = { gnu_u1 }; + svuint8_t init_sve_u5 = { sve_s1 }; +@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, + + /* Compound literals. 
*/ + +- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) {}; + (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ + (svuint8_t) { sve_u1 }; + (svuint8_t) { gnu_u1 }; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +index 7fc51e7ad..4b34a71c1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +@@ -66,14 +66,14 @@ statements (int n) + + svint8_t init_sve_sc1 = sve_sc1; + svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ +- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ svint8_t init_sve_sc3 = {}; + + int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + + /* Compound literals. */ + +- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) {}; + (svint8_t) { sve_sc1 }; + + (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +index c575492c1..34dfd598e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +@@ -66,14 +66,14 @@ statements (int n) + + svint8_t init_sve_sc1 = sve_sc1; + svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ +- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ svint8_t init_sve_sc3 = {}; + + int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ + + /* Compound literals. */ + +- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) {}; + (svint8_t) { sve_sc1 }; + + (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ +-- +2.33.0 +
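Editor's note: the behaviour the updated dg- lines now expect, shown in isolation (AArch64 with SVE; a sketch based on the tests above, not an additional test from the patch).

#include <arm_sve.h>

svint8_t
empty_init (void)
{
  svint8_t v = {};   /* formerly "empty scalar initializer"; now accepted */
  return v;
}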
View file
_service:tar_scm:0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch
Added
@@ -0,0 +1,58 @@
+From dbe5a29054d4eb1e0f5173c8f2291569eac71c96 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Sat, 2 Dec 2023 13:49:55 +0000
+Subject: [PATCH 137/157] [Backport][SME] attribs: Namespace-aware
+ lookup_attribute_spec
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df4643f90c45db2501c731d4fded60dc1426b484
+
+attribute_ignored_p already used a namespace-aware query
+to find the attribute_spec for an existing attribute:
+
+  const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr));
+
+This patch does the same for other callers in the file.
+
+gcc/
+	* attribs.cc (comp_type_attributes): Pass the full TREE_PURPOSE
+	to lookup_attribute_spec, rather than just the name.
+	(remove_attributes_matching): Likewise.
+---
+ gcc/attribs.cc | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/attribs.cc b/gcc/attribs.cc
+index 8e2696bc5..1dbc30a95 100644
+--- a/gcc/attribs.cc
++++ b/gcc/attribs.cc
+@@ -1417,7 +1417,7 @@ comp_type_attributes (const_tree type1, const_tree type2)
+       const struct attribute_spec *as;
+       const_tree attr;
+
+-      as = lookup_attribute_spec (get_attribute_name (a));
++      as = lookup_attribute_spec (TREE_PURPOSE (a));
+       if (!as || as->affects_type_identity == false)
+	continue;
+
+@@ -1431,7 +1431,7 @@ comp_type_attributes (const_tree type1, const_tree type2)
+     {
+       const struct attribute_spec *as;
+
+-      as = lookup_attribute_spec (get_attribute_name (a));
++      as = lookup_attribute_spec (TREE_PURPOSE (a));
+       if (!as || as->affects_type_identity == false)
+	continue;
+
+@@ -1473,8 +1473,7 @@ remove_attributes_matching (tree attrs, Predicate predicate)
+   const_tree start = attrs;
+   for (const_tree attr = attrs; attr; attr = TREE_CHAIN (attr))
+     {
+-      tree name = get_attribute_name (attr);
+-      const attribute_spec *as = lookup_attribute_spec (name);
++      const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr));
+       const_tree end;
+       if (!predicate (attr, as))
+	end = attr;
+--
+2.33.0
+
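Editor's note: a toy model of the distinction. TREE_PURPOSE carries the scoped (namespace, name) pair, while get_attribute_name yields only the bare name, so a name-only query could conflate identically named attributes from different namespaces. The map below is purely illustrative; GCC's attribute tables are organised differently.

#include <map>
#include <string>
#include <utility>

struct toy_spec { bool affects_type_identity; };

// Keyed by (namespace, name), as the namespace-aware lookup effectively is.
static const std::map<std::pair<std::string, std::string>, toy_spec> specs = {
  { {"gnu", "mode"}, {true} },
};

static const toy_spec *
toy_lookup (const std::string &ns, const std::string &name)
{
  auto it = specs.find ({ns, name});
  return it == specs.end () ? nullptr : &it->second;
}

int main ()
{
  // "vendor::mode" must not resolve to the spec registered for "gnu::mode".
  return toy_lookup ("vendor", "mode") == nullptr ? 0 : 1;
}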
View file
_service:tar_scm:0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch
Added
@@ -0,0 +1,281 @@ +From 6f42edc5035b7f7e96730dca19757b148e1be70c Mon Sep 17 00:00:00 2001 +From: Marek Polacek <polacek@redhat.com> +Date: Thu, 29 Sep 2022 17:49:32 -0400 +Subject: PATCH 138/157 BackportSME c-family: ICE with + gnu::nocf_check PR106937 + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67efffec943656a509e036cd3c785a5c3d6885e1 + +When getting the name of an attribute, we ought to use +get_attribute_name, which handles both and __attribute__(()) +forms. Failure to do so may result in an ICE, like here. + +pp_c_attributes_display wasn't able to print the form of +attributes, so this patch teaches it to. + +When printing a pointer to function with a standard attribute, the attribute +should be printed after the parameter-list. With this patch we print: + + aka 'void (*)(int) gnu::nocf_check' + +or, in C++ with noexcept: + + aka 'void (*)(int) noexcept gnu::nocf_check' + +pp_c_attributes has been unused since its introduction in r56273 so +this patch removes it. + + PR c++/106937 + +gcc/c-family/ChangeLog: + + * c-pretty-print.cc (pp_c_specifier_qualifier_list): Print only GNU + attributes here. + (c_pretty_printer::direct_abstract_declarator): Print the standard + attributes here. + (pp_c_attributes): Remove. + (pp_c_attributes_display): Print the form if appropriate. Use + get_attribute_name. Don't print a trailing space when printing the + form. + * c-pretty-print.h (pp_c_attributes): Remove. + +gcc/cp/ChangeLog: + + * error.cc: Include "attribs.h". + (dump_type_prefix): Print only GNU attributes here. + (dump_type_suffix): Print standard attributes here. + +gcc/testsuite/ChangeLog: + + * c-c++-common/pointer-to-fn1.c: New test. +--- + gcc/c-family/c-pretty-print.cc | 96 ++++++++++++--------- + gcc/c-family/c-pretty-print.h | 1 - + gcc/cp/error.cc | 16 +++- + gcc/testsuite/c-c++-common/pointer-to-fn1.c | 18 ++++ + 4 files changed, 86 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/c-c++-common/pointer-to-fn1.c + +diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-family/c-pretty-print.cc +index 71a0cb510..4d60627b3 100644 +--- a/gcc/c-family/c-pretty-print.cc ++++ b/gcc/c-family/c-pretty-print.cc +@@ -462,7 +462,12 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) + { + pp_c_whitespace (pp); + pp_c_left_paren (pp); +- pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); ++ /* If we're dealing with the GNU form of attributes, print this: ++ void (__attribute__((noreturn)) *f) (); ++ If it is the standard attribute, we'll print the attribute ++ in c_pretty_printer::direct_abstract_declarator/FUNCTION_TYPE. */ ++ if (!cxx11_attribute_p (TYPE_ATTRIBUTES (pointee))) ++ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); + } + else if (!c_dialect_cxx ()) + pp_c_whitespace (pp); +@@ -591,6 +596,13 @@ c_pretty_printer::direct_abstract_declarator (tree t) + case FUNCTION_TYPE: + pp_c_parameter_type_list (this, t); + direct_abstract_declarator (TREE_TYPE (t)); ++ /* If this is the standard attribute, print ++ void (*)() noreturn; */ ++ if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) ++ { ++ pp_space (this); ++ pp_c_attributes_display (this, TYPE_ATTRIBUTES (t)); ++ } + break; + + case ARRAY_TYPE: +@@ -845,32 +857,7 @@ c_pretty_printer::declaration (tree t) + pp_c_init_declarator (this, t); + } + +-/* Pretty-print ATTRIBUTES using GNU C extension syntax. 
*/ +- +-void +-pp_c_attributes (c_pretty_printer *pp, tree attributes) +-{ +- if (attributes == NULL_TREE) +- return; +- +- pp_c_ws_string (pp, "__attribute__"); +- pp_c_left_paren (pp); +- pp_c_left_paren (pp); +- for (; attributes != NULL_TREE; attributes = TREE_CHAIN (attributes)) +- { +- pp_tree_identifier (pp, TREE_PURPOSE (attributes)); +- if (TREE_VALUE (attributes)) +- pp_c_call_argument_list (pp, TREE_VALUE (attributes)); +- +- if (TREE_CHAIN (attributes)) +- pp_separate_with (pp, ','); +- } +- pp_c_right_paren (pp); +- pp_c_right_paren (pp); +-} +- +-/* Pretty-print ATTRIBUTES using GNU C extension syntax for attributes +- marked to be displayed on disgnostic. */ ++/* Pretty-print ATTRIBUTES marked to be displayed on diagnostic. */ + + void + pp_c_attributes_display (c_pretty_printer *pp, tree a) +@@ -880,10 +867,12 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) + if (a == NULL_TREE) + return; + ++ const bool std_p = cxx11_attribute_p (a); ++ + for (; a != NULL_TREE; a = TREE_CHAIN (a)) + { +- const struct attribute_spec *as; +- as = lookup_attribute_spec (TREE_PURPOSE (a)); ++ const struct attribute_spec *as ++ = lookup_attribute_spec (get_attribute_name (a)); + if (!as || as->affects_type_identity == false) + continue; + if (c_dialect_cxx () +@@ -891,26 +880,47 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) + /* In C++ transaction_safe is printed at the end of the declarator. */ + continue; + if (is_first) +- { +- pp_c_ws_string (pp, "__attribute__"); +- pp_c_left_paren (pp); +- pp_c_left_paren (pp); +- is_first = false; +- } ++ { ++ if (std_p) ++ { ++ pp_c_left_bracket (pp); ++ pp_c_left_bracket (pp); ++ } ++ else ++ { ++ pp_c_ws_string (pp, "__attribute__"); ++ pp_c_left_paren (pp); ++ pp_c_left_paren (pp); ++ } ++ is_first = false; ++ } + else +- { +- pp_separate_with (pp, ','); +- } +- pp_tree_identifier (pp, TREE_PURPOSE (a)); ++ pp_separate_with (pp, ','); ++ tree ns; ++ if (std_p && (ns = get_attribute_namespace (a))) ++ { ++ pp_tree_identifier (pp, ns); ++ pp_colon (pp); ++ pp_colon (pp); ++ } ++ pp_tree_identifier (pp, get_attribute_name (a)); + if (TREE_VALUE (a)) +- pp_c_call_argument_list (pp, TREE_VALUE (a)); ++ pp_c_call_argument_list (pp, TREE_VALUE (a)); + } + + if (!is_first) + { +- pp_c_right_paren (pp); +- pp_c_right_paren (pp); +- pp_c_whitespace (pp); ++ if (std_p) ++ { ++ pp_c_right_bracket (pp); ++ pp_c_right_bracket (pp); ++ } ++ else ++ { ++ pp_c_right_paren (pp); ++ pp_c_right_paren (pp); ++ pp_c_whitespace (pp); ++ } + } + } + +diff --git a/gcc/c-family/c-pretty-print.h b/gcc/c-family/c-pretty-print.h +index ba7624dab..6a9090919 100644 +--- a/gcc/c-family/c-pretty-print.h ++++ b/gcc/c-family/c-pretty-print.h +@@ -119,7 +119,6 @@ void pp_c_space_for_pointer_operator (c_pretty_printer *, tree); + /* Declarations. */ + void pp_c_tree_decl_identifier (c_pretty_printer *, tree); + void pp_c_function_definition (c_pretty_printer *, tree); +-void pp_c_attributes (c_pretty_printer *, tree); + void pp_c_attributes_display (c_pretty_printer *, tree); + void pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, bool func_type); + void pp_c_type_qualifier_list (c_pretty_printer *, tree); +diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc +index 1e944ca3f..eeb12a7b4 100644 +--- a/gcc/cp/error.cc ++++ b/gcc/cp/error.cc +@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "internal-fn.h" + #include "gcc-rich-location.h" + #include "cp-name-hint.h" ++#include "attribs.h" + + #define pp_separate_with_comma(PP) pp_cxx_separate_with (PP, ',') + #define pp_separate_with_semicolon(PP) pp_cxx_separate_with (PP, ';') +@@ -896,7 +897,12 @@ dump_type_prefix (cxx_pretty_printer *pp, tree t, int flags) + { + pp_cxx_whitespace (pp); + pp_cxx_left_paren (pp); +- pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); ++ /* If we're dealing with the GNU form of attributes, print this: ++ void (__attribute__((noreturn)) *f) (); ++ If it is the standard attribute, we'll print the attribute ++ in dump_type_suffix. */ ++ if (!cxx11_attribute_p (TYPE_ATTRIBUTES (sub))) ++ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); + } + if (TYPE_PTR_P (t)) + pp_star (pp); +@@ -1029,6 +1035,14 @@ dump_type_suffix (cxx_pretty_printer *pp, tree t, int flags) + if (tx_safe_fn_type_p (t)) + pp_cxx_ws_string (pp, "transaction_safe"); + dump_exception_spec (pp, TYPE_RAISES_EXCEPTIONS (t), flags); ++ /* If this is the standard attribute, print ++ void (*)() noreturn; */ ++ if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) ++ { ++ pp_space (pp); ++ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (t)); ++ pp->padding = pp_before; ++ } + dump_type_suffix (pp, TREE_TYPE (t), flags); + break; + } +diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c +new file mode 100644 +index 000000000..975885462 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c +@@ -0,0 +1,18 @@ ++/* PR c++/106937 */ ++/* { dg-options "-fcf-protection" } */ ++/* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */ ++/* Test printing a pointer to function with attribute. */ ++ ++__attribute__((nocf_check)) typedef void (*FPA1)(); ++gnu::nocf_check typedef void (*FPA2)(int); ++typedef void (*FP1)(); ++typedef void (*FP2)(int); ++ ++void ++g (FP1 f1, FP2 f2) ++{ ++ FPA1 p1 = f1; // { dg-warning {aka 'void \(__attribute__\(\(nocf_check\)\) \*\)\(\)'} } ++ FPA2 p2 = f2; // { dg-warning {aka 'void \(\*\)\(int\) \\gnu::nocf_check\\'} } ++ FP1 p3 = p1; // { dg-warning {aka 'void \(__attribute__\(\(nocf_check\)\) \*\)\(\)'} } ++ FP2 p4 = p2; // { dg-warning {aka 'void \(\*\)\(int\) \\gnu::nocf_check\\'} } ++} +-- +2.33.0 +
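For reference, the two spellings whose printing this patch separates, mirroring the new testcase (a sketch; the nocf_check attribute is only meaningful under -fcf-protection):

__attribute__((nocf_check)) typedef void (*FPA1) ();  /* GNU form: printed before the declarator */
[[gnu::nocf_check]] typedef void (*FPA2) (int);       /* standard form: printed after the
                                                         parameter list, e.g. in diagnostics as
                                                         aka 'void (*)(int) [[gnu::nocf_check]]' */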
_service:tar_scm:0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch
Added
@@ -0,0 +1,35 @@ +From d13efe98cafa04aeb24f8e0f695e648887986228 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra <wilco.dijkstra@arm.com> +Date: Wed, 7 Dec 2022 14:16:24 +0000 +Subject: [PATCH 139/157] [Backport][SME] AArch64: Fix assert in + aarch64_move_imm [PR108006] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=952c8a1dc6235dc49ab207a7f18f63d2bc97fbc9 + +Ensure we only pass SI/DImode which fixes the assert. + +gcc/ + PR target/108006 + * config/aarch64/aarch64.cc (aarch64_expand_sve_const_vector): + Fix call to aarch64_move_imm to use SI/DI. +--- + gcc/config/aarch64/aarch64.cc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 0117a3e12..309ecc3d9 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -7925,7 +7925,8 @@ aarch64_expand_sve_const_vector (rtx target, rtx src) + /* If the integer can be moved into a general register by a + single instruction, do that and duplicate the result. */ + if (CONST_INT_P (elt_value) +- && aarch64_move_imm (INTVAL (elt_value), elt_mode)) ++ && aarch64_move_imm (INTVAL (elt_value), ++ encoded_bits <= 32 ? SImode : DImode)) + { + elt_value = force_reg (elt_mode, elt_value); + return expand_vector_broadcast (mode, elt_value); +-- +2.33.0 +
_service:tar_scm:0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch
Added
@@ -0,0 +1,37 @@ +From 071f26ce18db5a09cbae0607b065028a09a856ac Mon Sep 17 00:00:00 2001 +From: Marek Polacek <polacek@redhat.com> +Date: Tue, 11 Oct 2022 12:51:40 -0400 +Subject: [PATCH 140/157] [Backport][SME] testsuite: Only run -fcf-protection + test on i?86/x86_64 [PR107213] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cc694f45087c892e69ebbb177203c708f00b1bc7 + +This test fails on non-i?86/x86_64 targets because on those targets +we get + + error: '-fcf-protection=full' is not supported for this target + +so this patch limits where the test is run. + + PR testsuite/107213 + +gcc/testsuite/ChangeLog: + + * c-c++-common/pointer-to-fn1.c: Only run on i?86/x86_64. +--- + gcc/testsuite/c-c++-common/pointer-to-fn1.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c +index 975885462..e2f948d82 100644 +--- a/gcc/testsuite/c-c++-common/pointer-to-fn1.c ++++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c +@@ -1,4 +1,5 @@ + /* PR c++/106937 */ ++/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ + /* { dg-options "-fcf-protection" } */ + /* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */ + /* Test printing a pointer to function with attribute. */ +-- +2.33.0 +
_service:tar_scm:0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch
Added
@@ -0,0 +1,113 @@ +From 202ebc25e509ae0a2ac7d05c822cf6a8a817e49a Mon Sep 17 00:00:00 2001 +From: Andrew Pinski <apinski@marvell.com> +Date: Thu, 17 Nov 2022 22:08:07 +0000 +Subject: PATCH 141/157 BackportSME Fix PRs 106764, 106765, and 107307, + all ICE after invalid re-declaration + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bd0c9d9e706adaeea0d96152daade0a6819a8715 + +The problem here is the gimplifier returns GS_ERROR but +in some cases we don't check that soon enough and try +to do other work which could crash. +So the fix in these two cases is to return GS_ERROR +early if the gimplify_* functions had return GS_ERROR. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +Thanks, +Andrew Pinski + +gcc/ChangeLog: + + PR c/106764 + PR c/106765 + PR c/107307 + * gimplify.cc (gimplify_compound_lval): Return GS_ERROR + if gimplify_expr had return GS_ERROR. + (gimplify_call_expr): Likewise. + +gcc/testsuite/ChangeLog: + + PR c/106764 + PR c/106765 + PR c/107307 + * gcc.dg/redecl-19.c: New test. + * gcc.dg/redecl-20.c: New test. + * gcc.dg/redecl-21.c: New test. +--- + gcc/gimplify.cc | 5 +++++ + gcc/testsuite/gcc.dg/redecl-19.c | 5 +++++ + gcc/testsuite/gcc.dg/redecl-20.c | 9 +++++++++ + gcc/testsuite/gcc.dg/redecl-21.c | 9 +++++++++ + 4 files changed, 28 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/redecl-19.c + create mode 100644 gcc/testsuite/gcc.dg/redecl-20.c + create mode 100644 gcc/testsuite/gcc.dg/redecl-21.c + +diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc +index 91500e2fb..e9f527850 100644 +--- a/gcc/gimplify.cc ++++ b/gcc/gimplify.cc +@@ -3272,6 +3272,8 @@ gimplify_compound_lval (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, + tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval, + fallback | fb_lvalue); + ret = MIN (ret, tret); ++ if (ret == GS_ERROR) ++ return GS_ERROR; + + /* Step 2a: if we have component references we do not support on + registers then make sure the base isn't a register. Of course +@@ -3664,6 +3666,9 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) + ret = gimplify_expr (&CALL_EXPR_FN (*expr_p), pre_p, NULL, + is_gimple_call_addr, fb_rvalue); + ++ if (ret == GS_ERROR) ++ return GS_ERROR; ++ + nargs = call_expr_nargs (*expr_p); + + /* Get argument types for verification. */ +diff --git a/gcc/testsuite/gcc.dg/redecl-19.c b/gcc/testsuite/gcc.dg/redecl-19.c +new file mode 100644 +index 000000000..cc1068544 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/redecl-19.c +@@ -0,0 +1,5 @@ ++/* We used to ICE in the gimplifier, PR 106764 */ ++/* { dg-do compile } */ ++/* { dg-options "-w" } */ ++(*a)(); // { dg-note "" } ++b(){a()} a; // { dg-error "" } +diff --git a/gcc/testsuite/gcc.dg/redecl-20.c b/gcc/testsuite/gcc.dg/redecl-20.c +new file mode 100644 +index 000000000..07f52115e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/redecl-20.c +@@ -0,0 +1,9 @@ ++/* We used to ICE in the gimplifier, PR 107307 */ ++// { dg-do compile } ++// { dg-options "-w" } ++void f () ++{ ++ const struct { int a1; } b; // { dg-note "" } ++ int *c = b.a; ++ int *b; // { dg-error "" } ++} +diff --git a/gcc/testsuite/gcc.dg/redecl-21.c b/gcc/testsuite/gcc.dg/redecl-21.c +new file mode 100644 +index 000000000..2f2a6548a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/redecl-21.c +@@ -0,0 +1,9 @@ ++/* We used to ICE in the gimplifier, PR 106765 */ ++/* { dg-do compile } */ ++/* { dg-options "-w" } */ ++struct a { ++ int b ++} c() { ++ struct a a; // { dg-note "" } ++ a.b; ++ d a; // { dg-error "" } +-- +2.33.0 +
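The shape of the fix as a minimal self-contained sketch (simplified enum and stand-in functions, not the real gimplifier interface): once a sub-expression fails to gimplify, propagate GS_ERROR at once instead of continuing to work on a partially-gimplified tree.

enum gimplify_status_model { GS_ERROR, GS_OK };  /* reduced from GCC's enum */

static gimplify_status_model
gimplify_operand ()  /* stand-in for the recursive gimplify_expr calls */
{
  return GS_ERROR;   /* e.g. the operand involved an invalid re-declaration */
}

static gimplify_status_model
gimplify_parent ()
{
  gimplify_status_model ret = gimplify_operand ();
  if (ret == GS_ERROR)
    return GS_ERROR;  /* bail out before touching the broken operand */
  /* ...the follow-up work that used to crash... */
  return GS_OK;
}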
_service:tar_scm:0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch
Added
@@ -0,0 +1,43 @@ +From bc42a8bdab7b2ffeb81441c7c8a9a1215d8502ee Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Thu, 26 Jan 2023 15:51:00 +0000 +Subject: [PATCH 142/157] [Backport][SME] aarch64: Remove expected error for + compound literals + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=96fbe541481fcc7d1a8884fb8dbefd7979eb9543 + +GCC no longer treats empty compound literals as an error +(see 14cfa01755a66afbae2539f8b5796c960ddcecc6). + +gcc/testsuite/ + * gcc.target/aarch64/bfloat16_scalar_typecheck.c: Accept empty + compound literals. +--- + gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c +index 7c9188cf2..f4ae68028 100644 +--- a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c +@@ -40,7 +40,7 @@ bfloat16_t footest (bfloat16_t scalar0) + short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ + double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ + +- bfloat16_t scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */ ++ bfloat16_t scalar2_1 = {}; + bfloat16_t scalar2_2 = { glob_bfloat }; + bfloat16_t scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ + bfloat16_t scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ +@@ -92,7 +92,7 @@ bfloat16_t footest (bfloat16_t scalar0) + + /* Compound literals. */ + +- (bfloat16_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (bfloat16_t) {}; + (bfloat16_t) { glob_bfloat }; + (bfloat16_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ + (bfloat16_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ +-- +2.33.0 +
_service:tar_scm:0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch
Added
@@ -0,0 +1,264 @@ +From 42bfa9a26205da222cebbe830168b6f0b5e668b4 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Tue, 12 Jul 2022 12:59:25 +0100 +Subject: PATCH 143/157 BackportSME aarch64: Remove redundant builtins + code + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e80daf04c8888f527d2fc7f6cbcd1b4c853dcd04 + +aarch64_builtin_vectorized_function handles some built-in functions +that already have equivalent internal functions. This seems to be +redundant now, since the target builtins that it chooses are mapped +to the same optab patterns as the internal functions. + +gcc/ + * config/aarch64/aarch64-builtins.cc + (aarch64_builtin_vectorized_function): Remove handling of + floor, ceil, trunc, round, nearbyint, sqrt, clz and ctz. + +gcc/testsuite/ + * gcc.target/aarch64/vect_unary_1.c: New test. +--- + gcc/config/aarch64/aarch64-builtins.cc | 32 --- + .../gcc.target/aarch64/vect_unary_1.c | 186 ++++++++++++++++++ + 2 files changed, 186 insertions(+), 32 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_1.c + +diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc +index 37bb3af48..23a84cd53 100644 +--- a/gcc/config/aarch64/aarch64-builtins.cc ++++ b/gcc/config/aarch64/aarch64-builtins.cc +@@ -2653,38 +2653,6 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, + switch (fn) + { + #undef AARCH64_CHECK_BUILTIN_MODE +-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ +- (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode) +- CASE_CFN_FLOOR: +- return AARCH64_FIND_FRINT_VARIANT (floor); +- CASE_CFN_CEIL: +- return AARCH64_FIND_FRINT_VARIANT (ceil); +- CASE_CFN_TRUNC: +- return AARCH64_FIND_FRINT_VARIANT (btrunc); +- CASE_CFN_ROUND: +- return AARCH64_FIND_FRINT_VARIANT (round); +- CASE_CFN_NEARBYINT: +- return AARCH64_FIND_FRINT_VARIANT (nearbyint); +- CASE_CFN_SQRT: +- return AARCH64_FIND_FRINT_VARIANT (sqrt); +-#undef AARCH64_CHECK_BUILTIN_MODE +-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ +- (out_mode == V##C##SImode && in_mode == V##C##N##Imode) +- CASE_CFN_CLZ: +- { +- if (AARCH64_CHECK_BUILTIN_MODE (4, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_clzv4si; +- return NULL_TREE; +- } +- CASE_CFN_CTZ: +- { +- if (AARCH64_CHECK_BUILTIN_MODE (2, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_ctzv2si; +- else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) +- return aarch64_builtin_declsAARCH64_SIMD_BUILTIN_UNOP_ctzv4si; +- return NULL_TREE; +- } +-#undef AARCH64_CHECK_BUILTIN_MODE + #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) + CASE_CFN_IFLOOR: +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +new file mode 100644 +index 000000000..8516808be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +@@ -0,0 +1,186 @@ ++/* { dg-options "-O3 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++#include <stdint.h> ++ ++#define TEST2(OUT, NAME, IN) \ ++OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ ++test2_##OUT##_##NAME##_##IN (float dummy, \ ++ IN __attribute__((vector_size(sizeof(IN) * 2))) y) \ ++{ \ ++ OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ ++ x0 = __builtin_##NAME (y0); \ ++ x1 = __builtin_##NAME (y1); \ ++ return x; \ ++} \ ++ ++#define TEST4(OUT, NAME, IN) \ ++OUT __attribute__((vector_size(16))) \ ++test4_##OUT##_##NAME##_##IN (float dummy, \ ++ IN 
__attribute__((vector_size(16))) y) \ ++{ \ ++ OUT __attribute__((vector_size(16))) x; \ ++ x0 = __builtin_##NAME (y0); \ ++ x1 = __builtin_##NAME (y1); \ ++ x2 = __builtin_##NAME (y2); \ ++ x3 = __builtin_##NAME (y3); \ ++ return x; \ ++} \ ++ ++/* ++** test2_float_truncf_float: ++** frintz v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, truncf, float) ++ ++/* ++** test2_double_trunc_double: ++** frintz v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, trunc, double) ++ ++/* ++** test4_float_truncf_float: ++** frintz v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, truncf, float) ++ ++/* ++** test2_float_roundf_float: ++** frinta v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, roundf, float) ++ ++/* ++** test2_double_round_double: ++** frinta v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, round, double) ++ ++/* ++** test4_float_roundf_float: ++** frinta v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, roundf, float) ++ ++/* ++** test2_float_nearbyintf_float: ++** frinti v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, nearbyintf, float) ++ ++/* ++** test2_double_nearbyint_double: ++** frinti v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, nearbyint, double) ++ ++/* ++** test4_float_nearbyintf_float: ++** frinti v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, nearbyintf, float) ++ ++/* ++** test2_float_floorf_float: ++** frintm v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, floorf, float) ++ ++/* ++** test2_double_floor_double: ++** frintm v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, floor, double) ++ ++/* ++** test4_float_floorf_float: ++** frintm v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, floorf, float) ++ ++/* ++** test2_float_ceilf_float: ++** frintp v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, ceilf, float) ++ ++/* ++** test2_double_ceil_double: ++** frintp v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, ceil, double) ++ ++/* ++** test4_float_ceilf_float: ++** frintp v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, ceilf, float) ++ ++/* ++** test2_float_rintf_float: ++** frintx v0.2s, v1.2s ++** ret ++*/ ++TEST2 (float, rintf, float) ++ ++/* ++** test2_double_rint_double: ++** frintx v0.2d, v1.2d ++** ret ++*/ ++TEST2 (double, rint, double) ++ ++/* ++** test4_float_rintf_float: ++** frintx v0.4s, v1.4s ++** ret ++*/ ++TEST4 (float, rintf, float) ++ ++/* ++** test2_int_clz_int: ++** clz v0.2s, v1.2s ++** ret ++*/ ++TEST2 (int, clz, int) ++ ++/* ++** test4_int_clz_int: ++** clz v0.4s, v1.4s ++** ret ++*/ ++TEST4 (int, clz, int) ++ ++/* ++** test2_int_ctz_int: ++** rev32 (v0-9+).8b, v1.8b ++** rbit (v0-9+).8b, \1.8b ++** clz v0.2s, \2.2s ++** ret ++*/ ++TEST2 (int, ctz, int) ++ ++/* ++** test4_int_ctz_int: ++** rev32 (v0-9+).16b, v1.16b ++** rbit (v0-9+).16b, \1.16b ++** clz v0.4s, \2.4s ++** ret ++*/ ++TEST4 (int, ctz, int) +-- +2.33.0 +
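A user-level illustration of what still vectorizes after the cleanup (hedged: the exact instruction depends on target and options): at -O3 on AArch64, a loop like this goes through the generic internal function for floor and should end up as frintm on vector registers, matching what the new vect_unary_1.c assembly checks pin down.

void
apply_floor (float *x, int n)
{
  for (int i = 0; i < n; i++)
    x[i] = __builtin_floorf (x[i]);  /* expected to become frintm v*.4s */
}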
_service:tar_scm:0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch
Added
@@ -0,0 +1,63 @@ +From a1ba437195286af3389ba9f2d43b8cb6c73ba3d8 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Thu, 12 Oct 2023 15:55:58 +0100 +Subject: [PATCH 144/157] [Backport][SME] AArch64: Fix Armv9-a warnings that + get emitted whenever a ACLE header is used. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de593b3cffdc9c030c5e697ad9092b1b085dabc4 + +At the moment, trying to use -march=armv9-a with any ACLE header such as +arm_neon.h results in rows and rows of warnings saying: + +<built-in>: warning: "__ARM_ARCH" redefined +<built-in>: note: this is the location of the previous definition + +This is obviously not useful and happens because the header was defined at +__ARM_ARCH == 8 and the commandline changes it. + +The Arm port solves this by undef the macro during argument processing and we do +the same on AArch64 for the majority of macros. However we define this macro +using a different helper which requires the manual undef. + +Thanks, +Tamar + +gcc/ChangeLog: + + * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add undef. + +gcc/testsuite/ChangeLog: + + * gcc.target/aarch64/armv9_warning.c: New test. +--- + gcc/config/aarch64/aarch64-c.cc | 1 + + gcc/testsuite/gcc.target/aarch64/armv9_warning.c | 5 +++++ + 2 files changed, 6 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/armv9_warning.c + +diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc +index 745719d8b..2d2ac42c4 100644 +--- a/gcc/config/aarch64/aarch64-c.cc ++++ b/gcc/config/aarch64/aarch64-c.cc +@@ -129,6 +129,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + { + aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); + ++ cpp_undef (pfile, "__ARM_ARCH"); + builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8); + + builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", +diff --git a/gcc/testsuite/gcc.target/aarch64/armv9_warning.c b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c +new file mode 100644 +index 000000000..35690d5bc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv9-a -Wpedantic -Werror" } */ ++ ++#include <arm_neon.h> ++ +-- +2.33.0 +
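The user-visible effect, following the commit message (illustrative; assumes an AArch64 compiler): before the fix, building this translation unit with -march=armv9-a printed the '"__ARM_ARCH" redefined' warning for the built-in macro; with the added cpp_undef it is redefined cleanly.

#include <arm_neon.h>

int arch_level = __ARM_ARCH;  /* 9 under -march=armv9-a, 8 under Armv8-A */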
_service:tar_scm:0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch
Added
@@ -0,0 +1,184 @@ +From f6652dbebf81372884e9fd8b68627fc7a94d8d3b Mon Sep 17 00:00:00 2001 +From: Roger Sayle <roger@nextmovesoftware.com> +Date: Fri, 27 May 2022 08:57:46 +0100 +Subject: PATCH 145/157 BackportSME Canonicalize X&-Y as X*Y in match.pd + when Y is 0,1. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8fb94fc6097c0a934aac0d89c9c5e2038da67655 + +"For every pessimization, there's an equal and opposite optimization". + +In the review of my original patch for PR middle-end/98865, Richard +Biener pointed out that match.pd shouldn't be transforming X*Y into +X&-Y as the former is considered cheaper by tree-ssa's cost model +(operator count). A corollary of this is that we should instead be +transforming X&-Y into the cheaper X*Y as a preferred canonical form +(especially as RTL expansion now intelligently selects the appropriate +implementation based on the target's costs). + +With this patch we now generate identical code for: +int foo(int x, int y) { return -(x&1) & y; } +int bar(int x, int y) { return (x&1) * y; } + +specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2, +but both use and/mul with -Os. + +One minor wrinkle/improvement is that this patch includes three +additional optimizations (that account for the change in canonical +form) to continue to optimize PR92834 and PR94786. + +2022-05-27 Roger Sayle <roger@nextmovesoftware.com> + +gcc/ChangeLog + * match.pd (match_zero_one_valued_p): New predicate. + (mult @0 @1): Use zero_one_valued_p for optimization to the + expression "bit_and @0 @1". + (bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR. + (plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform. + (minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise. + (bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise. + Remove three redundant transforms obsoleted by the three above. + +gcc/testsuite/ChangeLog + * gcc.dg/pr98865.c: New test case. +--- + gcc/match.pd | 86 ++++++++++++++++------------------ + gcc/testsuite/gcc.dg/pr98865.c | 14 ++++++ + 2 files changed, 55 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/pr98865.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index aee58e47b..6d3165bcd 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + || !COMPLEX_FLOAT_TYPE_P (type))) + (negate @0))) + +-/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */ +-(simplify +- (mult SSA_NAME@1 SSA_NAME@2) +- (if (INTEGRAL_TYPE_P (type) +- && get_nonzero_bits (@1) == 1 +- && get_nonzero_bits (@2) == 1) +- (bit_and @1 @2))) +- + /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...}, + unless the target has native support for the former but not the latter. */ + (simplify +@@ -1790,6 +1782,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (bit_not (bit_not @0)) + @0) + ++(match zero_one_valued_p ++ @0 ++ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1))) ++(match zero_one_valued_p ++ truth_valued_p@0) ++ ++/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }. */ ++(simplify ++ (mult zero_one_valued_p@0 zero_one_valued_p@1) ++ (if (INTEGRAL_TYPE_P (type)) ++ (bit_and @0 @1))) ++ ++/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ ++(simplify ++ (bit_and:c (convert? 
(negate zero_one_valued_p@0)) @1) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE ++ && !TYPE_UNSIGNED (TREE_TYPE (@0))) ++ (mult (convert @0) @1))) ++ + /* Convert ~ (-A) to A - 1. */ + (simplify + (bit_not (convert? (negate @0))) +@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (cmp @0 (minmax:c @0 @1)) + { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } )) + +-/* Undo fancy way of writing max/min or other ?: expressions, +- like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a. ++/* Undo fancy ways of writing max/min or other ?: expressions, like ++ a - ((a - b) & -(a < b)) and a - (a - b) * (a < b) into (a < b) ? b : a. + People normally use ?: and that is what we actually try to optimize. */ +-(for cmp (simple_comparison) +- (simplify +- (minus @0 (bit_and:c (minus @0 @1) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0))) +- (simplify +- (plus:c @0 (bit_and:c (minus @1 @0) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0))) +- /* Similarly with ^ instead of - though in that case with :c. */ +- (simplify +- (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1) +- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) +- (if (INTEGRAL_TYPE_P (type) +- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) +- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) +- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) +- || !TYPE_UNSIGNED (TREE_TYPE (@4))) +- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) +- (cond (cmp @2 @3) @1 @0)))) ++/* Transform A + (B-A)*cmp into cmp ? B : A. */ ++(simplify ++ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) ++/* Transform A - (A-B)*cmp into cmp ? B : A. */ ++(simplify ++ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) ++/* Transform A ^ (A^B)*cmp into cmp ? B : A. */ ++(simplify ++ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2)) ++ (if (INTEGRAL_TYPE_P (type) ++ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) ++ (cond (convert:boolean_type_node @2) @1 @0))) + + /* Simplifications of shift and rotates. */ + +diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c +new file mode 100644 +index 000000000..95f727033 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr98865.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++int foo(int x, int y) ++{ ++ return -(x&1) & y; ++} ++ ++int bar(int x, int y) ++{ ++ return (x&1) * y; ++} ++ ++/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */ +-- +2.33.0 +
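The commit message's worked example, plus one instance of the re-added ?: recognition (both foo and bar compile to the same code after this change; pr98865.c checks that the multiplication form survives to the optimized dump):

int foo (int x, int y) { return -(x & 1) & y; }  /* canonicalized into... */
int bar (int x, int y) { return (x & 1) * y; }   /* ...this cheaper form */

/* One of the three new transforms: a - (a - b) * (a < b) is now
   recognized as (a < b) ? b : a.  */
int min_like (int a, int b) { return a - (a - b) * (a < b); }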
_service:tar_scm:0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch
Added
@@ -0,0 +1,417 @@ +From a8f10b4b73c2624599765edf7ff19d53eca15135 Mon Sep 17 00:00:00 2001 +From: Tamar Christina <tamar.christina@arm.com> +Date: Mon, 12 Dec 2022 15:16:50 +0000 +Subject: PATCH 146/157 BackportSME middle-end: Add new tbranch optab to + add support for bit-test-and-branch operations + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc582d2ef32e2d3723c68d111f4e49607631f34d + +This adds a new test-and-branch optab that can be used to do a conditional test +of a bit and branch. This is similar to the cbranch optab but instead can +test any arbitrary bit inside the register. + +This patch recognizes boolean comparisons and single bit mask tests. + +gcc/ChangeLog: + + * dojump.cc (do_jump): Pass along value. + (do_jump_by_parts_greater_rtx): Likewise. + (do_jump_by_parts_zero_rtx): Likewise. + (do_jump_by_parts_equality_rtx): Likewise. + (do_compare_rtx_and_jump): Likewise. + (do_compare_and_jump): Likewise. + * dojump.h (do_compare_rtx_and_jump): New. + * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check. + (validate_test_and_branch): New. + (emit_cmp_and_jump_insns): Optiobally take a value, and when value is + supplied then check if it's suitable for tbranch. + * optabs.def (tbranch_eq$a4, tbranch_ne$a4): New. + * doc/md.texi (tbranch_@var{op}@var{mode}4): Document it. + * optabs.h (emit_cmp_and_jump_insns): New. + * tree.h (tree_zero_one_valued_p): New. +--- + gcc/doc/md.texi | 7 +++ + gcc/dojump.cc | 52 +++++++++++++++------- + gcc/dojump.h | 4 ++ + gcc/optabs.cc | 114 ++++++++++++++++++++++++++++++++++++++++++++---- + gcc/optabs.def | 2 + + gcc/optabs.h | 4 ++ + gcc/tree.h | 1 + + 7 files changed, 159 insertions(+), 25 deletions(-) + +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index c0cf0ec64..2193900e7 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -7299,6 +7299,13 @@ case, you can and should make operand 1's predicate reject some operators + in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether + from the machine description. + ++@cindex @code{tbranch_@var{op}@var{mode}3} instruction pattern ++@item @samp{tbranch_@var{op}@var{mode}3} ++Conditional branch instruction combined with a bit test-and-compare ++instruction. Operand 0 is the operand of the comparison. Operand 1 is the bit ++position of Operand 1 to test. Operand 3 is the @code{code_label} to jump to. ++@var{op} is one of @var{eq} or @var{ne}. ++ + @cindex @code{cbranch@var{mode}4} instruction pattern + @item @samp{cbranch@var{mode}4} + Conditional branch instruction combined with a compare instruction. +diff --git a/gcc/dojump.cc b/gcc/dojump.cc +index 0c880d653..604b28537 100644 +--- a/gcc/dojump.cc ++++ b/gcc/dojump.cc +@@ -621,7 +621,7 @@ do_jump (tree exp, rtx_code_label *if_false_label, + } + do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)), + NE, TYPE_UNSIGNED (TREE_TYPE (exp)), +- GET_MODE (temp), NULL_RTX, ++ exp, GET_MODE (temp), NULL_RTX, + if_false_label, if_true_label, prob); + } + +@@ -689,7 +689,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + + /* All but high-order word must be compared as unsigned. */ + do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0), +- word_mode, NULL_RTX, NULL, if_true_label, ++ NULL, word_mode, NULL_RTX, NULL, if_true_label, + prob); + + /* Emit only one comparison for 0. Do not emit the last cond jump. 
*/ +@@ -697,8 +697,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + break; + + /* Consider lower words only if these are equal. */ +- do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode, +- NULL_RTX, NULL, if_false_label, ++ do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL, ++ word_mode, NULL_RTX, NULL, if_false_label, + prob.invert ()); + } + +@@ -757,7 +757,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, + + if (part != 0) + { +- do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode, ++ do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode, + NULL_RTX, if_false_label, if_true_label, prob); + return; + } +@@ -768,7 +768,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, + + for (i = 0; i < nwords; i++) + do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), +- const0_rtx, EQ, 1, word_mode, NULL_RTX, ++ const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX, + if_false_label, NULL, prob); + + if (if_true_label) +@@ -811,8 +811,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1, + + for (i = 0; i < nwords; i++) + do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), +- operand_subword_force (op1, i, mode), +- EQ, 0, word_mode, NULL_RTX, ++ operand_subword_force (op1, i, mode), ++ EQ, 0, NULL, word_mode, NULL_RTX, + if_false_label, NULL, prob); + + if (if_true_label) +@@ -964,6 +964,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + rtx_code_label *if_false_label, + rtx_code_label *if_true_label, + profile_probability prob) ++{ ++ do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size, ++ if_false_label, if_true_label, prob); ++} ++ ++/* Like do_compare_and_jump but expects the values to compare as two rtx's. ++ The decision as to signed or unsigned comparison must be made by the caller. ++ ++ If MODE is BLKmode, SIZE is an RTX giving the size of the objects being ++ compared. 
*/ ++ ++void ++do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, ++ tree val, machine_mode mode, rtx size, ++ rtx_code_label *if_false_label, ++ rtx_code_label *if_true_label, ++ profile_probability prob) + { + rtx tem; + rtx_code_label *dummy_label = NULL; +@@ -1179,8 +1196,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + } + else + dest_label = if_false_label; +- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, +- size, dest_label, NULL, first_prob); ++ ++ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, ++ val, mode, size, dest_label, NULL, ++ first_prob); + } + /* For !and_them we want to split: + if (x) goto t; // prob; +@@ -1194,8 +1213,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + else + { + profile_probability first_prob = prob.split (cprob); +- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, +- size, NULL, if_true_label, first_prob); ++ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, ++ val, mode, size, NULL, ++ if_true_label, first_prob); + if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump)) + { + /* x != y can be split into x unord y || x ltgt y +@@ -1217,7 +1237,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, + } + } + +- emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, ++ emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val, + if_true_label, prob); + } + +@@ -1291,9 +1311,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code, + op1 = new_op1; + } + +- do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, +- ((mode == BLKmode) +- ? expr_size (treeop0) : NULL_RTX), ++ do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode, ++ ((mode == BLKmode) ++ ? expr_size (treeop0) : NULL_RTX), + if_false_label, if_true_label, prob); + } + +diff --git a/gcc/dojump.h b/gcc/dojump.h +index e379cceb3..d1d79c490 100644 +--- a/gcc/dojump.h ++++ b/gcc/dojump.h +@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label, + extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *, + profile_probability); + ++extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree, ++ machine_mode, rtx, rtx_code_label *, ++ rtx_code_label *, profile_probability); ++ + extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, + machine_mode, rtx, rtx_code_label *, + rtx_code_label *, profile_probability); +diff --git a/gcc/optabs.cc b/gcc/optabs.cc +index 3d8fa3abd..b441137de 100644 +--- a/gcc/optabs.cc ++++ b/gcc/optabs.cc +@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see + #include "libfuncs.h" + #include "internal-fn.h" + #include "langhooks.h" ++#include "gimple.h" ++#include "ssa.h" + + static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *, + machine_mode *); +@@ -4621,7 +4623,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode, + + static void + emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, +- profile_probability prob) ++ direct_optab cmp_optab, profile_probability prob, ++ bool test_branch) + { + machine_mode optab_mode; + enum mode_class mclass; +@@ -4630,12 +4633,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, + + mclass = GET_MODE_CLASS (mode); + optab_mode = (mclass == MODE_CC) ? 
CCmode : mode; +- icode = optab_handler (cbranch_optab, optab_mode); ++ icode = optab_handler (cmp_optab, optab_mode); + + gcc_assert (icode != CODE_FOR_nothing); +- gcc_assert (insn_operand_matches (icode, 0, test)); +- insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), +- XEXP (test, 1), label)); ++ gcc_assert (test_branch || insn_operand_matches (icode, 0, test)); ++ if (test_branch) ++ insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0), ++ XEXP (test, 1), label)); ++ else ++ insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), ++ XEXP (test, 1), label)); ++ + if (prob.initialized_p () + && profile_status_for_fn (cfun) != PROFILE_ABSENT + && insn +@@ -4645,6 +4653,68 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, + add_reg_br_prob_note (insn, prob); + } + ++/* PTEST points to a comparison that compares its first operand with zero. ++ Check to see if it can be performed as a bit-test-and-branch instead. ++ On success, return the instruction that performs the bit-test-and-branch ++ and replace the second operand of *PTEST with the bit number to test. ++ On failure, return CODE_FOR_nothing and leave *PTEST unchanged. ++ ++ Note that the comparison described by *PTEST should not be taken ++ literally after a successful return. *PTEST is just a convenient ++ place to store the two operands of the bit-and-test. ++ ++ VAL must contain the original tree expression for the first operand ++ of *PTEST. */ ++ ++static enum insn_code ++validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res) ++{ ++ if (!val || TREE_CODE (val) != SSA_NAME) ++ return CODE_FOR_nothing; ++ ++ machine_mode mode = TYPE_MODE (TREE_TYPE (val)); ++ rtx test = *ptest; ++ direct_optab optab; ++ ++ if (GET_CODE (test) == EQ) ++ optab = tbranch_eq_optab; ++ else if (GET_CODE (test) == NE) ++ optab = tbranch_ne_optab; ++ else ++ return CODE_FOR_nothing; ++ ++ *res = optab; ++ ++ /* If the target supports the testbit comparison directly, great. */ ++ auto icode = direct_optab_handler (optab, mode); ++ if (icode == CODE_FOR_nothing) ++ return icode; ++ ++ if (tree_zero_one_valued_p (val)) ++ { ++ auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0; ++ XEXP (test, 1) = gen_int_mode (pos, mode); ++ *ptest = test; ++ *pmode = mode; ++ return icode; ++ } ++ ++ wide_int wcst = get_nonzero_bits (val); ++ if (wcst == -1) ++ return CODE_FOR_nothing; ++ ++ int bitpos; ++ ++ if ((bitpos = wi::exact_log2 (wcst)) == -1) ++ return CODE_FOR_nothing; ++ ++ auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos; ++ XEXP (test, 1) = gen_int_mode (pos, mode); ++ *ptest = test; ++ *pmode = mode; ++ return icode; ++} ++ + /* Generate code to compare X with Y so that the condition codes are + set and to jump to LABEL if the condition is true. If X is a + constant and Y is not a constant, then the comparison is swapped to +@@ -4662,11 +4732,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, + It will be potentially converted into an unsigned variant based on + UNSIGNEDP to select a proper jump instruction. + +- PROB is the probability of jumping to LABEL. */ ++ PROB is the probability of jumping to LABEL. If the comparison is against ++ zero then VAL contains the expression from which the non-zero RTL is ++ derived. 
*/ + + void + emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, +- machine_mode mode, int unsignedp, rtx label, ++ machine_mode mode, int unsignedp, tree val, rtx label, + profile_probability prob) + { + rtx op0 = x, op1 = y; +@@ -4691,10 +4763,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, + + prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN, + &test, &mode); +- emit_cmp_and_jump_insn_1 (test, mode, label, prob); ++ ++ /* Check if we're comparing a truth type with 0, and if so check if ++ the target supports tbranch. */ ++ machine_mode tmode = mode; ++ direct_optab optab; ++ if (op1 == CONST0_RTX (GET_MODE (op1)) ++ && validate_test_and_branch (val, &test, &tmode, ++ &optab) != CODE_FOR_nothing) ++ { ++ emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true); ++ return; ++ } ++ ++ emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false); + } + +- ++/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown. */ ++ ++void ++emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, ++ machine_mode mode, int unsignedp, rtx label, ++ profile_probability prob) ++{ ++ emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL, ++ label, prob); ++} ++ ++ + /* Emit a library call comparison between floating point X and Y. + COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.). */ + +diff --git a/gcc/optabs.def b/gcc/optabs.def +index 801310eba..dbf529434 100644 +--- a/gcc/optabs.def ++++ b/gcc/optabs.def +@@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a") + OPTAB_D (reload_out_optab, "reload_out$a") + + OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE) ++OPTAB_D (tbranch_eq_optab, "tbranch_eq$a3") ++OPTAB_D (tbranch_ne_optab, "tbranch_ne$a3") + OPTAB_D (addcc_optab, "add$acc") + OPTAB_D (negcc_optab, "neg$acc") + OPTAB_D (notcc_optab, "not$acc") +diff --git a/gcc/optabs.h b/gcc/optabs.h +index cfd7c742d..cd55604bc 100644 +--- a/gcc/optabs.h ++++ b/gcc/optabs.h +@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx, + machine_mode, int, rtx, + profile_probability prob + = profile_probability::uninitialized ()); ++extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx, ++ machine_mode, int, tree, rtx, ++ profile_probability prob ++ = profile_probability::uninitialized ()); + + /* Generate code to indirectly jump to a location given in the rtx LOC. */ + extern void emit_indirect_jump (rtx); +diff --git a/gcc/tree.h b/gcc/tree.h +index 3ff7732dc..07af584d6 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -4627,6 +4627,7 @@ extern tree signed_or_unsigned_type_for (int, tree); + extern tree signed_type_for (tree); + extern tree unsigned_type_for (tree); + extern bool is_truth_type_for (tree, tree); ++extern bool tree_zero_one_valued_p (tree); + extern tree truth_type_for (tree); + extern tree build_pointer_type_for_mode (tree, machine_mode, bool); + extern tree build_pointer_type (tree); +-- +2.33.0 +
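A source-level shape the new optab is aimed at (a hedged example: whether a single test-and-branch instruction is emitted depends on the target providing tbranch patterns, e.g. tbz/tbnz-style branches on AArch64):

extern void callee (void);

void
maybe_call (unsigned flags)
{
  if (flags & (1u << 5))  /* single-bit test and branch */
    callee ();
}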
_service:tar_scm:0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch
Added
@@ -0,0 +1,110 @@ +From fe64cc72e6221cf05e40bc868287bd1fcf07479f Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Mon, 6 Nov 2023 10:49:58 +0000 +Subject: PATCH 147/157 BackportSME explow: Allow dynamic allocations + after vregs + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a7a45364dddef399bfb550235166df51108a3142 + +This patch allows allocate_dynamic_stack_space to be called before +or after virtual registers have been instantiated. It uses the +same approach as allocate_stack_local, which already supported this. + +gcc/ + * function.h (get_stack_dynamic_offset): Declare. + * function.cc (get_stack_dynamic_offset): New function, + split out from... + (get_stack_dynamic_offset): ...here. + * explow.cc (allocate_dynamic_stack_space): Handle calls made + after virtual registers have been instantiated. +--- + gcc/explow.cc | 10 +++++++--- + gcc/function.cc | 12 +++++++++++- + gcc/function.h | 1 + + 3 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/gcc/explow.cc b/gcc/explow.cc +index d8aa75ee9..124e8f49e 100644 +--- a/gcc/explow.cc ++++ b/gcc/explow.cc +@@ -1375,12 +1375,16 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + HOST_WIDE_INT stack_usage_size = -1; + rtx_code_label *final_label; + rtx final_target, target; ++ rtx addr = (virtuals_instantiated ++ ? plus_constant (Pmode, stack_pointer_rtx, ++ get_stack_dynamic_offset ()) ++ : virtual_stack_dynamic_rtx); + + /* If we're asking for zero bytes, it doesn't matter what we point + to since we can't dereference it. But return a reasonable + address anyway. */ + if (size == const0_rtx) +- return virtual_stack_dynamic_rtx; ++ return addr; + + /* Otherwise, show we're calling alloca or equivalent. */ + cfun->calls_alloca = 1; +@@ -1532,7 +1536,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + poly_int64 saved_stack_pointer_delta; + + if (!STACK_GROWS_DOWNWARD) +- emit_move_insn (target, virtual_stack_dynamic_rtx); ++ emit_move_insn (target, force_operand (addr, target)); + + /* Check stack bounds if necessary. */ + if (crtl->limit_stack) +@@ -1575,7 +1579,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + stack_pointer_delta = saved_stack_pointer_delta; + + if (STACK_GROWS_DOWNWARD) +- emit_move_insn (target, virtual_stack_dynamic_rtx); ++ emit_move_insn (target, force_operand (addr, target)); + } + + suppress_reg_args_size = false; +diff --git a/gcc/function.cc b/gcc/function.cc +index f4fc211a0..e62b2a8d6 100644 +--- a/gcc/function.cc ++++ b/gcc/function.cc +@@ -1945,6 +1945,16 @@ instantiate_decls (tree fndecl) + vec_free (cfun->local_decls); + } + ++/* Return the value of STACK_DYNAMIC_OFFSET for the current function. ++ This is done through a function wrapper so that the macro sees a ++ predictable set of included files. */ ++ ++poly_int64 ++get_stack_dynamic_offset () ++{ ++ return STACK_DYNAMIC_OFFSET (current_function_decl); ++} ++ + /* Pass through the INSNS of function FNDECL and convert virtual register + references to hard register references. */ + +@@ -1956,7 +1966,7 @@ instantiate_virtual_regs (void) + /* Compute the offsets to use for this function. 
*/ + in_arg_offset = FIRST_PARM_OFFSET (current_function_decl); + var_offset = targetm.starting_frame_offset (); +- dynamic_offset = STACK_DYNAMIC_OFFSET (current_function_decl); ++ dynamic_offset = get_stack_dynamic_offset (); + out_arg_offset = STACK_POINTER_OFFSET; + #ifdef FRAME_POINTER_CFA_OFFSET + cfa_offset = FRAME_POINTER_CFA_OFFSET (current_function_decl); +diff --git a/gcc/function.h b/gcc/function.h +index 4e8131706..a5846465a 100644 +--- a/gcc/function.h ++++ b/gcc/function.h +@@ -711,6 +711,7 @@ extern vec<edge> convert_jumps_to_returns (basic_block last_bb, bool simple_p, + extern basic_block emit_return_for_exit (edge exit_fallthru_edge, + bool simple_p); + extern void reposition_prologue_and_epilogue_notes (void); ++extern poly_int64 get_stack_dynamic_offset (); + + /* Returns the name of the current function. */ + extern const char *fndecl_name (tree); +-- +2.33.0 +
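For reference, the address selection at the core of the patch (this restates the hunk above; the design follows what allocate_stack_local already did): before virtual-register instantiation, name the virtual register and let instantiation resolve it; afterwards, fold the dynamic offset into the stack pointer directly.

rtx addr = (virtuals_instantiated
            ? plus_constant (Pmode, stack_pointer_rtx,
                             get_stack_dynamic_offset ())
            : virtual_stack_dynamic_rtx);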
_service:tar_scm:0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch
Added
@@ -0,0 +1,225 @@ +From 6cc7bcadadbc2521a2db4c02adfe066d805e37ef Mon Sep 17 00:00:00 2001 +From: Giuliano Belinassi <gbelinassi@suse.de> +Date: Fri, 6 May 2022 23:37:52 -0300 +Subject: PATCH 148/157 BackportSME PR105169 Fix references to discarded + sections + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7a3f38a966a52893fb5bae301a1a3d56961358fb + +When -fpatchable-function-entry= is enabled, certain C++ codes fails to +link because of generated references to discarded sections in +__patchable_function_entry section. This commit fixes this problem by +puting those references in a COMDAT section. + +2022-05-06 Giuliano Belinassi <gbelinassi@suse.de> + +gcc/ChangeLog + PR c++/105169 + * targhooks.cc (default_print_patchable_function_entry_1): Handle COMDAT case. + * varasm.cc (switch_to_comdat_section): New + (handle_vtv_comdat_section): Call switch_to_comdat_section. + * varasm.h: Declare switch_to_comdat_section. + +gcc/testsuite/ChangeLog +2022-05-06 Giuliano Belinassi <gbelinassi@suse.de> + + PR c++/105169 + * g++.dg/modules/pr105169.h: New file. + * g++.dg/modules/pr105169_a.C: New test. + * g++.dg/modules/pr105169_b.C: New file. +--- + gcc/targhooks.cc | 8 ++++-- + gcc/testsuite/g++.dg/modules/pr105169.h | 22 +++++++++++++++ + gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +++++++++++++++++ + gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++++++++ + gcc/varasm.cc | 33 ++++++++++++++--------- + gcc/varasm.h | 2 ++ + 6 files changed, 87 insertions(+), 15 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C + create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C + +diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc +index c88afa5db..175a0e18a 100644 +--- a/gcc/targhooks.cc ++++ b/gcc/targhooks.cc +@@ -2019,8 +2019,12 @@ default_print_patchable_function_entry_1 (FILE *file, + patch_area_number++; + ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number); + +- switch_to_section (get_section ("__patchable_function_entries", +- flags, current_function_decl)); ++ section *sect = get_section ("__patchable_function_entries", ++ flags, current_function_decl); ++ if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP (current_function_decl)) ++ switch_to_comdat_section (sect, current_function_decl); ++ else ++ switch_to_section (sect); + assemble_align (POINTER_SIZE); + fputs (asm_op, file); + assemble_name_raw (file, buf); +diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h b/gcc/testsuite/g++.dg/modules/pr105169.h +new file mode 100644 +index 000000000..a7e762705 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169.h +@@ -0,0 +1,22 @@ ++class IPXAddressClass ++{ ++public: ++ IPXAddressClass(void); ++}; ++ ++class WinsockInterfaceClass ++{ ++ ++public: ++ WinsockInterfaceClass(void); ++ ++ virtual void Set_Broadcast_Address(void*){}; ++ ++ virtual int Get_Protocol(void) ++ { ++ return 0; ++ }; ++ ++protected: ++}; ++ +diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C b/gcc/testsuite/g++.dg/modules/pr105169_a.C +new file mode 100644 +index 000000000..66dc4b790 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C +@@ -0,0 +1,25 @@ ++/* { dg-module-do link } */ ++/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++ ++/* This test is in the "modules" package because it supports multiple files ++ linkage. 
*/ ++ ++#include "pr105169.h" ++ ++WinsockInterfaceClass* PacketTransport; ++ ++IPXAddressClass::IPXAddressClass(void) ++{ ++} ++ ++int function() ++{ ++ return PacketTransport->Get_Protocol(); ++} ++ ++int main() ++{ ++ IPXAddressClass ipxaddr; ++ return 0; ++} +diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C b/gcc/testsuite/g++.dg/modules/pr105169_b.C +new file mode 100644 +index 000000000..5f8b00dfe +--- /dev/null ++++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C +@@ -0,0 +1,12 @@ ++/* { dg-module-do link } */ ++/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ ++ ++/* This test is in the "modules" package because it supports multiple files ++ linkage. */ ++ ++#include "pr105169.h" ++ ++WinsockInterfaceClass::WinsockInterfaceClass(void) ++{ ++} +diff --git a/gcc/varasm.cc b/gcc/varasm.cc +index 3f69b47a7..bae935694 100644 +--- a/gcc/varasm.cc ++++ b/gcc/varasm.cc +@@ -8459,25 +8459,21 @@ default_asm_output_ident_directive (const char *ident_str) + fprintf (asm_out_file, "%s\"%s\"\n", ident_asm_op, ident_str); + } + +- +-/* This function ensures that vtable_map variables are not only +- in the comdat section, but that each variable has its own unique +- comdat name. Without this the variables end up in the same section +- with a single comdat name. +- ++/* Switch to a COMDAT section with COMDAT name of decl. ++ + FIXME: resolve_unique_section needs to deal better with + decls with both DECL_SECTION_NAME and DECL_ONE_ONLY. Once + that is fixed, this if-else statement can be replaced with + a single call to "switch_to_section (sect)". */ + +-static void +-handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) ++void ++switch_to_comdat_section (section *sect, tree decl) + { + #if defined (OBJECT_FORMAT_ELF) + targetm.asm_out.named_section (sect->named.name, + sect->named.common.flags + | SECTION_LINKONCE, +- DECL_NAME (decl)); ++ decl); + in_section = sect; + #else + /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here. +@@ -8492,18 +8488,18 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + { + char *name; + +- if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE) ++ if (TREE_CODE (decl) == IDENTIFIER_NODE) + name = ACONCAT ((sect->named.name, "$", +- IDENTIFIER_POINTER (DECL_NAME (decl)), NULL)); ++ IDENTIFIER_POINTER (decl), NULL)); + else + name = ACONCAT ((sect->named.name, "$", +- IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))), ++ IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)), + NULL)); + + targetm.asm_out.named_section (name, + sect->named.common.flags + | SECTION_LINKONCE, +- DECL_NAME (decl)); ++ decl); + in_section = sect; + } + else +@@ -8511,4 +8507,15 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + #endif + } + ++/* This function ensures that vtable_map variables are not only ++ in the comdat section, but that each variable has its own unique ++ comdat name. Without this the variables end up in the same section ++ with a single comdat name. 
*/ ++ ++static void ++handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) ++{ ++ switch_to_comdat_section(sect, DECL_NAME (decl)); ++} ++ + #include "gt-varasm.h" +diff --git a/gcc/varasm.h b/gcc/varasm.h +index d5d8c4e55..8ba8374e7 100644 +--- a/gcc/varasm.h ++++ b/gcc/varasm.h +@@ -79,4 +79,6 @@ extern rtx assemble_static_space (unsigned HOST_WIDE_INT); + + extern rtx assemble_trampoline_template (void); + ++extern void switch_to_comdat_section (section *, tree); ++ + #endif // GCC_VARASM_H +-- +2.33.0 +
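A reduced sketch of the failure mode from PR105169 (a reconstruction; the real reproducer is the two-file module test in the patch): a vague-linkage function is emitted as a COMDAT in several translation units, and -fpatchable-function-entry makes each copy emit a reference from __patchable_function_entries. Unless that reference lives in the same COMDAT group, the linker can keep one copy of the function yet be left with a reference into a discarded section.

inline int get_protocol () { return 0; }  /* COMDAT: emitted in every TU */

int
caller ()
{
  return get_protocol ();
}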
_service:tar_scm:0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch
Added
@@ -0,0 +1,53 @@ +From a3b4a0ac472415a52ce836e8997f7a69a06fad33 Mon Sep 17 00:00:00 2001 +From: Michael Collison <collison@rivosinc.com> +Date: Sat, 6 May 2023 12:37:50 -0600 +Subject: [PATCH 149/157] [Backport][SME] RISC-V: autovec: Verify that + GET_MODE_NUNITS is a multiple of 2. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9 + +While working on autovectorizing for the RISCV port I encountered an issue +where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is a +evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode), +where GET_MODE_NUNITS is equal to one. + +Tested on RISCV and x86_64-linux-gnu. Okay? + +gcc/ + * tree-vect-slp.cc (can_duplicate_and_interleave_p): + Check that GET_MODE_NUNITS is a multiple of 2. +--- + gcc/tree-vect-slp.cc | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index 6cbf8085f..d02f0ce37 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -401,10 +401,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, + (GET_MODE_BITSIZE (int_mode), 1); + tree vector_type + = get_vectype_for_scalar_type (vinfo, int_type, count); ++ poly_int64 half_nelts; + if (vector_type + && VECTOR_MODE_P (TYPE_MODE (vector_type)) + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), +- GET_MODE_SIZE (base_vector_mode))) ++ GET_MODE_SIZE (base_vector_mode)) ++ && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)), ++ 2, &half_nelts)) + { + /* Try fusing consecutive sequences of COUNT / NVECTORS elements + together into elements of type INT_TYPE and using the result +@@ -412,7 +415,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); + vec_perm_builder sel1 (nelts, 2, 3); + vec_perm_builder sel2 (nelts, 2, 3); +- poly_int64 half_nelts = exact_div (nelts, 2); ++ + for (unsigned int i = 0; i < 3; ++i) + { + sel1.quick_push (i); +-- +2.33.0 +
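The essence of the guard (restating the hunk; GCC-internal types): exact_div asserts divisibility, so for modes such as RISC-V's VNx1DImode, where GET_MODE_NUNITS is 1, the code now asks first and gives up quietly when the split is impossible.

poly_int64 half_nelts;
if (multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)), 2, &half_nelts))
  {
    /* Safe to build the two half-width permutations.  */
  }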
View file
_service:tar_scm:0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch
Added
@@ -0,0 +1,42 @@ +From b0ca9a6eb1406a60eec566cf302790bee89879af Mon Sep 17 00:00:00 2001 +From: Richard Biener <rguenther@suse.de> +Date: Tue, 18 Apr 2023 16:58:26 +0200 +Subject: PATCH 150/157 BackportSME Add operator* to gimple_stmt_iterator + and gphi_iterator + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44 + +This allows STL style iterator dereference. It's the same +as gsi_stmt () or .phi (). + + * gimple-iterator.h (gimple_stmt_iterator::operator*): Add. + (gphi_iterator::operator*): Likewise. +--- + gcc/gimple-iterator.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h +index 216ebee24..5d281e4f7 100644 +--- a/gcc/gimple-iterator.h ++++ b/gcc/gimple-iterator.h +@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see + + struct gimple_stmt_iterator + { ++ gimple *operator * () const { return ptr; } ++ + /* Sequence node holding the current statement. */ + gimple_seq_node ptr; + +@@ -38,6 +40,8 @@ struct gimple_stmt_iterator + /* Iterator over GIMPLE_PHI statements. */ + struct gphi_iterator : public gimple_stmt_iterator + { ++ gphi *operator * () const { return as_a <gphi *> (ptr); } ++ + gphi *phi () const + { + return as_a <gphi *> (ptr); +-- +2.33.0 +
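A self-contained miniature of the pattern (node and node_iterator are hypothetical names, not GCC code): the new operator* simply forwards to the accessor that already existed, so *it and it.get () below mirror *gsi and gsi_stmt (gsi):

#include <cstdio>

struct node { int value; node *next; };

struct node_iterator
{
  node *ptr;
  node *get () const { return ptr; }        // the existing accessor, like gsi_stmt ()
  node *operator* () const { return ptr; }  // the new STL-style spelling
};

int main ()
{
  node tail = { 2, nullptr }, head = { 1, &tail };
  node_iterator it = { &head };
  std::printf ("%d %d\n", it.get ()->value, (*it)->value);  // prints "1 1"
  return 0;
}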
View file
_service:tar_scm:0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch
Added
@@ -0,0 +1,75 @@
+From 90518c07dfb770b680fd8bdba76dd1b39103277d Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Fri, 10 Nov 2023 12:39:11 +0100
+Subject: [PATCH 151/157] [Backport][SME] tree-optimization/110221 - SLP and
+ loop mask/len
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
+
+The following fixes the issue that when SLP stmts are internal defs
+but appear invariant because they end up only using invariant defs
+then they get scheduled outside of the loop. This nice optimization
+breaks down when loop masks or lens are applied since those are not
+explicitly tracked as dependences. The following makes sure to never
+schedule internal defs outside of the vectorized loop when the
+loop uses masks/lens.
+
+	PR tree-optimization/110221
+	* tree-vect-slp.cc (vect_schedule_slp_node): When loop
+	masking / len is applied make sure to not schedule
+	internal defs outside of the loop.
+
+	* gfortran.dg/pr110221.f: New testcase.
+---
+ gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
+ gcc/tree-vect-slp.cc | 10 ++++++++++
+ 2 files changed, 27 insertions(+)
+ create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
+
+diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
+new file mode 100644
+index 000000000..8b5738431
+--- /dev/null
++++ b/gcc/testsuite/gfortran.dg/pr110221.f
+@@ -0,0 +1,17 @@
++C PR middle-end/68146
++C { dg-do compile }
++C { dg-options "-O2 -w" }
++C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
++      SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
++      IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
++      IMPLICIT COMPLEX*16 (C,Z)
++      DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
++      N=INT(V)
++      CALL GAMMA2(VG,GA)
++      DO 65 K=1,N
++         CBY(K)=CYY
++65    CONTINUE
++      CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
++      DO 70 K=1,N
++70    CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
++      END
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index d02f0ce37..e3e246977 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -8531,6 +8531,16 @@ vect_schedule_slp_node (vec_info *vinfo,
+   /* Emit other stmts after the children vectorized defs which is
+      earliest possible.  */
+   gimple *last_stmt = NULL;
++  if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
++    if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
++	|| LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
++      {
++	/* But avoid scheduling internal defs outside of the loop when
++	   we might have only implicitly tracked loop mask/len defs.  */
++	gimple_stmt_iterator si
++	  = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
++	last_stmt = *si;
++      }
+   bool seen_vector_def = false;
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+-- 
+2.33.0
+
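Note the series ordering: the hunk's `last_stmt = *si;` compiles only because of the gimple_stmt_iterator::operator* overload backported just above as patch 0249. On a tree without that overload, the same assignment would have to be spelled:

last_stmt = gsi_stmt (si);  // pre-operator* spelling of the same assignment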
View file
_service:tar_scm:0251-SME-Adapt-some-testsuites.patch
Added
@@ -0,0 +1,116 @@
+From b60c29e6658c8620f1116ce5a38a6eb823af64e6 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Thu, 7 Mar 2024 10:22:39 +0800
+Subject: [PATCH 152/157] [SME] Adapt some testsuites
+
+gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp:
+  GCC 12.3.0 does not support -std=c23 and -std=gnu23
+
+gcc.target/aarch64/sme/streaming_mode_2.c:
+  It's a warning in GCC 12.3.0
+
+gcc.dg/c2x-attr-syntax-6.c:
+gcc.dg/c2x-attr-syntax-7.c:
+  GCC 12.3.0 does not support C2x (...) function prototypes and
+  the C2x noreturn attribute
+
+gcc.target/aarch64/sme/za_state_4.c:
+  Seems to need an ldp/stp optimization; not a functionality issue
+---
+ gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 2 --
+ gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 2 --
+ .../gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp | 2 --
+ .../gcc.target/aarch64/sme/streaming_mode_2.c | 12 ++++++------
+ gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c | 1 +
+ 5 files changed, 7 insertions(+), 12 deletions(-)
+
+diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c
+index 9e5f65ce4..2385b25fe 100644
+--- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c
++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c
+@@ -15,13 +15,11 @@ typedef int __extension__ gnu FOO vector_size (4) g5;
+ typedef int __extension__ gnu BAR BAR vector_size (4) g6;
+ typedef int __extension__ gnu :/**/: vector_size (4) g7;
+ typedef int __extension__ gnu JOIN(:,:) vector_size (4) g8;
+-typedef int __extension__ gnu :: vector_size (sizeof (void (*)(...))) g10;
+ typedef int __extension__ g11;
+ typedef int __extension__, g12;
+ typedef int __extension__, ,,,, ,, , g13;
+ __extension__ deprecated int g14 ();
+ __extension__ nodiscard int g15 ();
+-__extension__ noreturn void g16 ();
+ 
+ int
+ cases (int x)
+diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c
+index 702f733b1..5bbdba665 100644
+--- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c
++++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c
+@@ -15,13 +15,11 @@ typedef int __extension__ gnu FOO vector_size (4) g5;
+ typedef int __extension__ gnu BAR BAR vector_size (4) g6;
+ typedef int __extension__ gnu :/**/: vector_size (4) g7;
+ typedef int __extension__ gnu JOIN(:,:) vector_size (4) g8;
+-typedef int __extension__ gnu :: vector_size (sizeof (void (*)(...))) g10;
+ typedef int __extension__ g11;
+ typedef int __extension__, g12;
+ typedef int __extension__, ,,,, ,, , g13;
+ __extension__ deprecated int g14 ();
+ __extension__ nodiscard int g15 ();
+-__extension__ noreturn void g16 ();
+ 
+ int
+ cases (int x)
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp
+index e2d002f26..a0a4fe4f7 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp
++++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp
+@@ -52,9 +52,7 @@ set-torture-options {
+     "-std=c90 -O0 -g"
+     "-std=c99 -Og -g"
+     "-std=c11 -Os -g"
+-    "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
+     "-std=gnu90 -O3 -g"
+-    "-std=gnu23 -Ofast -g"
+ } {
+     "-DTEST_FULL"
+     "-DTEST_OVERLOADS"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c
+index e8be0f821..1e328c817 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c
++++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c
+@@ -12,14 +12,14 @@ void
+ f ()
+ {
+   sc_fn_ptr = sc_fn;
+-  sc_fn_ptr = s_fn; // { dg-error "incompatible pointer type" }
+-  sc_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" }
++  sc_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" }
++  sc_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" }
+ 
+-  s_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" }
++  s_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" }
+   s_fn_ptr = s_fn;
+-  s_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" }
++  s_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" }
+ 
+-  ns_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" }
+-  ns_fn_ptr = s_fn; // { dg-error "incompatible pointer type" }
++  ns_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" }
++  ns_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" }
+   ns_fn_ptr = ns_fn;
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+index cec0abf0e..a764a7c89 100644
+--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+@@ -105,6 +105,7 @@ __arm_new("za") void test5()
+ **	mul	(x[0-9]+), \1, \1
+ **	sub	sp, sp, \2
+ **	mov	(x[0-9]+), sp
++**	add	[^\n]+
+ **	stp	\3, \1, \[x29, #?16\]
+ **	add	(x[0-9]+), x29, #?16
+ **	msr	tpidr2_el0, \4
+-- 
+2.33.0
+
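The dg-error to dg-warning churn above reflects only diagnostic severity, not behavior. A hypothetical g++-style sketch (not from the patch) of how a DejaGnu directive has to track what the compiler actually emits:

[[deprecated]] int old_api ();
int use () { return old_api (); }  // { dg-warning "is deprecated" }
// If a compiler escalated this diagnostic to an error, the directive would
// have to become { dg-error "is deprecated" } -- the same adjustment made
// above for GCC 12.3.0's incompatible-pointer diagnostics.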
View file
_service:tar_scm:0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch
Added
@@ -0,0 +1,43 @@
+From ce53aec1f43f79c093db662a2e8e3062462757b4 Mon Sep 17 00:00:00 2001
+From: xiezhiheng <xiezhiheng@huawei.com>
+Date: Thu, 22 Aug 2024 16:35:28 +0800
+Subject: [PATCH 153/157] [SME] Fix error by backported patches and IPA
+ prefetch
+
+Fix
+gtype-desc.cc: In function 'void gt_pch_p_30vec_cgraph_node__va_gc_atomic_(void*, void*, gt_pointer_operator, void*)':
+gtype-desc.cc:11032:35: error: call of overloaded 'gt_pch_nx(vec<cgraph_node*, va_gc_atomic>*, void (*&)(void*, void*, void*), void*&)' is ambiguous
+11032 |       gt_pch_nx (&((*x)), op, cookie);
+      |                                   ^
+In file included from ../../gcc/hash-table.h:248,
+                 from ../../gcc/coretypes.h:486,
+                 from gtype-desc.cc:23:
+../../gcc/vec.h:1395:1: note: candidate: 'void gt_pch_nx(vec<T*, A, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1395 | gt_pch_nx (vec<T *, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+      | ^~~~~~~~~
+../../gcc/vec.h:1403:1: note: candidate: 'void gt_pch_nx(vec<T, A, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node*; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1403 | gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie)
+      | ^~~~~~~~~
+../../gcc/vec.h:1412:1: note: candidate: 'void gt_pch_nx(vec<T, va_gc_atomic, vl_embed>*, gt_pointer_operator, void*) [with T = cgraph_node*; gt_pointer_operator = void (*)(void*, void*, void*)]'
+ 1412 | gt_pch_nx (vec<T, va_gc_atomic, vl_embed> *, gt_pointer_operator, void *)
+      | ^~~~~~~~~
+---
+ gcc/cgraph.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/cgraph.h b/gcc/cgraph.h
+index b84ff2f98..2332539e5 100644
+--- a/gcc/cgraph.h
++++ b/gcc/cgraph.h
+@@ -1660,7 +1660,7 @@ public:
+   /* ECF flags determined from the caller.  */
+   int ecf_flags;
+   /* Vector of potential call targets determined by analysis.  */
+-  vec<cgraph_node *, va_gc_atomic> *targets;
++  vec<cgraph_node *, va_gc> *targets;
+ 
+   /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES.  */
+   unsigned num_speculative_call_targets : 16;
+-- 
+2.33.0
+
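The failure is ordinary C++ overload resolution: one gt_pch_nx template is more specialized in the element type (T *), another in the allocator tag (va_gc_atomic), and a vec<cgraph_node *, va_gc_atomic> matches both with neither preferred. A reduced, self-contained sketch of the same ambiguity (simplified to two candidates; all names here are stand-ins, not GCC's):

template <typename T, typename A> struct vec {};
struct va_gc {};
struct va_gc_atomic {};

template <typename T, typename A> void nx (vec<T *, A> *) {}  // pointer elements
template <typename T> void nx (vec<T, va_gc_atomic> *) {}     // atomic allocator

int main ()
{
  vec<int *, va_gc> w;
  nx (&w);     // OK: only the first overload matches
  vec<int *, va_gc_atomic> v;
  // nx (&v);  // error: ambiguous, like the gtype-desc.cc failure above;
  //           // the patch sidesteps it by switching targets to va_gc
  return 0;
}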
View file
_service:tar_scm:0253-aarch64-Fix-return-register-handling-in-untyped_call.patch
Added
@@ -0,0 +1,66 @@
+From 38d0605ac8bc90324170041676fc05e7e595769e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Wed, 20 Sep 2023 11:13:19 +0100
+Subject: [PATCH] aarch64: Fix return register handling in untyped_call
+
+While working on another patch, I hit a problem with the aarch64
+expansion of untyped_call. The expander emits the usual:
+
+  (set (mem ...) (reg resN))
+
+instructions to store the result registers to memory, but it didn't
+say in RTL where those resN results came from. This eventually led
+to a failure of gcc.dg/torture/stackalign/builtin-return-2.c,
+via regrename.
+
+This patch turns the untyped call from a plain call to a call_value,
+to represent that the call returns (or might return) a useful value.
+The patch also uses a PARALLEL return rtx to represent all the possible
+return registers.
+
+gcc/
+	* config/aarch64/aarch64.md (untyped_call): Emit a call_value
+	rather than a call. List each possible destination register
+	in the call pattern.
+
+(cherry picked from commit 629efe27744d13c3b83bbe8338b84c37c83dbe4f)
+---
+ gcc/config/aarch64/aarch64.md | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index b9b3ba4f9164..cd568f3daa0f 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1159,9 +1159,27 @@
+ {
+   int i;
+ 
++  /* Generate a PARALLEL that contains all of the register results.
++     The offsets are somewhat arbitrary, since we don't know the
++     actual return type.  The main thing we need to avoid is having
++     overlapping byte ranges, since those might give the impression
++     that two registers are known to have data in common.  */
++  rtvec rets = rtvec_alloc (XVECLEN (operands[2], 0));
++  poly_int64 offset = 0;
++  for (i = 0; i < XVECLEN (operands[2], 0); i++)
++    {
++      rtx reg = SET_SRC (XVECEXP (operands[2], 0, i));
++      gcc_assert (REG_P (reg));
++      rtx offset_rtx = gen_int_mode (offset, Pmode);
++      rtx piece = gen_rtx_EXPR_LIST (VOIDmode, reg, offset_rtx);
++      RTVEC_ELT (rets, i) = piece;
++      offset += GET_MODE_SIZE (GET_MODE (reg));
++    }
++  rtx ret = gen_rtx_PARALLEL (VOIDmode, rets);
++
+   /* Untyped calls always use the default ABI.  It's only possible to use
+      ABI variants if we know the type of the target function.  */
+-  emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
++  emit_call_insn (gen_call_value (ret, operands[0], const0_rtx, const0_rtx));
+ 
+   for (i = 0; i < XVECLEN (operands[2], 0); i++)
+     {
+-- 
+2.43.5
+
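untyped_call backs GNU C's __builtin_apply machinery, where a forwarder calls a function and hands back its result without knowing the return type — which is exactly why every potential result register has to appear in the call pattern. A hedged usage sketch (the 64 is a guessed argument-block size, as the builtin requires one; target and forwarder are illustrative names):

typedef void (*any_fn) ();

double target (int a, double b) { return a + b; }

double forwarder (int a, double b)
{
  void *args = __builtin_apply_args ();                     // capture our own args
  void *ret = __builtin_apply ((any_fn) target, args, 64);  // the untyped call
  __builtin_return (ret);                                   // propagate the result
}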
View file
_service:tar_scm:0254-aarch64-Fix-loose-ldpstp-check.patch
Added
@@ -0,0 +1,119 @@ +From 74f99f1adc696f446115f36974a3f94f66294a53 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford <richard.sandiford@arm.com> +Date: Wed, 20 Sep 2023 11:13:20 +0100 +Subject: PATCH aarch64: Fix loose ldpstp check PR111411 + +aarch64_operands_ok_for_ldpstp contained the code: + + /* One of the memory accesses must be a mempair operand. + If it is not the first one, they need to be swapped by the + peephole. */ + if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) + && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) + return false; + +But the requirement isn't just that one of the accesses must be a +valid mempair operand. It's that the lower access must be, since +that's the access that will be used for the instruction operand. + +gcc/ + PR target/111411 + * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): Require + the lower memory access to a mem-pair operand. + +gcc/testsuite/ + PR target/111411 + * gcc.dg/rtl/aarch64/pr111411.c: New test. + +(cherry picked from commit 2d38f45bcca62ca0c7afef4b579f82c5c2a01610) +--- + gcc/config/aarch64/aarch64.cc | 8 ++- + gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c | 57 +++++++++++++++++++++ + 2 files changed, 60 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 96c3f48fdc49..a979accd90a9 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -26031,11 +26031,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, + gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)), + GET_MODE_SIZE (GET_MODE (mem_2)))); + +- /* One of the memory accesses must be a mempair operand. +- If it is not the first one, they need to be swapped by the +- peephole. */ +- if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1)) +- && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2))) ++ /* The lower memory access must be a mem-pair operand. */ ++ rtx lower_mem = reversed ? 
mem_2 : mem_1; ++ if (!aarch64_mem_pair_operand (lower_mem, GET_MODE (lower_mem))) + return false; + + if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) +diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +new file mode 100644 +index 000000000000..ad07e9c6c893 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr111411.c +@@ -0,0 +1,57 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O -fdisable-rtl-postreload -fpeephole2 -fno-schedule-fusion" } */ ++ ++extern int data; ++ ++void __RTL (startwith ("ira")) foo (void *ptr) ++{ ++ (function "foo" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} ++ ++void __RTL (startwith ("ira")) bar (void *ptr) ++{ ++ (function "bar" ++ (param "ptr" ++ (DECL_RTL (reg/v:DI <0> ptr )) ++ (DECL_RTL_INCOMING (reg/v:DI x0 ptr )) ++ ) ;; param "ptr" ++ (insn-chain ++ (block 2 ++ (edge-from entry (flags "FALLTHRU")) ++ (cnote 3 bb 2 NOTE_INSN_BASIC_BLOCK) ++ (insn 4 (set (reg:DI <0>) (reg:DI x0))) ++ (insn 5 (set (reg:DI <1>) ++ (plus:DI (reg:DI <0>) (const_int 768)))) ++ (insn 6 (set (mem:SI (plus:DI (reg:DI <1>) ++ (const_int -256)) 1 &data+512 S4 A4) ++ (const_int 0))) ++ (insn 7 (set (mem:SI (plus:DI (reg:DI <0>) ++ (const_int 508)) 1 &data+508 S4 A4) ++ (const_int 0))) ++ (edge-to exit (flags "FALLTHRU")) ++ ) ;; block 2 ++ ) ;; insn-chain ++ ) ;; function ++} +-- +2.43.5 +
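The constraint matters because a store pair encodes a single base+offset taken from the lower address, with a scaled 7-bit immediate (roughly -256..252 bytes for w-registers). A hedged illustration of the well-formed case the peephole targets, with assumed codegen:

void zero_two (int *p)
{
  p[10] = 0;  // lower access, byte offset 40: must itself be stp-encodable
  p[11] = 0;  // upper access, byte offset 44
}
// expected fusion, roughly: stp wzr, wzr, [x0, 40]
// The pr111411 tests above instead place the lower access at an offset that
// only the *other* operand's addressing form could encode, which the old
// "either operand" check wrongly accepted.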
View file
_service:tar_scm:0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
Added
@@ -0,0 +1,135 @@ +From 1649f9fbbc5267de2a675336d3ac665528a03db8 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Wed, 10 May 2023 15:16:58 +0800 +Subject: PATCH 01/28 x86: Add a new option -mdaz-ftz to enable FTZ and DAZ + flags in MXCSR. + + if (mdaz-ftz) + link crtfastmath.o + else if ((Ofast || ffast-math || funsafe-math-optimizations) + && !mno-daz-ftz) + link crtfastmath.o + else + Don't link crtfastmath.o + +gcc/ChangeLog: + + * config/i386/cygwin.h (ENDFILE_SPEC): Link crtfastmath.o + whenever -mdaz-ftz is specified. Don't link crtfastmath.o + when -mno-daz-ftz is specified. + * config/i386/darwin.h (ENDFILE_SPEC): Ditto. + * config/i386/gnu-user-common.h + (GNU_USER_TARGET_MATHFILE_SPEC): Ditto. + * config/i386/mingw32.h (ENDFILE_SPEC): Ditto. + * config/i386/i386.opt (mdaz-ftz): New option. + * doc/invoke.texi (x86 options): Document mftz-daz. +--- + gcc/config/i386/cygwin.h | 2 +- + gcc/config/i386/darwin.h | 4 ++-- + gcc/config/i386/gnu-user-common.h | 2 +- + gcc/config/i386/i386.opt | 4 ++++ + gcc/config/i386/mingw32.h | 2 +- + gcc/doc/invoke.texi | 11 ++++++++++- + 6 files changed, 19 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h +index d06eda369..5412c5d44 100644 +--- a/gcc/config/i386/cygwin.h ++++ b/gcc/config/i386/cygwin.h +@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{!shared:%:if-exists(default-manifest.o%s)}\ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end.o%s; \ +diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h +index a55f6b2b8..2f773924d 100644 +--- a/gcc/config/i386/darwin.h ++++ b/gcc/config/i386/darwin.h +@@ -109,8 +109,8 @@ along with GCC; see the file COPYING3. If not see + "%{!force_cpusubtype_ALL:-force_cpusubtype_ALL} " + + #undef ENDFILE_SPEC +-#define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++#define ENDFILE_SPEC ++\ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{mpc32:crtprec32.o%s} \ + %{mpc64:crtprec64.o%s} \ + %{mpc80:crtprec80.o%s}" TM_DESTRUCTOR +diff --git a/gcc/config/i386/gnu-user-common.h b/gcc/config/i386/gnu-user-common.h +index 23b54c5be..3d2a33f17 100644 +--- a/gcc/config/i386/gnu-user-common.h ++++ b/gcc/config/i386/gnu-user-common.h +@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see + + /* Similar to standard GNU userspace, but adding -ffast-math support. */ + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{mpc32:crtprec32.o%s} \ + %{mpc64:crtprec64.o%s} \ + %{mpc80:crtprec80.o%s}" +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index fc1b944ac..498fb454d 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -420,6 +420,10 @@ mpc80 + Target RejectNegative + Set 80387 floating-point precision to 80-bit. + ++mdaz-ftz ++Target ++Set the FTZ and DAZ Flags. ++ + mpreferred-stack-boundary= + Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg) + Attempt to keep stack aligned to this power of 2. 
+diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h +index d3ca0cd02..ddbe6a405 100644 +--- a/gcc/config/i386/mingw32.h ++++ b/gcc/config/i386/mingw32.h +@@ -197,7 +197,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \ + %{!shared:%:if-exists(default-manifest.o%s)}\ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end.o%s; \ +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 2b376e0e9..3a48655e5 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1437,7 +1437,7 @@ See RS/6000 and PowerPC Options. + -m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol + -mregparm=@var{num} -msseregparm @gol + -mveclibabi=@var{type} -mvect8-ret-in-mem @gol +--mpc32 -mpc64 -mpc80 -mstackrealign @gol ++-mpc32 -mpc64 -mpc80 -mdaz-ftz -mstackrealign @gol + -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol + -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol + -m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol +@@ -32122,6 +32122,15 @@ are enabled by default; routines in such libraries could suffer significant + loss of accuracy, typically through so-called ``catastrophic cancellation'', + when this option is used to set the precision to less than extended precision. + ++@item -mdaz-ftz ++@opindex mdaz-ftz ++ ++The flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register ++are used to control floating-point calculations.SSE and AVX instructions ++including scalar and vector instructions could benefit from enabling the FTZ ++and DAZ flags when @option{-mdaz-ftz} is specified. Don't set FTZ/DAZ flags ++when @option{-mno-daz-ftz} is specified. ++ + @item -mstackrealign + @opindex mstackrealign + Realign the stack at entry. On the x86, the @option{-mstackrealign} +-- +2.31.1 +
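Linking crtfastmath.o simply runs a startup constructor that flips the two MXCSR bits. A hedged sketch of the equivalent effect using the documented SSE intrinsics (enable_daz_ftz is an illustrative name; DAZ needs SSE3-era support):

#include <xmmintrin.h>  // _MM_SET_FLUSH_ZERO_MODE
#include <pmmintrin.h>  // _MM_SET_DENORMALS_ZERO_MODE

void enable_daz_ftz ()
{
  _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);          // FTZ: flush tiny results
  _MM_SET_DENORMALS_ZERO_MODE (_MM_DENORMALS_ZERO_ON);  // DAZ: zero tiny inputs
}

// -mdaz-ftz links the startup equivalent of this even without -ffast-math;
// -mno-daz-ftz keeps it out even under -Ofast.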
View file
_service:tar_scm:0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch
Added
@@ -0,0 +1,65 @@ +From e70fa730dcfcb3a7b1d56a2e166752d4299f0504 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 5 Jun 2023 12:38:41 +0800 +Subject: PATCH 02/28 Explicitly view_convert_expr mask to signed type when + folding pblendvb builtins. + +Since mask < 0 will be always false for vector char when +-funsigned-char, but vpblendvb needs to check the most significant +bit. The patch explicitly VCE to vector signed char. + +gcc/ChangeLog: + + PR target/110108 + * config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly + view_convert_expr mask to signed type when folding pblendvb + builtins. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110108-2.c: New test. +--- + gcc/config/i386/i386.cc | 4 +++- + gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 462dce10e..479fc6010 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -18396,8 +18396,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) + tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode + ? intSI_type_node : intDI_type_node; + type = get_same_sized_vectype (itype, type); +- arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); + } ++ else ++ type = signed_type_for (type); ++ arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2); + tree zero_vec = build_zero_cst (type); + tree cmp_type = truth_type_for (type); + tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec); +diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c +new file mode 100644 +index 000000000..2d1d2fd49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx2 -O2 -funsigned-char" } */ ++/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */ ++ ++#include <immintrin.h> ++__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) { ++ __m128i Result = _mm_blendv_epi8(X0, X1, X2); ++ return Result; ++} ++ ++__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) { ++ __m256i Result = _mm256_blendv_epi8(X0, X1, X2); ++ return Result; ++} +-- +2.31.1 +
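A self-contained scalar illustration of the bug: vpblendvb selects on each byte's top bit, which `m < 0` models only when m is signed; under -funsigned-char the comparison is vacuously false and the blend would fold away.

#include <cstdio>

int main ()
{
  unsigned char m = 0x80;           // top bit set, like a vpblendvb mask byte
  signed char s = (signed char) m;  // the view_convert the fix inserts
  std::printf ("%d %d\n",
               m < 0 ? 1 : 0,       // 0: an unsigned value is never negative
               s < 0 ? 1 : 0);      // 1: the bit the blend actually tests
  return 0;
}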
View file
_service:tar_scm:0257-Make-option-mvzeroupper-independent-of-optimization-.patch
Added
@@ -0,0 +1,138 @@ +From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 26 Jun 2023 09:50:25 +0800 +Subject: PATCH 03/28 Make option mvzeroupper independent of optimization + level. + +pass_insert_vzeroupper is under condition + +TARGET_AVX && TARGET_VZEROUPPER +&& flag_expensive_optimizations && !optimize_size + +But the document of mvzeroupper doesn't mention the insertion +required -O2 and above, it may confuse users when they explicitly +use -Os -mvzeroupper. + +------------ +mvzeroupper +Target Mask(VZEROUPPER) Save +Generate vzeroupper instruction before a transfer of control flow out of +the function. +------------ + +The patch moves flag_expensive_optimizations && !optimize_size to +ix86_option_override_internal. It makes -mvzeroupper independent of +optimization level, but still keeps the behavior of architecture +tuning(emit_vzeroupper) unchanged. + +gcc/ChangeLog: + + * config/i386/i386-features.cc (pass_insert_vzeroupper:gate): + Move flag_expensive_optimizations && !optimize_size to .. + * config/i386/i386-options.cc (ix86_option_override_internal): + .. this, it makes -mvzeroupper independent of optimization + level, but still keeps the behavior of architecture + tuning(emit_vzeroupper) unchanged. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx-vzeroupper-29.c: New testcase. + * gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase. + * gcc.target/i386/avx-vzeroupper-7.c: Ditto. + * gcc.target/i386/avx-vzeroupper-9.c: Ditto. +--- + gcc/config/i386/i386-features.cc | 3 +-- + gcc/config/i386/i386-options.cc | 4 +++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++ + gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++- + gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++- + 6 files changed, 24 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c + +diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc +index 6fe41c3c2..6a2444eb6 100644 +--- a/gcc/config/i386/i386-features.cc ++++ b/gcc/config/i386/i386-features.cc +@@ -1875,8 +1875,7 @@ public: + /* opt_pass methods: */ + virtual bool gate (function *) + { +- return TARGET_AVX && TARGET_VZEROUPPER +- && flag_expensive_optimizations && !optimize_size; ++ return TARGET_AVX && TARGET_VZEROUPPER; + } + + virtual unsigned int execute (function *) +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index ff44ad4e0..74e969b68 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p, + sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); + + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) +- && TARGET_EMIT_VZEROUPPER) ++ && TARGET_EMIT_VZEROUPPER ++ && flag_expensive_optimizations ++ && !optimize_size) + opts->x_target_flags |= MASK_VZEROUPPER; + if (!(opts_set->x_target_flags & MASK_STV)) + opts->x_target_flags |= MASK_STV; +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +index e694d4048..5a40e8783 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c +@@ -16,5 +16,6 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ ++/* { dg-final { scan-assembler-times 
"avx_vzeroupper" 4 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ + /* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c +new file mode 100644 +index 000000000..4af637757 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */ ++ ++#include <immintrin.h> ++ ++extern __m256 x, y; ++ ++void ++foo () ++{ ++ x = y; ++} ++ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +index ab6d68779..75fe58897 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c +@@ -12,4 +12,5 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +index 974e1626a..fa0a6dfca 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c +@@ -15,4 +15,5 @@ foo () + _mm256_zeroupper (); + } + +-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */ ++/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */ +-- +2.31.1 +
View file
_service:tar_scm:0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch
Added
@@ -0,0 +1,68 @@ +From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Sun, 25 Jun 2023 09:50:21 +0800 +Subject: PATCH 04/28 i386: Sync tune_string with arch_string for target + attribute + +arch=* + +For function with target attribute arch=*, current logic will set its +tune to -mtune from command line so all target_clones will get same +tuning flags which would affect the performance for each clone. Override +tune with arch if tune was not explicitly specified to get proper tuning +flags for target_clones. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (ix86_valid_target_attribute_tree): + Override tune_string with arch_string if tune_string is not + explicitly specified. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/mvc17.c: New test. + +(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8) +--- + gcc/config/i386/i386-options.cc | 6 +++++- + gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 74e969b68..fb2ed942f 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, + if (option_stringsIX86_FUNCTION_SPECIFIC_TUNE) + opts->x_ix86_tune_string + = ggc_strdup (option_stringsIX86_FUNCTION_SPECIFIC_TUNE); +- else if (orig_tune_defaulted) ++ /* If we have explicit arch string and no tune string specified, set ++ tune_string to NULL and later it will be overriden by arch_string ++ so target clones can get proper optimization. */ ++ else if (option_stringsIX86_FUNCTION_SPECIFIC_ARCH ++ || orig_tune_defaulted) + opts->x_ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ +diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c +new file mode 100644 +index 000000000..8b83c1aec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/mvc17.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-ifunc "" } */ ++/* { dg-options "-O2 -march=x86-64" } */ ++/* { dg-final { scan-assembler-times "rep mov" 1 } } */ ++ ++__attribute__((target_clones("default","arch=icelake-server"))) ++void ++foo (char *a, char *b, int size) ++{ ++ __builtin_memcpy (a, b, size & 0x7F); ++} +-- +2.31.1 +
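Each target_clones version is compiled as a separate function and selected by an ifunc resolver at load time; the fix makes each clone inherit its arch= value as its tuning instead of all clones sharing the command-line -mtune. A hedged usage sketch (dot is an illustrative function, not from the patch):

__attribute__((target_clones ("default", "arch=icelake-server")))
int dot (const int *a, const int *b, int n)
{
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += a[i] * b[i];
  return s;
}
// Callers just call dot (); with the fix, the icelake-server clone is also
// *tuned* for icelake-server rather than for the generic -mtune default.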
View file
_service:tar_scm:0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
Added
@@ -0,0 +1,111 @@ +From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Tue, 20 Jun 2023 15:41:00 +0800 +Subject: PATCH 05/28 Refine maskloadmn pattern with UNSPEC_MASKLOAD. + +If mem_addr points to a memory region with less than whole vector size +bytes of accessible memory and k is a mask that would prevent reading +the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent +it to be transformed to vpblendd. + +gcc/ChangeLog: + + PR target/110309 + * config/i386/sse.md (maskload<mode><avx512fmaskmodelower>): + Refine pattern with UNSPEC_MASKLOAD. + (maskload<mode><avx512fmaskmodelower>): Ditto. + (*<avx512>_load<mode>_mask): Extend mode iterator to + VI12HF_AVX512VL. + (*<avx512>_load<mode>): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr110309.c: New test. +--- + gcc/config/i386/sse.md | 32 +++++++++++++----------- + gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++ + 2 files changed, 28 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index eb767e56c..b30e96cb1 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1411,12 +1411,12 @@ + }) + + (define_insn "*<avx512>_load<mode>_mask" +- (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") +- (vec_merge:VI12_AVX512VL +- (unspec:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand" "m") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v") ++ (vec_merge:VI12HF_AVX512VL ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m") + UNSPEC_MASKLOAD) +- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") ++ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk"))) + "TARGET_AVX512BW" + "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" +@@ -1425,9 +1425,9 @@ + (set_attr "mode" "<sseinsnmode>")) + + (define_insn_and_split "*<avx512>_load<mode>" +- (set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") +- (unspec:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand" "m") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand" "m") + UNSPEC_MASKLOAD)) + "TARGET_AVX512BW" + "#" +@@ -25973,17 +25973,21 @@ + "TARGET_AVX") + + (define_expand "maskload<mode><avx512fmaskmodelower>" +- (set (match_operand:V48H_AVX512VL 0 "register_operand") +- (vec_merge:V48H_AVX512VL +- (match_operand:V48H_AVX512VL 1 "memory_operand") ++ (set (match_operand:V48_AVX512VL 0 "register_operand") ++ (vec_merge:V48_AVX512VL ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "memory_operand") ++ UNSPEC_MASKLOAD) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + "TARGET_AVX512F") + + (define_expand "maskload<mode><avx512fmaskmodelower>" +- (set (match_operand:VI12_AVX512VL 0 "register_operand") +- (vec_merge:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "memory_operand") ++ (set (match_operand:VI12HF_AVX512VL 0 "register_operand") ++ (vec_merge:VI12HF_AVX512VL ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "memory_operand") ++ UNSPEC_MASKLOAD) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand"))) + "TARGET_AVX512BW") +diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c +new file mode 100644 +index 
000000000..f6e9e9c3c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr110309.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
++/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
++
++
++void foo (int * __restrict a, int *b)
++{
++  for (int i = 0; i < 6; ++i)
++    a[i] = b[i] + 42;
++}
+-- 
+2.31.1
+
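The hazard being guarded, concretely: with partial-vector usage only six of eight lanes are semantically loaded, so folding the masked load into a full-width load plus vpblendd could read past the object. A hedged sketch of the bad layout:

// Suppose b points at the last 24 bytes of a mapped page (6 ints):
//   masked 8-lane load, mask = {1,1,1,1,1,1,0,0} -> reads 24 bytes, OK
//   vmovdqu ymm + vpblendd                       -> reads 32 bytes, may fault
void tail (int *__restrict a, const int *b)
{
  for (int i = 0; i < 6; ++i)
    a[i] = b[i] + 42;
}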
View file
_service:tar_scm:0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
Added
@@ -0,0 +1,126 @@ +From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Mon, 26 Jun 2023 21:07:09 +0800 +Subject: PATCH 06/28 Refine maskstore patterns with UNSPEC_MASKMOV. + +Similar like r14-2070-gc79476da46728e + +If mem_addr points to a memory region with less than whole vector size +bytes of accessible memory and k is a mask that would prevent reading +the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent +it to be transformed to any other whole memory access instructions. + +gcc/ChangeLog: + + PR rtl-optimization/110237 + * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with + UNSPEC_MASKMOV. + (maskstore<mode><avx512fmaskmodelower): Ditto. + (*<avx512>_store<mode>_mask): New define_insn, it's renamed + from original <avx512>_store<mode>_mask. +--- + gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 57 insertions(+), 12 deletions(-) + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index b30e96cb1..3af159896 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1554,7 +1554,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")) + +-(define_insn "<avx512>_store<mode>_mask" ++(define_insn "*<avx512>_store<mode>_mask" + (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") + (vec_merge:V48_AVX512VL + (match_operand:V48_AVX512VL 1 "register_operand" "v") +@@ -1582,7 +1582,7 @@ + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")) + +-(define_insn "<avx512>_store<mode>_mask" ++(define_insn "*<avx512>_store<mode>_mask" + (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") + (vec_merge:VI12HF_AVX512VL + (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") +@@ -26002,21 +26002,66 @@ + "TARGET_AVX") + + (define_expand "maskstore<mode><avx512fmaskmodelower>" +- (set (match_operand:V48H_AVX512VL 0 "memory_operand") +- (vec_merge:V48H_AVX512VL +- (match_operand:V48H_AVX512VL 1 "register_operand") +- (match_dup 0) +- (match_operand:<avx512fmaskmode> 2 "register_operand"))) ++ (set (match_operand:V48_AVX512VL 0 "memory_operand") ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "register_operand") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand") ++ UNSPEC_MASKMOV)) + "TARGET_AVX512F") + + (define_expand "maskstore<mode><avx512fmaskmodelower>" +- (set (match_operand:VI12_AVX512VL 0 "memory_operand") +- (vec_merge:VI12_AVX512VL +- (match_operand:VI12_AVX512VL 1 "register_operand") +- (match_dup 0) +- (match_operand:<avx512fmaskmode> 2 "register_operand"))) ++ (set (match_operand:VI12HF_AVX512VL 0 "memory_operand") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "register_operand") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand") ++ UNSPEC_MASKMOV)) + "TARGET_AVX512BW") + ++(define_insn "<avx512>_store<mode>_mask" ++ (set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") ++ (unspec:V48_AVX512VL ++ (match_operand:V48_AVX512VL 1 "register_operand" "v") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk") ++ UNSPEC_MASKMOV)) ++ "TARGET_AVX512F" ++{ ++ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) ++ { ++ if (misaligned_operand (operands0, <MODE>mode)) ++ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ else ++ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ } ++ else ++ { ++ if (misaligned_operand (operands0, <MODE>mode)) ++ return "vmovdqu<ssescalarsize>\t{%1, 
%0%{%2%}|%0%{%2%}, %1}"; ++ else ++ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; ++ } ++} ++ (set_attr "type" "ssemov") ++ (set_attr "prefix" "evex") ++ (set_attr "memory" "store") ++ (set_attr "mode" "<sseinsnmode>")) ++ ++(define_insn "<avx512>_store<mode>_mask" ++ (set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m") ++ (unspec:VI12HF_AVX512VL ++ (match_operand:VI12HF_AVX512VL 1 "register_operand" "v") ++ (match_dup 0) ++ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk") ++ UNSPEC_MASKMOV)) ++ "TARGET_AVX512BW" ++ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" ++ (set_attr "type" "ssemov") ++ (set_attr "prefix" "evex") ++ (set_attr "memory" "store") ++ (set_attr "mode" "<sseinsnmode>")) ++ + (define_expand "cbranch<mode>4" + (set (reg:CC FLAGS_REG) + (compare:CC (match_operand:VI48_AVX 1 "register_operand") +-- +2.31.1 +
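Stores need the same fencing for a second reason: rewriting a masked store as load, blend, store-back would write the masked-off bytes with their old values, which is a data race if another thread owns them, on top of the possible fault. A hedged sketch:

void tail_store (int *a, int n)  // n < 8: lanes n..7 must stay untouched
{
  for (int i = 0; i < n; ++i)
    a[i] = 0;  // a masked store may not be widened into a full-vector
               // read-modify-write of a[0..7]
}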
View file
_service:tar_scm:0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch
Added
@@ -0,0 +1,38 @@ +From 50757adc93ef32a97a8a1083f5d53a9c00da6ac8 Mon Sep 17 00:00:00 2001 +From: "Cui, Lili" <lili.cui@intel.com> +Date: Thu, 29 Jun 2023 03:10:35 +0000 +Subject: PATCH 07/28 x86: Update model values for Alderlake and Rocketlake. + +Update model values for Alderlake and Rocketlake according to SDM. + +gcc/ChangeLog + + * common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8 + from Rocketlake, remove model value 0xbf from Alderlake. +--- + gcc/common/config/i386/cpuinfo.h | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 0333da56b..28b2ff0b0 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -435,7 +435,6 @@ get_intel_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE; + break; + case 0xa7: +- case 0xa8: + /* Rocket Lake. */ + cpu = "rocketlake"; + CHECK___builtin_cpu_is ("corei7"); +@@ -508,7 +507,6 @@ get_intel_cpu (struct __processor_model *cpu_model, + break; + case 0x97: + case 0x9a: +- case 0xbf: + /* Alder Lake. */ + cpu = "alderlake"; + CHECK___builtin_cpu_is ("corei7"); +-- +2.31.1 +
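These model tables drive both -march=native and the runtime dispatch builtins, so a stale entry misdetects the core. A hedged sketch of the consumer side (standard GCC builtins):

#include <cstdio>

int main ()
{
  __builtin_cpu_init ();
  if (__builtin_cpu_is ("alderlake"))   // model 0xbf no longer matches here
    std::printf ("alderlake\n");
  if (__builtin_cpu_is ("rocketlake"))  // model 0xa8 likewise removed
    std::printf ("rocketlake\n");
  return 0;
}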
View file
_service:tar_scm:0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
Added
@@ -0,0 +1,78 @@ +From 60364b439a80c217174e1830e0b7507d6f4538c4 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Fri, 4 Aug 2023 09:27:39 +0800 +Subject: PATCH 08/28 Workaround possible CPUID bug in Sandy Bridge. + +Don't access leaf 7 subleaf 1 unless subleaf 0 says it is +supported via EAX. + +Intel documentation says invalid subleaves return 0. We had been +relying on that behavior instead of checking the max sublef number. + +It appears that some Sandy Bridge CPUs return at least the subleaf 0 +EDX value for subleaf 1. Best guess is that this is a bug in a +microcode patch since all of the bits we're seeing set in EDX were +introduced after Sandy Bridge was originally released. + +This is causing avxvnniint16 to be incorrectly enabled with +-march=native on these CPUs. + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_available_features): Check + max_subleaf_level for valid subleaf before use CPUID. +--- + gcc/common/config/i386/cpuinfo.h | 29 +++++++++++++++++------------ + 1 file changed, 17 insertions(+), 12 deletions(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 28b2ff0b0..316ad3cb3 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -647,7 +647,9 @@ get_available_features (struct __processor_model *cpu_model, + /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */ + if (max_cpuid_level >= 7) + { +- __cpuid_count (7, 0, eax, ebx, ecx, edx); ++ unsigned int max_subleaf_level; ++ ++ __cpuid_count (7, 0, max_subleaf_level, ebx, ecx, edx); + if (ebx & bit_BMI) + set_feature (FEATURE_BMI); + if (ebx & bit_SGX) +@@ -759,18 +761,21 @@ get_available_features (struct __processor_model *cpu_model, + set_feature (FEATURE_AVX512FP16); + } + +- __cpuid_count (7, 1, eax, ebx, ecx, edx); +- if (eax & bit_HRESET) +- set_feature (FEATURE_HRESET); +- if (avx_usable) +- { +- if (eax & bit_AVXVNNI) +- set_feature (FEATURE_AVXVNNI); +- } +- if (avx512_usable) ++ if (max_subleaf_level >= 1) + { +- if (eax & bit_AVX512BF16) +- set_feature (FEATURE_AVX512BF16); ++ __cpuid_count (7, 1, eax, ebx, ecx, edx); ++ if (eax & bit_HRESET) ++ set_feature (FEATURE_HRESET); ++ if (avx_usable) ++ { ++ if (eax & bit_AVXVNNI) ++ set_feature (FEATURE_AVXVNNI); ++ } ++ if (avx512_usable) ++ { ++ if (eax & bit_AVX512BF16) ++ set_feature (FEATURE_AVX512BF16); ++ } + } + } + +-- +2.31.1 +
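The hardened probing order as a standalone sketch, using the cpuid.h helpers: leaf 7 subleaf 0 reports the maximum valid subleaf in EAX, and only then is subleaf 1 consulted.

#include <cpuid.h>
#include <cstdio>

int main ()
{
  unsigned eax, ebx, ecx, edx;
  if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) && eax >= 1)
    {
      __cpuid_count (7, 1, eax, ebx, ecx, edx);
      std::printf ("leaf 7 subleaf 1: eax = %#x\n", eax);  // AVX-VNNI etc.
    }
  return 0;
}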
View file
_service:tar_scm:0263-Software-mitigation-Disable-gather-generation-in-vec.patch
Added
@@ -0,0 +1,220 @@ +From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Thu, 10 Aug 2023 11:41:39 +0800 +Subject: PATCH 09/28 Software mitigation: Disable gather generation in + vectorization for GDS affected Intel Processors. + +For more details of GDS (Gather Data Sampling), refer to +https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html + +After microcode update, there's performance regression. To avoid that, +the patch disables gather generation in autovectorization but uses +gather scalar emulation instead. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (m_GDS): New macro. + * config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't + enable for m_GDS. + (X86_TUNE_USE_GATHER_4PARTS): Ditto. + (X86_TUNE_USE_GATHER): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx2-gather-2.c: Adjust options to keep + gather vectorization. + * gcc.target/i386/avx2-gather-6.c: Ditto. + * gcc.target/i386/avx512f-pr88464-1.c: Ditto. + * gcc.target/i386/avx512f-pr88464-5.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-1.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-11.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-3.c: Ditto. + * gcc.target/i386/avx512vl-pr88464-9.c: Ditto. + * gcc.target/i386/pr88531-1b.c: Ditto. + * gcc.target/i386/pr88531-1c.c: Ditto. + +(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d) +--- + gcc/config/i386/i386-options.cc | 5 +++++ + gcc/config/i386/x86-tune.def | 9 ++++++--- + gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 2 +- + gcc/testsuite/gcc.target/i386/avx2-gather-6.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c | 2 +- + gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c | 2 +- + gcc/testsuite/gcc.target/i386/pr88531-1b.c | 2 +- + gcc/testsuite/gcc.target/i386/pr88531-1c.c | 2 +- + 12 files changed, 21 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index fb2ed942f..9617fc162 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3. If not see + #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS) + #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT) + #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL) ++/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828. ++ Software mitigation. */ ++#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \ ++ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ ++ | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE) + + #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE) + #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6) +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index e6b9e2125..4392709fc 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 + elements. 
*/ + DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 + elements. */ +@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", + /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 + elements. */ +@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE ++ | m_GENERIC | m_GDS)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. */ +diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +index ad5ef7310..978924b0f 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */ ++/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */ + + #include "avx2-gather-1.c" + +diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c +index b9119581a..067b251e3 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */ ++/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=haswell" } */ + + #include "avx2-gather-5.c" + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c +index 06d21bb01..d1a229861 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c +index 462e951fd..d7b0b2b28 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */ + /* { 
dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c +index 55a28dddb..07439185e 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c +index 969600885..3a9810827 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c +index 6b0c8a859..ac669e048 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c +index 3af568ab3..14a1083b6 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c +@@ -1,6 +1,6 @@ + /* PR tree-optimization/88464 */ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ ++/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c +index 812c8a10f..e6df789de 100644 +--- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c ++++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -march=skylake -mfpmath=sse" } */ ++/* { dg-options "-O3 
-march=skylake -mfpmath=sse -mtune=haswell" } */ + + #include "pr88531-1a.c" + +diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1c.c b/gcc/testsuite/gcc.target/i386/pr88531-1c.c +index 43fc5913e..a093c87c0 100644 +--- a/gcc/testsuite/gcc.target/i386/pr88531-1c.c ++++ b/gcc/testsuite/gcc.target/i386/pr88531-1c.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse" } */ ++/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse -mtune=haswell" } */ + + #include "pr88531-1a.c" + +-- +2.31.1 +
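A note on the tuning change above: the loops affected are those the vectorizer can only implement with a hardware gather. A minimal sketch (illustrative only, not part of the patch; names are made up):

    /* s += val[idx[i]] is vectorized with vgatherdps on AVX2 targets
       whose tuning still allows gathers.  */
    float sum_indexed (const float *val, const int *idx, int n)
    {
      float s = 0.0f;
      for (int i = 0; i < n; i++)
        s += val[idx[i]];
      return s;
    }

With m_GDS added to the masks above (per the macro name, the set of CPUs affected by Gather Data Sampling), compiling such a loop with -O3 -march=skylake no longer uses gather instructions, which is why the testsuite hunks pin -mtune=haswell so the vectorizer dumps still see gathers.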
View file
_service:tar_scm:0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
Added
@@ -0,0 +1,187 @@
+From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 10 Aug 2023 16:26:13 +0800
+Subject: [PATCH 10/28] Support -mno-gather -mno-scatter to enable/disable
+ vectorization for all gather/scatter instructions
+
+Rename the original use_gather to use_gather_8parts, support
+-mtune-ctrl={,^}use_gather to set/clear the tune features
+use_gather_{2parts, 4parts, 8parts}, and support the new option -mgather
+as an alias of -mtune-ctrl=, use_gather, ^use_gather.
+
+Similar for use_scatter.
+
+gcc/ChangeLog:
+
+	* config/i386/i386-builtins.cc
+	(ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
+	* config/i386/i386-options.cc (parse_mtune_ctrl_str):
+	Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
+	8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
+	* config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
+	for use_scatter_8parts.
+	* config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
+	(TARGET_USE_GATHER_8PARTS): .. this.
+	(TARGET_USE_SCATTER): Rename to ..
+	(TARGET_USE_SCATTER_8PARTS): .. this.
+	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
+	(X86_TUNE_USE_GATHER_8PARTS): .. this.
+	(X86_TUNE_USE_SCATTER): Rename to
+	(X86_TUNE_USE_SCATTER_8PARTS): .. this.
+	* config/i386/i386.opt: Add new options mgather, mscatter.
+
+(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737)
+---
+ gcc/config/i386/i386-builtins.cc |  2 +-
+ gcc/config/i386/i386-options.cc  | 54 +++++++++++++++++++++++---------
+ gcc/config/i386/i386.cc          |  2 +-
+ gcc/config/i386/i386.h           |  8 ++---
+ gcc/config/i386/i386.opt         |  4 +++
+ gcc/config/i386/x86-tune.def     |  4 +--
+ 6 files changed, 52 insertions(+), 22 deletions(-)
+
+diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
+index 050c6228a..8ed32e14f 100644
+--- a/gcc/config/i386/i386-builtins.cc
++++ b/gcc/config/i386/i386-builtins.cc
+@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
+ 	  ? !TARGET_USE_GATHER_2PARTS
+ 	  : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
+ 	     ? !TARGET_USE_GATHER_4PARTS
+-	     : !TARGET_USE_GATHER)))
++	     : !TARGET_USE_GATHER_8PARTS)))
+     return NULL_TREE;
+ 
+   if ((TREE_CODE (index_type) != INTEGER_TYPE
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 9617fc162..3df1f0c41 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
+ 	  curr_feature_string++;
+ 	  clear = true;
+ 	}
+-      for (i = 0; i < X86_TUNE_LAST; i++)
+-	{
+-	  if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
+-	    {
+-	      ix86_tune_features[i] = !clear;
+-	      if (dump)
+-		fprintf (stderr, "Explicitly %s feature %s\n",
+-			 clear ? "clear" : "set", ix86_tune_feature_names[i]);
+-	      break;
+-	    }
+-	}
+-      if (i == X86_TUNE_LAST)
+-	error ("unknown parameter to option %<-mtune-ctrl%>: %s",
+-	       clear ? curr_feature_string - 1 : curr_feature_string);
++
++      if (!strcmp (curr_feature_string, "use_gather"))
++	{
++	  ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
++	  ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
++	  ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
++	  if (dump)
++	    fprintf (stderr, "Explicitly %s features use_gather_2parts,"
++		     " use_gather_4parts, use_gather_8parts\n",
++		     clear ? "clear" : "set");
++
++	}
++      else if (!strcmp (curr_feature_string, "use_scatter"))
++	{
++	  ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
++	  ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
++	  ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
++	  if (dump)
++	    fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
++		     " use_scatter_4parts, use_scatter_8parts\n",
++		     clear ? "clear" : "set");
++	}
++      else
++	{
++	  for (i = 0; i < X86_TUNE_LAST; i++)
++	    {
++	      if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
++		{
++		  ix86_tune_features[i] = !clear;
++		  if (dump)
++		    fprintf (stderr, "Explicitly %s feature %s\n",
++			     clear ? "clear" : "set", ix86_tune_feature_names[i]);
++		  break;
++		}
++	    }
++
++	  if (i == X86_TUNE_LAST)
++	    error ("unknown parameter to option %<-mtune-ctrl%>: %s",
++		   clear ? curr_feature_string - 1 : curr_feature_string);
++	}
+       curr_feature_string = next_feature_string;
+     }
+   while (curr_feature_string);
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index 479fc6010..e75d37023 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
+ 	? !TARGET_USE_SCATTER_2PARTS
+ 	: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
+ 	   ? !TARGET_USE_SCATTER_4PARTS
+-	   : !TARGET_USE_SCATTER))
++	   : !TARGET_USE_SCATTER_8PARTS))
+     return NULL_TREE;
+ 
+   if ((TREE_CODE (index_type) != INTEGER_TYPE
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index 688aaabd3..aaa136ba0 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
+ 	ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
+ #define TARGET_USE_SCATTER_4PARTS \
+ 	ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
+-#define TARGET_USE_GATHER \
+-	ix86_tune_features[X86_TUNE_USE_GATHER]
+-#define TARGET_USE_SCATTER \
+-	ix86_tune_features[X86_TUNE_USE_SCATTER]
++#define TARGET_USE_GATHER_8PARTS \
++	ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
++#define TARGET_USE_SCATTER_8PARTS \
++	ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
+ #define TARGET_FUSE_CMP_AND_BRANCH_32 \
+ 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
+ #define TARGET_FUSE_CMP_AND_BRANCH_64 \
+ 	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_64]
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index 498fb454d..b154110d8 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated.
+ 
+ munroll-only-small-loops
+ Target Var(ix86_unroll_only_small_loops) Init(0) Save
+ Enable conservative small loop unrolling.
++
++mscatter
++Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
++Enable vectorization for scatter instruction.
+diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
+index 4392709fc..bdb455d20 100644
+--- a/gcc/config/i386/x86-tune.def
++++ b/gcc/config/i386/x86-tune.def
+@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
+ 
+ /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
+    elements.  */
+-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
++DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
+ 	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
+ 	    | m_GENERIC | m_GDS))
+ 
+ /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
+    elements.  */
+-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
++DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
+ 	  ~(m_ZNVER4))
+ 
+ /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
+--
+2.31.1
+
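Usage of the new knobs, assuming the rest of the backport also carries the -mgather alias named in the ChangeLog (only the mscatter stanza is visible in the i386.opt hunk above); foo.c is a placeholder:

    gcc -O3 -march=skylake -mno-gather  foo.c   # alias for -mtune-ctrl=^use_gather
    gcc -O3 -march=skylake -mno-scatter foo.c   # alias for -mtune-ctrl=^use_scatter
    gcc -O3 -mtune-ctrl=use_gather      foo.c   # sets all of use_gather_{2,4,8}parts

Each alias flips the three per-width tune features together, exactly as parse_mtune_ctrl_str now does for the literal strings "use_gather" and "use_scatter".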
View file
_service:tar_scm:0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch
Added
@@ -0,0 +1,129 @@
+From 764518a35e90a3e13c469275da9c3c7002fe1982 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Fri, 8 Sep 2023 09:22:43 +0800
+Subject: [PATCH 11/28] Remove constraint modifier % for
+ fcmaddcph/fmaddcph/fcmulcph since they're not commutative.
+
+gcc/ChangeLog:
+
+	PR target/111306
+	PR target/111335
+	* config/i386/sse.md (int_comm): New int_attr.
+	(fma_<complexopname>_<mode><sdc_maskz_name><round_name>):
+	Remove % for complex conjugate operations since they're not
+	commutative.
+	(fma_<complexpairopname>_<mode>_pair): Ditto.
+	(<avx512>_<complexopname>_<mode>_mask<round_name>): Ditto.
+	(cmul<conj_op><mode>3): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/i386/pr111306.c: New test.
+
+(cherry picked from commit f197392a16ffb1327f1d12ff8ff05f9295e015cb)
+---
+ gcc/config/i386/sse.md                   | 16 ++++++++---
+ gcc/testsuite/gcc.target/i386/pr111306.c | 36 ++++++++++++++++++++++++
+ 2 files changed, 48 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c
+
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index 3af159896..f25dd5f2b 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -6318,6 +6318,14 @@
+    (UNSPEC_COMPLEX_FMA_PAIR "fmaddc")
+    (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc"))
+ 
++(define_int_attr int_comm
++  [(UNSPEC_COMPLEX_FMA "")
++   (UNSPEC_COMPLEX_FMA_PAIR "")
++   (UNSPEC_COMPLEX_FCMA "")
++   (UNSPEC_COMPLEX_FCMA_PAIR "")
++   (UNSPEC_COMPLEX_FMUL "%")
++   (UNSPEC_COMPLEX_FCMUL "")])
++
+ (define_int_attr conj_op
+   [(UNSPEC_COMPLEX_FMA "")
+    (UNSPEC_COMPLEX_FCMA "_conj")
+@@ -6431,7 +6439,7 @@
+ (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
+   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
+ 	(unspec:VF_AVX512FP16VL
+-	  [(match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "%v")
++	  [(match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "<int_comm>v")
+ 	   (match_operand:VF_AVX512FP16VL 2 "<round_nimm_predicate>" "<round_constraint>")
+ 	   (match_operand:VF_AVX512FP16VL 3 "<round_nimm_predicate>" "0")]
+ 	  UNSPEC_COMPLEX_F_C_MA))
+@@ -6495,7 +6503,7 @@
+ (define_insn "fma_<complexpairopname>_<mode>_pair"
+  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v")
+ 	(unspec:VF1_AVX512VL
+-	  [(match_operand:VF1_AVX512VL 1 "vector_operand" "%v")
++	  [(match_operand:VF1_AVX512VL 1 "vector_operand" "<int_comm>v")
+ 	   (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr")
+ 	   (match_operand:VF1_AVX512VL 3 "vector_operand" "0")]
+ 	  UNSPEC_COMPLEX_F_C_MA_PAIR))
+@@ -6562,7 +6570,7 @@
+   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
+ 	(vec_merge:VF_AVX512FP16VL
+ 	  (unspec:VF_AVX512FP16VL
+-	    [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
++	    [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v")
+ 	     (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")
+ 	     (match_operand:VF_AVX512FP16VL 3 "register_operand" "0")]
+ 	    UNSPEC_COMPLEX_F_C_MA)
+@@ -6586,7 +6594,7 @@
+ (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
+   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
+ 	(unspec:VF_AVX512FP16VL
+-	  [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
++	  [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v")
+ 	   (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")]
+ 	  UNSPEC_COMPLEX_F_C_MUL))
+   "TARGET_AVX512FP16 && <round_mode512bit_condition>"
+diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c b/gcc/testsuite/gcc.target/i386/pr111306.c
+new file mode 100644
+index 000000000..541725ebd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr111306.c
+@@ -0,0 +1,36 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
++/* { dg-require-effective-target avx512fp16 } */
++
++#define AVX512FP16
++#include "avx512f-helper.h"
++
++__attribute__((optimize("O2"),noipa))
++void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
++  __m512h rA = _mm512_loadu_ph(a);
++  for (int i = 0; i < n; i += 32) {
++    __m512h rB = _mm512_loadu_ph(b + i);
++    _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
++  }
++}
++
++void
++test_512 (void)
++{
++  int n = 32;
++  _Float16 a[n], b[n], c[n];
++  _Float16 exp[n];
++  for (int i = 1; i <= n; i++) {
++    a[i - 1] = i & 1 ? -i : i;
++    b[i - 1] = i;
++  }
++
++  func1(a, b, n, c);
++  for (int i = 0; i < n / 32; i += 2) {
++    if (c[i] != a[i] * b[i] + a[i+1] * b[i+1]
++	|| c[i+1] != a[i] * b[i+1] - a[i+1]*b[i])
++      __builtin_abort ();
++  }
++}
++
++
+--
+2.31.1
+
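Why the % modifier had to go: the conjugate forms treat their two inputs differently, so a * conj(b) and b * conj(a) are different values (each is the conjugate of the other). A minimal sketch with the AVX512-FP16 complex intrinsics (illustrative only):

    #include <immintrin.h>

    /* fcmul multiplies a by the complex conjugate of b.  If the register
       allocator were still allowed to commute the inputs (the old "%"
       modifier), it could silently compute b * conj(a) instead.  */
    __m512h cmul_conj (__m512h a, __m512h b)
    {
      return _mm512_fcmul_pch (a, b);   /* a * conj(b) */
    }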
View file
_service:tar_scm:0266-Disparage-slightly-for-the-alternative-which-move-DF.patch
Added
@@ -0,0 +1,106 @@
+From afd539adfe762adb57863299a11987b7e20e7987 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Wed, 5 Jul 2023 13:45:11 +0800
+Subject: [PATCH 12/28] Disparage slightly for the alternative which moves
+ DFmode between SSE_REGS and GENERAL_REGS.
+
+For testcase
+
+void __cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+GCC-14 with -O2 and -march=x86-64 options generates the following code:
+
+__cond_swap(double*, double*):
+        movsd   xmm1, QWORD PTR [rdi]
+        movsd   xmm0, QWORD PTR [rsi]
+        comisd  xmm0, xmm1
+        jbe     .L2
+        movq    rax, xmm1
+        movapd  xmm1, xmm0
+        movq    xmm0, rax
+.L2:
+        movsd   QWORD PTR [rsi], xmm1
+        movsd   QWORD PTR [rdi], xmm0
+        ret
+
+rax is used to save and restore the DFmode value. In RA both GENERAL_REGS
+and SSE_REGS cost zero since we didn't disparage the
+alternative in the movdf_internal pattern; according to the register
+allocation order, GENERAL_REGS is allocated. The patch adds ? for the
+alternatives (r,v) and (v,r) just like we did for the movsf/hf/bf_internal
+patterns; after that we get optimal RA.
+
+__cond_swap:
+.LFB0:
+        .cfi_startproc
+        movsd   (%rdi), %xmm1
+        movsd   (%rsi), %xmm0
+        comisd  %xmm1, %xmm0
+        jbe     .L2
+        movapd  %xmm1, %xmm2
+        movapd  %xmm0, %xmm1
+        movapd  %xmm2, %xmm0
+.L2:
+        movsd   %xmm1, (%rsi)
+        movsd   %xmm0, (%rdi)
+        ret
+
+gcc/ChangeLog:
+
+	PR target/110170
+	* config/i386/i386.md (movdf_internal): Disparage slightly for
+	2 alternatives (r,v) and (v,r) by adding constraint modifier
+	'?'.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/i386/pr110170-3.c: New test.
+
+(cherry picked from commit 37a231cc7594d12ba0822077018aad751a6fb94e)
+---
+ gcc/config/i386/i386.md                    |  4 ++--
+ gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c
+
+diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
+index be07be10d..71691f598 100644
+--- a/gcc/config/i386/i386.md
++++ b/gcc/config/i386/i386.md
+@@ -3582,9 +3582,9 @@
+ ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
+ (define_insn "*movdf_internal"
+   [(set (match_operand:DF 0 "nonimmediate_operand"
+-    "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r  ,o ,r  ,m")
++    "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r  ,o ,r  ,m")
+ 	(match_operand:DF 1 "general_operand"
+-    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))
++    "Yf*fm,Yf*f,G   ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))
+   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+    && (lra_in_progress || reload_completed
+        || !CONST_DOUBLE_P (operands[1])
+diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c
+new file mode 100644
+index 000000000..70daa89e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target { ! ia32 } } } */
++/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */
++/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */
++
++void __cond_swap(double* __x, double* __y) {
++  _Bool __r = (*__x < *__y);
++  double __tmp = __r ? *__x : *__y;
++  *__y = __r ? *__y : *__x;
++  *__x = __tmp;
++}
++
+--
+2.31.1
+
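A quick way to check the register-allocation change on the testcase from the message (commands are illustrative; cond_swap.cc is assumed to hold __cond_swap from above):

    g++ -O2 -march=x86-64 -S cond_swap.cc -o cond_swap.s
    grep movq cond_swap.s
    # before the patch: movq %xmm1,%rax / movq %rax,%xmm0 round trip through a GPR
    # after the patch:  no GPR<->SSE movq; the swap stays in movapd between xmm registers

This is the same property the new test encodes with scan-assembler-not {(?n)movq.*r}.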
View file
_service:tar_scm:0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch
Added
@@ -0,0 +1,163 @@
+From 88516507757932c1e67ce99d240596935971d2d0 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 9 Nov 2023 13:20:05 +0800
+Subject: [PATCH 13/28] Fix wrong code due to vec_merge + pcmp to blendvb
+ splitter.
+
+gcc/ChangeLog:
+
+	PR target/112443
+	* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
+	from LT to GT since there's a 'not' in the pattern.
+	(*avx2_pcmp<mode>3_5): Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/pr112443.C: New test.
+
+(cherry picked from commit 9a0cc04b9c9b02426762892b88efc5c44ba546bd)
+---
+ gcc/config/i386/sse.md                   |   4 +-
+ gcc/testsuite/g++.target/i386/pr112443.C | 108 +++++++++++++++++++++++
+ 2 files changed, 110 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/i386/pr112443.C
+
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index f25dd5f2b..23b858ab2 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -16358,7 +16358,7 @@
+ 	     (match_dup 4)]
+ 	    UNSPEC_BLENDV))
+ {
+-  if (INTVAL (operands[5]) == 1)
++  if (INTVAL (operands[5]) == 5)
+     std::swap (operands[1], operands[2]);
+   operands[3] = gen_lowpart (<MODE>mode, operands[3]);
+})
+@@ -16388,7 +16388,7 @@
+ 	     (match_dup 4)]
+ 	    UNSPEC_BLENDV))
+ {
+-  if (INTVAL (operands[5]) == 1)
++  if (INTVAL (operands[5]) == 5)
+     std::swap (operands[1], operands[2]);
+})
+
+diff --git a/gcc/testsuite/g++.target/i386/pr112443.C b/gcc/testsuite/g++.target/i386/pr112443.C
+new file mode 100644
+index 000000000..ebfa9b4a7
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/pr112443.C
+@@ -0,0 +1,108 @@
++/* { dg-do run } */
++/* { dg-require-effective-target avx512bw } */
++/* { dg-require-effective-target avx512vl } */
++/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */
++
++#include <cstdint>
++#include <x86intrin.h>
++#include <functional>
++#include <ostream>
++
++#define AVX512BW
++#define AVX512VL
++
++#include "avx512f-helper.h"
++
++struct TensorIteratorBase{
++  char* in;
++  char* out;
++
++  void for_each(std::function<void(char*, char*, int64_t size)> loop){
++    loop(out, in, 32);
++  }
++};
++
++class Vectorized {
++protected:
++  __m256i values;
++
++  static inline __m256i invert(const __m256i& v) {
++    const auto ones = _mm256_set1_epi64x(-1);
++    return _mm256_xor_si256(ones, v);
++  }
++public:
++  operator __m256i() const {
++    return values;
++  }
++
++  static constexpr int size() {
++    return 32;
++  }
++
++  Vectorized() {}
++  Vectorized(__m256i v) : values(v) {}
++  Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
++  static Vectorized blendv(const Vectorized& a, const Vectorized& b,
++			   const Vectorized& mask) {
++    return _mm256_blendv_epi8(a, b, mask);
++  }
++  static Vectorized loadu(const void* ptr) {
++    return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
++  }
++  void store(void* ptr) const {
++    _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
++  }
++
++  Vectorized operator<(const Vectorized& other) const {
++    __m256i max = _mm256_max_epu8(values, other);
++    return invert(_mm256_cmpeq_epi8(max, values));
++  }
++  Vectorized operator-(const Vectorized& b) {
++    return _mm256_sub_epi8(values, b);
++  }
++};
++
++std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
++  uint8_t buf[Vectorized::size()];
++  vec.store(buf);
++  stream << "vec[";
++  for (int i = 0; i != Vectorized::size(); i++) {
++    if (i != 0)
++      stream << ", ";
++    stream << buf[i]*1;
++  }
++  stream << "]";
++  return stream;
++}
++
++void run(TensorIteratorBase iter){
++  Vectorized zero_vec(0);
++  Vectorized one_vec(1);
++
++  iter.for_each([=](char* out, char* in, int64_t size) {
++    for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
++      auto self_vec = Vectorized::loadu(in + i);
++      auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
++      auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
++      auto outv = left - right;
++      outv.store(out + i);
++    }
++  });
++}
++
++void
++test_256 (){
++  char in[32];
++  char out[32];
++  for(auto& x: in) x = 1;
++  run(TensorIteratorBase{in, out});
++  Vectorized::loadu (out);
++  for (int i = 0; i != 32; i++)
++    if (out[i] != 1)
++      __builtin_abort ();
++}
++
++void
++test_128 ()
++{
++}
+--
+2.31.1
+
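The wrong code reduces to a byte-wise select whose mask comes from an integer comparison; a sketch of the shape the splitter rewrites (illustrative, independent of the testcase above):

    #include <immintrin.h>

    /* m ? x : y on byte elements: vpcmpgtb feeding vpblendvb.  The
       splitter recreates this pair from the combined AVX512 pcmp form and
       must swap the select arms only for the predicate that inverts the
       mask -- immediate 5 (GT per the ChangeLog), not immediate 1 (LT),
       which is exactly the one-line fix above.  */
    __m256i sel (__m256i a, __m256i b, __m256i x, __m256i y)
    {
      __m256i m = _mm256_cmpgt_epi8 (b, a);   /* mask where a < b */
      return _mm256_blendv_epi8 (y, x, m);    /* x where mask set, else y */
    }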
View file
_service:tar_scm:0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
Added
@@ -0,0 +1,151 @@
+From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
+From: liuhongt <hongtao.liu@intel.com>
+Date: Thu, 7 Dec 2023 09:17:27 +0800
+Subject: [PATCH 14/28] Don't assume it's AVX_U128_CLEAN after call_insn whose
+ abi.mode_clobbers (V4DImode) doesn't contain all SSE_REGS.
+
+If the function doesn't clobber any sse registers, or only clobbers the
+128-bit part, then a vzeroupper isn't issued before the function exit;
+the status is not CLEAN but ANY after the function.
+
+Also for a sibling call it's safe to issue a vzeroupper, and there could
+otherwise be a missing vzeroupper since there's no mode_exit for
+sibling_call_p.
+
+gcc/ChangeLog:
+
+	PR target/112891
+	* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
+	AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
+	align with ix86_avx_u128_mode_needed.
+	(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
+	sibling_call.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/i386/pr112891.c: New test.
+	* gcc.target/i386/pr112891-2.c: New test.
+
+(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
+---
+ gcc/config/i386/i386.cc                    | 22 +++++++++++++---
+ gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
+ gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +++++++++++++++++++++
+ 3 files changed, 78 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
+
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index e75d37023..60f3296b0 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
+ 	 modes wider than 256 bits.  It's only safe to issue a
+ 	 vzeroupper if all SSE registers are clobbered.  */
+       const function_abi &abi = insn_callee_abi (insn);
+-      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+-				  abi.mode_clobbers (V4DImode)))
++      /* Should be safe to issue a vzeroupper before sibling_call_p.
++	 Also there's no mode_exit for sibling_call, so there could be
++	 a missing vzeroupper for that.  */
++      if (!(SIBLING_CALL_P (insn)
++	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++				      abi.mode_clobbers (V4DImode))))
+ 	return AVX_U128_ANY;
+ 
+       return AVX_U128_CLEAN;
+@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
+       bool avx_upper_reg_found = false;
+       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
+ 
+-      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
++      if (avx_upper_reg_found)
++	return AVX_U128_DIRTY;
++
++      /* If the function doesn't clobber any sse registers or only clobbers
++	 the 128-bit part, then a vzeroupper isn't issued before the function
++	 exit; the status is not CLEAN but ANY after the function.  */
++      const function_abi &abi = insn_callee_abi (insn);
++      if (!(SIBLING_CALL_P (insn)
++	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
++				      abi.mode_clobbers (V4DImode))))
++	return AVX_U128_ANY;
++
++      return AVX_U128_CLEAN;
+     }
+ 
+   /* Otherwise, return current mode.  Remember that if insn
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+new file mode 100644
+index 000000000..164c3985d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++  a[0] = 1.0;
++  a[1] = 2.0;
++}
++
++double
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++  a[0] += b[0];
++  a[1] += b[1];
++  a[2] += b[2];
++  a[3] += b[3];
++  bar (b);
++  return a[5] + b[5];
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++  double c = foo (a, b);
++  return __builtin_exp (c);
++}
+diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
+new file mode 100644
+index 000000000..dbf6c6794
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr112891.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-options "-mavx2 -O3" } */
++/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
++
++void
++__attribute__((noinline))
++bar (double* a)
++{
++  a[0] = 1.0;
++  a[1] = 2.0;
++}
++
++void
++__attribute__((noinline))
++foo (double* __restrict a, double* b)
++{
++  a[0] += b[0];
++  a[1] += b[1];
++  a[2] += b[2];
++  a[3] += b[3];
++  bar (b);
++}
++
++double
++foo1 (double* __restrict a, double* b)
++{
++  foo (a, b);
++  return __builtin_exp (b[1]);
++}
+--
+2.31.1
+
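The two tests encode the expectation directly; to reproduce by hand (illustrative):

    gcc -O3 -mavx2 -S pr112891.c -o pr112891.s
    grep -c vzeroupper pr112891.s   # expect exactly 1, per the scan-assembler-times above

bar() writes only 128-bit values, so its callee ABI does not clobber all SSE registers; with the fix the upper state after such a call is treated as ANY rather than CLEAN, so the vzeroupper guarding the __builtin_exp call is no longer dropped.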
View file
_service:tar_scm:0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch
Added
@@ -0,0 +1,142 @@
+From 19ee37b11702c86d7ed271e9e1d00e23cc4ab93c Mon Sep 17 00:00:00 2001
+From: Jan Hubicka <jh@suse.cz>
+Date: Fri, 29 Dec 2023 23:51:03 +0100
+Subject: [PATCH 15/28] Disable FMADD in chains for Zen4 and generic
+
+This patch disables use of FMA in the matrix multiplication loop for generic
+(for x86-64-v3) and zen4.  I tested this on zen4 and a Xeon Gold 6212U.
+
+For Intel this is neutral both on the matrix multiplication micro-benchmark
+(attached) and spec2k17 where the difference was within noise for Core.
+
+On Core the micro-benchmark runs as follows:
+
+With FMA:
+
+       578,500,241      cycles:u             #    3.645 GHz               ( +-  0.12% )
+       753,318,477      instructions:u       #    1.30  insn per cycle    ( +-  0.00% )
+       125,417,701      branches:u           #  790.227 M/sec             ( +-  0.00% )
+          0.159146 +- 0.000363 seconds time elapsed  ( +-  0.23% )
+
+No FMA:
+
+       577,573,960      cycles:u             #    3.514 GHz               ( +-  0.15% )
+       878,318,479      instructions:u       #    1.52  insn per cycle    ( +-  0.00% )
+       125,417,702      branches:u           #  763.035 M/sec             ( +-  0.00% )
+          0.164734 +- 0.000321 seconds time elapsed  ( +-  0.19% )
+
+So the cycle count is unchanged and a discrete multiply+add takes the same
+time as FMA.
+
+While on zen:
+
+With FMA:
+         484875179      cycles:u             #    3.599 GHz               ( +-  0.05% )  (82.11%)
+         752031517      instructions:u       #    1.55  insn per cycle
+         125106525      branches:u           #  928.712 M/sec             ( +-  0.03% )  (85.09%)
+            128356      branch-misses:u      #    0.10% of all branches   ( +-  0.06% )  (83.58%)
+
+No FMA:
+         375875209      cycles:u             #    3.592 GHz               ( +-  0.08% )  (80.74%)
+         875725341      instructions:u       #    2.33  insn per cycle
+         124903825      branches:u           #    1.194 G/sec             ( +-  0.04% )  (84.59%)
+          0.105203 +- 0.000188 seconds time elapsed  ( +-  0.18% )
+
+The difference is that Cores understand the fact that fmadd does not need
+all three parameters to start computation, while Zen cores don't.
+
+Since this seems a noticeable win on zen and no loss on Core, it seems like a
+good default for generic.
+
+#include <stdio.h>
+#include <time.h>
+
+#define SIZE 1000 /* SIZE is not given in the original message; any moderate value works.  */
+
+float a[SIZE][SIZE];
+float b[SIZE][SIZE];
+float c[SIZE][SIZE];
+
+void init(void)
+{
+   int i, j, k;
+   for(i=0; i<SIZE; ++i)
+   {
+      for(j=0; j<SIZE; ++j)
+      {
+         a[i][j] = (float)i + j;
+         b[i][j] = (float)i - j;
+         c[i][j] = 0.0f;
+      }
+   }
+}
+
+void mult(void)
+{
+   int i, j, k;
+
+   for(i=0; i<SIZE; ++i)
+   {
+      for(j=0; j<SIZE; ++j)
+      {
+         for(k=0; k<SIZE; ++k)
+         {
+            c[i][j] += a[i][k] * b[k][j];
+         }
+      }
+   }
+}
+
+int main(void)
+{
+   clock_t s, e;
+
+   init();
+   s=clock();
+   mult();
+   e=clock();
+   printf("        mult took %10d clocks\n", (int)(e-s));
+
+   return 0;
+
+}
+
+gcc/ChangeLog:
+
+	* config/i386/x86-tune.def (X86_TUNE_AVOID_128FMA_CHAINS,
+	X86_TUNE_AVOID_256FMA_CHAINS): Enable for znver4 and Core.
+---
+ gcc/config/i386/x86-tune.def | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
+index bdb455d20..fd095f3ec 100644
+--- a/gcc/config/i386/x86-tune.def
++++ b/gcc/config/i386/x86-tune.def
+@@ -499,12 +499,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
+ 
+ /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
+    smaller FMA chain.  */
+-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
++DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2
++	  | m_ZNVER3 | m_ZNVER4 | m_GENERIC)
+ 
+ /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
+    smaller FMA chain.  */
+ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3
+-	  | m_ALDERLAKE | m_SAPPHIRERAPIDS)
++	  | m_ZNVER4 | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_GENERIC)
+ 
+ /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
+    smaller FMA chain.  */
+--
+2.31.1
+
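To experiment with this default, the chain heuristic can be steered per compilation; treat the exact knob as an assumption of this note (it is the generic --param, not something this patch adds):

    gcc -O3 -march=znver4 mult.c                               # new default: mul+add kept separate in the chain
    gcc -O3 -march=znver4 --param=avoid-fma-max-bits=0 mult.c  # allow FMA chains again for comparison

This makes it easy to re-run the matrix-multiplication micro-benchmark above in both configurations.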
View file
_service:tar_scm:0270-Initial-Raptorlake-Support.patch
Added
@@ -0,0 +1,47 @@
+From 411d1f0bcc0d1c8018fdf5fe84ad2404929556ec Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 16 Sep 2022 13:59:01 +0800
+Subject: [PATCH 16/28] Initial Raptorlake Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h:
+	(get_intel_cpu): Handle Raptorlake.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add Raptorlake.
+
+(cherry picked from commit 470a0659b508d684148f362c4dc0eccf5a83a23e)
+---
+ gcc/common/config/i386/cpuinfo.h      | 2 ++
+ gcc/common/config/i386/i386-common.cc | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 316ad3cb3..13d0f4cd8 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -508,6 +508,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
+     case 0x97:
+     case 0x9a:
+       /* Alder Lake.  */
++    case 0xb7:
++      /* Raptor Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+       CHECK___builtin_cpu_is ("alderlake");
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index f650e255f..c1d700f89 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1939,6 +1939,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
+   {"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
++    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+--
+2.31.1
+
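After this hunk the new name is accepted wherever a CPU name is, for example (illustrative; foo.c is a placeholder):

    gcc -O2 -march=raptorlake -c foo.c   # ISA and tuning of alderlake (PTA_ALDERLAKE, CPU_HASWELL scheduling)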
View file
_service:tar_scm:0271-Initial-Meteorlake-Support.patch
Added
@@ -0,0 +1,49 @@
+From 87cea29ede520f4a5af01dff7071ab1d23bd47b5 Mon Sep 17 00:00:00 2001
+From: "Hu, Lin1" <lin1.hu@intel.com>
+Date: Fri, 16 Sep 2022 11:25:13 +0800
+Subject: [PATCH 17/28] Initial Meteorlake Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h:
+	(get_intel_cpu): Handle Meteorlake.
+	* common/config/i386/i386-common.cc:
+	(processor_alias_table): Add Meteorlake.
+
+(cherry picked from commit fd206f0e95fb6f41b96eaaaab1dc0c30378e5e08)
+---
+ gcc/common/config/i386/cpuinfo.h      | 4 ++++
+ gcc/common/config/i386/i386-common.cc | 2 ++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index 13d0f4cd8..37af92d6b 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -510,6 +510,10 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       /* Alder Lake.  */
+     case 0xb7:
+       /* Raptor Lake.  */
++    case 0xb5:
++    case 0xaa:
++    case 0xac:
++      /* Meteor Lake.  */
+       cpu = "alderlake";
+       CHECK___builtin_cpu_is ("corei7");
+       CHECK___builtin_cpu_is ("alderlake");
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index c1d700f89..cfee672fb 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1941,6 +1941,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
++    M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+--
+2.31.1
+
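Since both new model numbers resolve to the Alder Lake table entry and CPU subtype, runtime dispatch treats all three families alike. A small sketch:

    #include <stdio.h>

    int main (void)
    {
      /* Raptor Lake and Meteor Lake reuse INTEL_COREI7_ALDERLAKE, so the
         "alderlake" predicate is true on all three families.  */
      if (__builtin_cpu_is ("alderlake"))
        puts ("Alder Lake family (includes Raptor Lake and Meteor Lake)");
      return 0;
    }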
View file
_service:tar_scm:0272-Support-Intel-AMX-FP16-ISA.patch
Added
@@ -0,0 +1,691 @@ +From c11301c7780213ddf46a0bcdb06079af485f431c Mon Sep 17 00:00:00 2001 +From: Hongyu Wang <hongyu.wang@intel.com> +Date: Fri, 4 Nov 2022 15:50:55 +0800 +Subject: [PATCH 18/28] Support Intel AMX-FP16 ISA +
+gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_available_features): Detect + amx-fp16. + * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET, + OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros. + (ix86_handle_option): Handle -mamx-fp16. + * common/config/i386/i386-cpuinfo.h (enum processor_features): + Add FEATURE_AMX_FP16. + * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for + amx-fp16. + * config.gcc: Add amxfp16intrin.h. + * config/i386/cpuid.h (bit_AMX_FP16): New. + * config/i386/i386-c.cc (ix86_target_macros_internal): Define + __AMX_FP16__. + * config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16. + * config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16. + (ix86_valid_target_attribute_inner_p): Add new ATTR. + (ix86_option_override_internal): Handle AMX-FP16. + * config/i386/i386.opt: Add -mamx-fp16. + * config/i386/immintrin.h: Include amxfp16intrin.h. + * doc/extend.texi: Document -mamx-fp16. + * doc/invoke.texi: Document amx-fp16. + * doc/sourcebuild.texi: Document amx_fp16. + * config/i386/amxfp16intrin.h: New file. +
+gcc/testsuite/ChangeLog: + + * g++.dg/other/i386-2.C: Add -mamx-fp16. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/sse-12.c: Ditto. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * lib/target-supports.exp: (check_effective_target_amx_fp16): + New proc. + * gcc.target/i386/funcspec-56.inc: Add new target attribute. + * gcc.target/i386/amx-check.h: Add AMX_FP16. + * gcc.target/i386/amx-helper.h: New file to support amx-fp16. + * gcc.target/i386/amxfp16-asmatt-1.c: New test. + * gcc.target/i386/amxfp16-asmintel-1.c: Ditto. + * gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto. 
+ +Co-authored-by: Haochen Jiang <haochen.jiang@intel.com> + +(cherry picked from commit 2b4a03962a0fe18cadc944d90f1fb85a40004226) +--- + gcc/common/config/i386/cpuinfo.h | 5 ++ + gcc/common/config/i386/i386-common.cc | 15 +++++ + gcc/common/config/i386/i386-cpuinfo.h | 1 + + gcc/common/config/i386/i386-isas.h | 1 + + gcc/config.gcc | 3 +- + gcc/config/i386/amxfp16intrin.h | 46 ++++++++++++++ + gcc/config/i386/cpuid.h | 1 + + gcc/config/i386/i386-c.cc | 2 + + gcc/config/i386/i386-isa.def | 1 + + gcc/config/i386/i386-options.cc | 4 +- + gcc/config/i386/i386.opt | 4 ++ + gcc/config/i386/immintrin.h | 2 + + gcc/doc/extend.texi | 5 ++ + gcc/doc/invoke.texi | 9 ++- + gcc/doc/sourcebuild.texi | 3 + + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- + gcc/testsuite/g++.dg/other/i386-3.C | 2 +- + gcc/testsuite/gcc.target/i386/amx-check.h | 3 + + gcc/testsuite/gcc.target/i386/amx-helper.h | 61 +++++++++++++++++++ + .../gcc.target/i386/amxfp16-asmatt-1.c | 13 ++++ + .../gcc.target/i386/amxfp16-asmintel-1.c | 10 +++ + .../gcc.target/i386/amxfp16-dpfp16ps-2.c | 57 +++++++++++++++++ + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- + gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- + gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- + gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- + gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- + gcc/testsuite/lib/target-supports.exp | 11 ++++ + 29 files changed, 262 insertions(+), 13 deletions(-) + create mode 100644 gcc/config/i386/amxfp16intrin.h + create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h + create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c + create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c + create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 37af92d6b..5951a30aa 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -783,6 +783,11 @@ get_available_features (struct __processor_model *cpu_model, + set_feature (FEATURE_AVX512BF16); + } + } ++ if (amx_usable) ++ { ++ if (eax & bit_AMX_FP16) ++ set_feature (FEATURE_AMX_FP16); ++ } + } + + /* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1). */ +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index cfee672fb..922db33ee 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -107,6 +107,7 @@ along with GCC; see the file COPYING3. If not see + #define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE + #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8 + #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16 ++#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16 + + /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same + as -msse4.2. */ +@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3. If not see + #define OPTION_MASK_ISA2_KL_UNSET \ + (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET) + #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL ++#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16 + + /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same + as -mno-sse4.1. 
*/ +@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts, + } + return true; + ++ case OPT_mamx_fp16: ++ if (value) ++ { ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET; ++ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET; ++ } ++ else ++ { ++ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET; ++ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET; ++ } ++ return true; ++ + case OPT_mfma: + if (value) + {
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 82996ebb3..8f22897de 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -240,6 +240,7 @@ enum processor_features + FEATURE_X86_64_V2, + FEATURE_X86_64_V3, + FEATURE_X86_64_V4, ++ FEATURE_AMX_FP16, + CPU_FEATURE_MAX + }; +
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h +index 2d0646a68..95bab6da2 100644 +--- a/gcc/common/config/i386/i386-isas.h ++++ b/gcc/common/config/i386/i386-isas.h +@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START + ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL) + ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL) + ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL) ++ ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16") + ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc +index 4a0ae9328..e2b4a23dc 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -423,7 +423,8 @@ i[34567]86-*-* | x86_64-*-*) + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h + amxbf16intrin.h x86gprintrin.h uintrintrin.h + hresetintrin.h keylockerintrin.h avxvnniintrin.h +- mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h" ++ mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h ++ amxfp16intrin.h" + ;; + ia64-*-*) + extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h +new file mode 100644 +index 000000000..6a114741a +--- /dev/null ++++ b/gcc/config/i386/amxfp16intrin.h +@@ -0,0 +1,46 @@ ++/* Copyright (C) 2020 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#if !defined _IMMINTRIN_H_INCLUDED ++#error "Never use <amxfp16intrin.h> directly; include <immintrin.h> instead." ++#endif ++ ++#ifndef _AMXFP16INTRIN_H_INCLUDED ++#define _AMXFP16INTRIN_H_INCLUDED ++ ++#if defined(__x86_64__) ++#define _tile_dpfp16ps_internal(dst,src1,src2) \ ++ __asm__ volatile \ ++ ("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) ++ ++#define _tile_dpfp16ps(dst,src1,src2) \ ++ _tile_dpfp16ps_internal (dst,src1,src2) ++ ++#endif ++ ++#ifdef __DISABLE_AMX_FP16__ ++#undef __DISABLE_AMX_FP16__ ++#pragma GCC pop_options ++#endif /* __DISABLE_AMX_FP16__ */ ++ ++#endif /* _AMXFP16INTRIN_H_INCLUDED */
+diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h +index 8b3dc2b1d..d6cd8d1bf 100644 +--- a/gcc/config/i386/cpuid.h ++++ b/gcc/config/i386/cpuid.h +@@ -27,6 +27,7 @@ + /* %eax */ + #define bit_AVXVNNI (1 << 4) + #define bit_AVX512BF16 (1 << 5) ++#define bit_AMX_FP16 (1 << 21) + #define bit_HRESET (1 << 22) + + /* %ecx */
+diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 3fec4c7e2..4269f29e6 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__WIDEKL__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI) + def_or_undef (parse_in, "__AVXVNNI__"); ++ if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16) ++ def_or_undef (parse_in, "__AMX_FP16__"); + if (TARGET_IAMCU) + { + def_or_undef (parse_in, "__iamcu");
+diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def +index 83659d0be..c7305c01b 100644 +--- a/gcc/config/i386/i386-isa.def ++++ b/gcc/config/i386/i386-isa.def +@@ -109,3 +109,4 @@ DEF_PTA(KL) + DEF_PTA(WIDEKL) + DEF_PTA(AVXVNNI) + DEF_PTA(AVX512FP16) ++DEF_PTA(AMX_FP16)
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 3df1f0c41..3edb7094e 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -230,7 +230,8 @@ static struct ix86_target_opts isa2_opts[] = + { "-mkl", OPTION_MASK_ISA2_KL }, + { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, + { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, +- { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 } ++ { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, ++ { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 } + }; + static struct ix86_target_opts isa_opts[] = + { +@@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings, + IX86_ATTR_ISA ("hreset", OPT_mhreset), + IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), + IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), ++ IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16), + + /* enum options */ + IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index b154110d8..52c6f02ee 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1226,3 +1226,7 @@ Enable conservative small loop unrolling. + mscatter + Target Alias(mtune-ctrl=, use_scatter, ^use_scatter) + Enable vectorization for scatter instruction. ++ ++mamx-fp16 ++Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save ++Support AMX-FP16 built-in functions and code generation. 
+diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h +index 6afd78c2b..0447ca4b2 100644 +--- a/gcc/config/i386/immintrin.h ++++ b/gcc/config/i386/immintrin.h +@@ -128,4 +128,6 @@ + + #include <keylockerintrin.h> + ++#include <amxfp16intrin.h> ++ + #endif /* _IMMINTRIN_H_INCLUDED */ +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 33a776a79..4ba9d34cd 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -7038,6 +7038,11 @@ Enable/disable the generation of the WIDEKL instructions. + @cindex @code{target("avxvnni")} function attribute, x86 + Enable/disable the generation of the AVXVNNI instructions. + ++@item amx-fp16 ++@itemx no-amx-fp16 ++@cindex @code{target("amx-fp16")} function attribute, x86 ++Enable/disable the generation of the AMX-FP16 instructions. ++ + @item cld + @itemx no-cld + @cindex @code{target("cld")} function attribute, x86 +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 3a48655e5..d25f13217 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options. + -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol + -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol + -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol +--mavx512fp16 @gol ++-mavx512fp16 -mamx-fp16 @gol + -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol + -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol + -mkl -mwidekl @gol +@@ -32442,6 +32442,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. + @need 200 + @itemx -mwidekl + @opindex mwidekl ++@need 200 ++@itemx -mamx-fp16 ++@opindex mamx-fp16 + These switches enable the use of instructions in the MMX, SSE, + SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, + AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, +@@ -32451,8 +32454,8 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP, + XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, + GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, + ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, +-UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16 +-or CLDEMOTE extended instruction sets. Each has a corresponding ++UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, ++AMX-FP16 or CLDEMOTE extended instruction sets. Each has a corresponding + @option{-mno-} option to disable use of these instructions. + + These extensions are also available as built-in functions: see +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 71c04841d..b64b62dee 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2472,6 +2472,9 @@ Target supports the execution of @code{amx-int8} instructions. + @item amx_bf16 + Target supports the execution of @code{amx-bf16} instructions. + ++@item amx_fp16 ++Target supports the execution of @code{amx-fp16} instructions. ++ + @item cell_hw + Test system can execute AltiVec and Cell PPU instructions. 
+ +diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C +index fba3d1ac6..57a6357aa 100644 +--- a/gcc/testsuite/g++.dg/other/i386-2.C ++++ b/gcc/testsuite/g++.dg/other/i386-2.C +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ ++/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C +index 5cc0fa834..1947547d6 100644 +--- a/gcc/testsuite/g++.dg/other/i386-3.C ++++ b/gcc/testsuite/g++.dg/other/i386-3.C +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ ++/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, 
abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
+diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h +index 6fff5ff46..27dd37bf9 100644 +--- a/gcc/testsuite/gcc.target/i386/amx-check.h ++++ b/gcc/testsuite/gcc.target/i386/amx-check.h +@@ -213,6 +213,9 @@ main () + #ifdef AMX_BF16 + && __builtin_cpu_supports ("amx-bf16") + #endif ++#ifdef AMX_FP16 ++ && __builtin_cpu_supports ("amx-fp16") ++#endif + #ifdef __linux__ + && request_perm_xtile_data () + #endif
+diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h +new file mode 100644 +index 000000000..fe24d7067 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amx-helper.h +@@ -0,0 +1,61 @@ ++#ifndef AMX_HELPER_H_INCLUDED ++#define AMX_HELPER_H_INCLUDED ++#if defined(AMX_FP16) ++#include <immintrin.h> ++#include <xmmintrin.h> ++#endif ++#include "amx-check.h" ++ ++typedef union ++{ ++ _Float16 f16; ++ uint16_t u; ++} union16f_uw; ++ ++#if defined(AMX_FP16) ++/* Transformation functions between fp16/float */ ++static uint16_t make_f32_fp16 (float f) ++{ ++ union16f_uw tmp; ++ __m128 b = _mm_set_ss (f); ++ __m128h a; ++ tmp.f16 = _mm_cvtsh_h (_mm_cvtss_sh (a, b)); ++ return tmp.u; ++} ++ ++static float make_fp16_f32 (uint16_t fp) ++{ ++ union16f_uw tmp; ++ tmp.u = fp; ++ __m128h b = _mm_set_sh (tmp.f16); ++ __m128 a; ++ return _mm_cvtss_f32 (_mm_cvtsh_ss (a, b)); ++} ++ ++/* Init tile buffer with fp16 pairs */ ++void init_fp16_max_tile_buffer (uint8_t* buf) ++{ ++ int i, j; ++ uint16_t* ptr = (uint16_t *) buf; ++ ++ for (i = 0; i < 16; i++) ++ for (j = 0; j < 32; j++) ++ { ++ float f = 2.5f * i + 1.25f * j; ++ ptr[i * 32 + j] = make_f32_fp16 (f); ++ } ++} ++ ++/* Init tile fp16 pair buffer with zero */ ++void init_fp16_max_tile_zero_buffer (uint8_t* buf) ++{ ++ int i, j; ++ uint16_t* ptr = (uint16_t *) buf; ++ ++ for (i = 0; i < 16; i++) ++ for (j = 0; j < 32; j++) ++ ptr[i * 32 + j] = make_f32_fp16 (0.0f); ++} ++#endif ++ ++#endif
+diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c +new file mode 100644 +index 000000000..09ae6d408 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -mamx-fp16" } */ ++/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */ ++#include <immintrin.h> ++ ++#define TMM1 1 ++#define TMM2 2 ++#define TMM3 3 ++ ++void TEST () ++{ ++ _tile_dpfp16ps (TMM1, TMM2, TMM3); ++}
+diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c +new file mode 100644 +index 000000000..a8dff945f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-require-effective-target masm_intel } */ ++/* { dg-options "-O2 -mamx-fp16 -masm=intel" } */ ++/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ ++#include <immintrin.h> ++ ++void TEST () ++{ ++ _tile_dpfp16ps (1, 2, 3); ++}
+diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c +new file mode 100644 +index 000000000..2d359a689 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c +@@ -0,0 +1,57 @@ ++/* { dg-do run { target { ! ia32 } } } */ ++/* { dg-require-effective-target amx_tile } */ ++/* { dg-require-effective-target amx_fp16 } */ ++/* { dg-require-effective-target avx512fp16 } */ ++/* { dg-options "-O2 -mamx-tile -mamx-fp16 -mavx512fp16" } */ ++#define AMX_FP16 ++#define DO_TEST test_amx_fp16_dpfp16ps ++void test_amx_fp16_dpfp16ps (); ++#include "amx-helper.h" ++ ++void calc_matrix_dpfp16ps (__tile *dst, __tile *src1, __tile *src2) ++{ ++ uint16_t *src1_buf = (uint16_t *)src1->buf; ++ uint16_t *src2_buf = (uint16_t *)src2->buf; ++ float *dst_buf = (float *)dst->buf; ++ ++ int M = src1->rows; ++ int N = src1->colsb / 4; ++ int K = src2->colsb / 4; ++ int i, j, k, t; ++ ++ for (i = 0; i < M; i++) ++ for (j = 0; j < N; j++) ++ for (k = 0; k < K; k++) ++ for (t = 0; t < 2; t+=2) ++ { ++ dst_buf[i * K + k] += ++ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) * ++ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) + ++ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) * ++ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1])); ++ } ++ ++} ++ ++void test_amx_fp16_dpfp16ps () ++{ ++ __tilecfg_u cfg; ++ __tile dst, dst_ref, src1, src2; ++ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024]; ++ ++ init_fp16_max_tile_buffer (tmp_dst_buf); ++ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf); ++ ++ init_tile_config (&cfg); ++ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf); ++ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf); ++ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf); ++ ++ calc_matrix_dpfp16ps (&dst, &src1, &src2); ++ ++ _tile_dpfp16ps (1, 2, 3); ++ _tile_stored (1, dst_ref.buf, _STRIDE); ++ ++ if (!check_float_tile_register (&dst_ref, &dst)) ++ abort (); ++}
+diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index f34e7a977..b00cfff03 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -80,6 +80,7 @@ extern void test_keylocker (void) __attribute__((__target__("kl"))); + extern void test_widekl (void) __attribute__((__target__("widekl"))); + extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); + extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); ++extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16"))); + + extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); + extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); +@@ -161,6 +162,7 @@ extern void test_no_keylocker (void) __attribute__((__target__("no-kl"))); + extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); + extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); + extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16"))); ++extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16"))); + + extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); + extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
+diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c +index 375d4d1b4..9ab4a7e0c 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-12.c ++++ b/gcc/testsuite/gcc.target/i386/sse-12.c +@@ -3,7 +3,7 @@ + popcntintrin.h gfniintrin.h and mm_malloc.h are usable + with -O -std=c89 -pedantic-errors. 
*/ + /* { dg-do compile } */ +-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ ++/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16" } */ + + #include <x86intrin.h> + +diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c +index e285c307d..a1e453a98 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-13.c ++++ b/gcc/testsuite/gcc.target/i386/sse-13.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ ++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ + /* { dg-add-options bind_pic_locally } */ + + #include <mm_malloc.h> +diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c +index f41493b93..eaa1a8d81 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-14.c ++++ b/gcc/testsuite/gcc.target/i386/sse-14.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes 
-mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ ++/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ + /* { dg-add-options bind_pic_locally } */ + + #include <mm_malloc.h> +diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c +index 31492ef36..19afe639d 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-22.c ++++ b/gcc/testsuite/gcc.target/i386/sse-22.c +@@ -103,7 +103,7 @@ + + + #ifndef DIFFERENT_PRAGMAS +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") ++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16") + #endif + + /* Following intrinsics require immediate arguments. 
They +@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) + + /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ + #ifdef DIFFERENT_PRAGMAS +-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") ++#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16") + #endif + #include <immintrin.h> + test_1 (_cvtss_sh, unsigned short, float, 1) +diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c +index b398fd144..151201d97 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-23.c ++++ b/gcc/testsuite/gcc.target/i386/sse-23.c +@@ -843,6 +843,6 @@ + #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) + #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") ++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16") + + #include <x86intrin.h> +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index c858bd93b..0d83c780c 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -9972,6 +9972,17 @@ proc check_effective_target_amx_bf16 { } { + } "-mamx-bf16" + } + ++# Return 1 if amx-fp16 instructions can be compiled. ++proc check_effective_target_amx_fp16 { } { ++ return check_no_compiler_messages amx_fp16 object { ++ void ++ foo () ++ { ++ __asm__ volatile ("tdpfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::); ++ } ++ } "-mamx-fp16" ++} ++ + # Return 1 if vpclmulqdq instructions can be compiled. + proc check_effective_target_vpclmulqdq { } { + return check_no_compiler_messages vpclmulqdq object { +-- +2.31.1 +
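For readers skimming the testsuite hunks above: the user-visible entry point this AMX-FP16 backport adds is the _tile_dpfp16ps intrinsic, enabled by -mamx-fp16. A minimal sketch of a call site (illustrative only, not from the patch; it assumes the tiles were already configured and loaded, e.g. via _tile_loadconfig and _tile_loadd):

/* Hypothetical example; compile with: gcc -O2 -mamx-tile -mamx-fp16 */
#include <immintrin.h>

void fp16_dot_product (void)
{
  /* tmm1 += tmm2 * tmm3: each pair of FP16 elements is multiplied and
     the products are accumulated into single-precision elements of
     tile 1.  Tile numbers must be compile-time constants in 0-7.  */
  _tile_dpfp16ps (1, 2, 3);
}

The amxfp16-dpfp16ps-2.c runtime test above checks exactly this instruction against the scalar calc_matrix_dpfp16ps reference implementation.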
View file
_service:tar_scm:0273-Support-Intel-prefetchit0-t1.patch
Added
@@ -0,0 +1,902 @@
+From 42a38c8abaa28f67e26b9af3f434fe0107894e7d Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Fri, 4 Nov 2022 15:01:05 +0800
+Subject: [PATCH 19/28] Support Intel prefetchit0/t1
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h (get_available_features):
+	Detect PREFETCHI.
+	* common/config/i386/i386-common.cc
+	(OPTION_MASK_ISA2_PREFETCHI_SET,
+	OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
+	(ix86_handle_option): Handle -mprefetchi.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_features): Add FEATURE_PREFETCHI.
+	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY
+	for prefetchi.
+	* config.gcc: Add prfchiintrin.h.
+	* config/i386/cpuid.h (bit_PREFETCHI): New.
+	* config/i386/i386-builtin-types.def:
+	Add DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
+	and DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT).
+	* config/i386/i386-builtin.def (BDESC): Add new builtins.
+	* config/i386/i386-c.cc (ix86_target_macros_internal):
+	Define __PREFETCHI__.
+	* config/i386/i386-expand.cc: Handle new builtins.
+	* config/i386/i386-isa.def (PREFETCHI):
+	Add DEF_PTA(PREFETCHI).
+	* config/i386/i386-options.cc
+	(ix86_valid_target_attribute_inner_p): Handle prefetchi.
+	* config/i386/i386.md (prefetchi): New define_insn.
+	* config/i386/i386.opt: Add option -mprefetchi.
+	* config/i386/predicates.md (local_func_symbolic_operand):
+	New predicates.
+	* config/i386/x86gprintrin.h: Include prfchiintrin.h.
+	* config/i386/xmmintrin.h (enum _mm_hint): New enum for
+	prefetchi.
+	(_mm_prefetch): Handle the highest bit of enum.
+	* doc/extend.texi: Document prefetchi.
+	* doc/invoke.texi: Document -mprefetchi.
+	* doc/sourcebuild.texi: Document target prefetchi.
+	* config/i386/prfchiintrin.h: New file.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.dg/other/i386-2.C: Add -mprefetchi.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/avx-1.c: Ditto.
+	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
+	* gcc.target/i386/sse-13.c: Add -mprefetchi.
+	* gcc.target/i386/sse-23.c: Ditto.
+	* gcc.target/i386/x86gprintrin-1.c: Ditto.
+	* gcc.target/i386/x86gprintrin-2.c: Ditto.
+	* gcc.target/i386/x86gprintrin-3.c: Ditto.
+	* gcc.target/i386/x86gprintrin-4.c: Ditto.
+	* gcc.target/i386/x86gprintrin-5.c: Ditto.
+	* gcc.target/i386/prefetchi-1.c: New test.
+	* gcc.target/i386/prefetchi-2.c: Ditto.
+	* gcc.target/i386/prefetchi-3.c: Ditto.
+	* gcc.target/i386/prefetchi-4.c: Ditto.
+ +Co-authored-by: Hongtao Liu <hongtao.liu@intel.com> +--- + gcc/common/config/i386/cpuinfo.h | 2 + + gcc/common/config/i386/i386-common.cc | 15 ++++ + gcc/common/config/i386/i386-cpuinfo.h | 1 + + gcc/common/config/i386/i386-isas.h | 1 + + gcc/config.gcc | 2 +- + gcc/config/i386/cpuid.h | 1 + + gcc/config/i386/i386-builtin-types.def | 4 + + gcc/config/i386/i386-builtin.def | 4 + + gcc/config/i386/i386-c.cc | 2 + + gcc/config/i386/i386-expand.cc | 77 +++++++++++++++++++ + gcc/config/i386/i386-isa.def | 1 + + gcc/config/i386/i386-options.cc | 4 +- + gcc/config/i386/i386.md | 23 ++++++ + gcc/config/i386/i386.opt | 4 + + gcc/config/i386/predicates.md | 15 ++++ + gcc/config/i386/prfchiintrin.h | 49 ++++++++++++ + gcc/config/i386/x86gprintrin.h | 2 + + gcc/config/i386/xmmintrin.h | 7 +- + gcc/doc/extend.texi | 5 ++ + gcc/doc/invoke.texi | 7 +- + gcc/doc/sourcebuild.texi | 3 + + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- + gcc/testsuite/g++.dg/other/i386-3.C | 2 +- + gcc/testsuite/gcc.target/i386/avx-1.c | 4 +- + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + + gcc/testsuite/gcc.target/i386/prefetchi-1.c | 40 ++++++++++ + gcc/testsuite/gcc.target/i386/prefetchi-2.c | 26 +++++++ + gcc/testsuite/gcc.target/i386/prefetchi-3.c | 20 +++++ + gcc/testsuite/gcc.target/i386/prefetchi-4.c | 19 +++++ + gcc/testsuite/gcc.target/i386/sse-13.c | 4 +- + gcc/testsuite/gcc.target/i386/sse-23.c | 4 +- + .../gcc.target/i386/x86gprintrin-1.c | 2 +- + .../gcc.target/i386/x86gprintrin-2.c | 2 +- + .../gcc.target/i386/x86gprintrin-3.c | 2 +- + .../gcc.target/i386/x86gprintrin-4.c | 2 +- + .../gcc.target/i386/x86gprintrin-5.c | 2 +- + 36 files changed, 343 insertions(+), 19 deletions(-) + create mode 100644 gcc/config/i386/prfchiintrin.h + create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1.c + create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c + create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c + create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-4.c + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 5951a30aa..f17e88144 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -772,6 +772,8 @@ get_available_features (struct __processor_model *cpu_model, + __cpuid_count (7, 1, eax, ebx, ecx, edx); + if (eax & bit_HRESET) + set_feature (FEATURE_HRESET); ++ if (edx & bit_PREFETCHI) ++ set_feature (FEATURE_PREFETCHI); + if (avx_usable) + { + if (eax & bit_AVXVNNI) +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index 922db33ee..c8cf532cf 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see + #define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8 + #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16 + #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16 ++#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI + + /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same + as -msse4.2. */ +@@ -277,6 +278,7 @@ along with GCC; see the file COPYING3. If not see + (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET) + #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL + #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16 ++#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI + + /* SSE4 includes both SSE4.1 and SSE4.2. 
-mno-sse4 should the same
+   as -mno-sse4.1. */
+@@ -1140,6 +1142,19 @@ ix86_handle_option (struct gcc_options *opts,
+ 	}
+       return true;
+ 
++    case OPT_mprefetchi:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET;
++	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_UNSET;
++	}
++      return true;
++
+     case OPT_mfma:
+       if (value)
+ 	{
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 8f22897de..95b078acf 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -241,6 +241,7 @@ enum processor_features
+   FEATURE_X86_64_V3,
+   FEATURE_X86_64_V4,
+   FEATURE_AMX_FP16,
++  FEATURE_PREFETCHI,
+   CPU_FEATURE_MAX
+ };
+ 
+diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
+index 95bab6da2..6caf06249 100644
+--- a/gcc/common/config/i386/i386-isas.h
++++ b/gcc/common/config/i386/i386-isas.h
+@@ -176,4 +176,5 @@ ISA_NAMES_TABLE_START
+ ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
+ ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
+ ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
++ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
+ ISA_NAMES_TABLE_END
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index e2b4a23dc..81012c651 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
+ 		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
+ 		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
+ 		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
+-		       amxfp16intrin.h"
++		       amxfp16intrin.h prfchiintrin.h"
+ 	;;
+ ia64-*-*)
+ 	extra_headers=ia64intrin.h
+diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
+index d6cd8d1bf..21100149a 100644
+--- a/gcc/config/i386/cpuid.h
++++ b/gcc/config/i386/cpuid.h
+@@ -50,6 +50,7 @@
+ 
+ /* %edx */
+ #define bit_CMPXCHG8B	(1 << 8)
++#define bit_PREFETCHI	(1 << 14)
+ #define bit_CMOV	(1 << 15)
+ #define bit_MMX	(1 << 23)
+ #define bit_FXSAVE	(1 << 24)
+diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
+index e33f06ab3..ff3b0af84 100644
+--- a/gcc/config/i386/i386-builtin-types.def
++++ b/gcc/config/i386/i386-builtin-types.def
+@@ -1387,3 +1387,7 @@ DEF_FUNCTION_TYPE (V32HF, V32HF)
+ DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
+ DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
+ DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
++
++# PREFETCHI builtins
++DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
++DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT)
+diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
+index 2b1d6c733..d3ab21eea 100644
+--- a/gcc/config/i386/i386-builtin.def
++++ b/gcc/config/i386/i386-builtin.def
+@@ -469,6 +469,10 @@ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesdecwide2
+ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide128kl_u8", IX86_BUILTIN_AESENCWIDE128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
+ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide256kl_u8", IX86_BUILTIN_AESENCWIDE256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
+ 
++/* PREFETCHI */
++BDESC (0, 0, CODE_FOR_prefetchi, "__builtin_ia32_prefetchi", IX86_BUILTIN_PREFETCHI,
UNKNOWN, (int) VOID_FTYPE_PCVOID_INT) ++BDESC (0, 0, CODE_FOR_nothing, "__builtin_ia32_prefetch", IX86_BUILTIN_PREFETCH, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT_INT_INT) ++ + BDESC_END (SPECIAL_ARGS, PURE_ARGS) + + /* AVX */ +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 4269f29e6..00880bd17 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__AVXVNNI__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16) + def_or_undef (parse_in, "__AMX_FP16__"); ++ if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI) ++ def_or_undef (parse_in, "__PREFETCHI__"); + if (TARGET_IAMCU) + { + def_or_undef (parse_in, "__iamcu"); +diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc +index 77dda5dd4..bc2e61980 100644 +--- a/gcc/config/i386/i386-expand.cc ++++ b/gcc/config/i386/i386-expand.cc +@@ -12850,6 +12850,83 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + return target; + } + ++ case IX86_BUILTIN_PREFETCH: ++ { ++ arg0 = CALL_EXPR_ARG (exp, 0); // const void * ++ arg1 = CALL_EXPR_ARG (exp, 1); // const int ++ arg2 = CALL_EXPR_ARG (exp, 2); // const int ++ arg3 = CALL_EXPR_ARG (exp, 3); // const int ++ ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ op3 = expand_normal (arg3); ++ ++ if (!CONST_INT_P (op1) || !CONST_INT_P (op2) || !CONST_INT_P (op3)) ++ { ++ error ("second, third and fourth argument must be a const"); ++ return const0_rtx; ++ } ++ ++ if (INTVAL (op3) == 1) ++ { ++ if (TARGET_64BIT ++ && local_func_symbolic_operand (op0, GET_MODE (op0))) ++ emit_insn (gen_prefetchi (op0, op2)); ++ else ++ { ++ warning (0, "instruction prefetch applies when in 64-bit mode" ++ " with RIP-relative addressing and" ++ " option %<-mprefetchi%>;" ++ " they stay NOPs otherwise"); ++ emit_insn (gen_nop ()); ++ } ++ } ++ else ++ { ++ if (!address_operand (op0, VOIDmode)) ++ { ++ op0 = convert_memory_address (Pmode, op0); ++ op0 = copy_addr_to_reg (op0); ++ } ++ emit_insn (gen_prefetch (op0, op1, op2)); ++ } ++ ++ return 0; ++ } ++ ++ case IX86_BUILTIN_PREFETCHI: ++ { ++ arg0 = CALL_EXPR_ARG (exp, 0); // const void * ++ arg1 = CALL_EXPR_ARG (exp, 1); // const int ++ ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ ++ if (!CONST_INT_P (op1)) ++ { ++ error ("second argument must be a const"); ++ return const0_rtx; ++ } ++ ++ /* GOT/PLT_PIC should not be available for instruction prefetch. ++ It must be real instruction address. */ ++ if (TARGET_64BIT ++ && local_func_symbolic_operand (op0, GET_MODE (op0))) ++ emit_insn (gen_prefetchi (op0, op1)); ++ else ++ { ++ /* Ignore the hint. 
*/
++	  warning (0, "instruction prefetch applies when in 64-bit mode"
++		      " with RIP-relative addressing and"
++		      " option %<-mprefetchi%>;"
++		      " they stay NOPs otherwise");
++	  emit_insn (gen_nop ());
++	}
++
++      return 0;
++    }
++
+     case IX86_BUILTIN_VEC_INIT_V2SI:
+     case IX86_BUILTIN_VEC_INIT_V4HI:
+     case IX86_BUILTIN_VEC_INIT_V8QI:
+diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
+index c7305c01b..744a7df85 100644
+--- a/gcc/config/i386/i386-isa.def
++++ b/gcc/config/i386/i386-isa.def
+@@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
+ DEF_PTA(AVXVNNI)
+ DEF_PTA(AVX512FP16)
+ DEF_PTA(AMX_FP16)
++DEF_PTA(PREFETCHI)
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 3edb7094e..724375f02 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -231,7 +231,8 @@ static struct ix86_target_opts isa2_opts[] =
+   { "-mwidekl",	OPTION_MASK_ISA2_WIDEKL },
+   { "-mavxvnni",	OPTION_MASK_ISA2_AVXVNNI },
+   { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
+-  { "-mamx-fp16",	OPTION_MASK_ISA2_AMX_FP16 }
++  { "-mamx-fp16",	OPTION_MASK_ISA2_AMX_FP16 },
++  { "-mprefetchi",	OPTION_MASK_ISA2_PREFETCHI }
+ };
+ static struct ix86_target_opts isa_opts[] =
+ {
+@@ -1076,6 +1077,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings,
+     IX86_ATTR_ISA ("avxvnni",   OPT_mavxvnni),
+     IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
+     IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
++    IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi),
+ 
+     /* enum options */
+     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
+diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
+index 71691f598..f08c2cfb1 100644
+--- a/gcc/config/i386/i386.md
++++ b/gcc/config/i386/i386.md
+@@ -329,6 +329,9 @@
+ 
+   ;; For HRESET support
+   UNSPECV_HRESET
++
++  ;; For PREFETCHI support
++  UNSPECV_PREFETCHI
+ )
+ 
+ ;; Constants to represent rounding modes in the ROUND instruction
+@@ -22907,6 +22910,26 @@
+ 	  (symbol_ref "memory_address_length (operands[0], false)"))
+    (set_attr "memory" "none")])
+ 
++(define_insn "prefetchi"
++  [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
++		     (match_operand:SI 1 "const_int_operand")]
++		    UNSPECV_PREFETCHI)]
++  "TARGET_PREFETCHI && TARGET_64BIT"
++{
++  static const char * const patterns[2] = {
++    "prefetchit1\t%0", "prefetchit0\t%0"
++  };
++
++  int locality = INTVAL (operands[1]);
++  gcc_assert (IN_RANGE (locality, 2, 3));
++
++  return patterns[locality - 2];
++}
++  [(set_attr "type" "sse")
++   (set (attr "length_address")
++	(symbol_ref "memory_address_length (operands[0], false)"))
++   (set_attr "memory" "none")])
++
+ (define_expand "stack_protect_set"
+   [(match_operand 0 "memory_operand")
+    (match_operand 1 "memory_operand")]
+diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
+index 52c6f02ee..50cd114f6 100644
+--- a/gcc/config/i386/i386.opt
++++ b/gcc/config/i386/i386.opt
+@@ -1230,3 +1230,7 @@ Enable vectorization for scatter instruction.
+ 
+ mamx-fp16
+ Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save
+ Support AMX-FP16 built-in functions and code generation.
++
++mprefetchi
++Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save
++Support PREFETCHI built-in functions and code generation.
+diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md +index ac02c61ac..774178b78 100644 +--- a/gcc/config/i386/predicates.md ++++ b/gcc/config/i386/predicates.md +@@ -610,6 +610,21 @@ + return false; + }) + ++(define_predicate "local_func_symbolic_operand" ++ (match_operand 0 "local_symbolic_operand") ++{ ++ if (GET_CODE (op) == CONST ++ && GET_CODE (XEXP (op, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (op, 0), 1))) ++ op = XEXP (XEXP (op, 0), 0); ++ ++ if (GET_CODE (op) == SYMBOL_REF ++ && !SYMBOL_REF_FUNCTION_P (op)) ++ return false; ++ ++ return true; ++}) ++ + ;; Test for a legitimate @GOTOFF operand. + ;; + ;; VxWorks does not impose a fixed gap between segments; the run-time +diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h +new file mode 100644 +index 000000000..06deef488 +--- /dev/null ++++ b/gcc/config/i386/prfchiintrin.h +@@ -0,0 +1,49 @@ ++/* Copyright (C) 2022 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#if !defined _X86GPRINTRIN_H_INCLUDED ++# error "Never use <prfchiintrin.h> directly; include <x86gprintrin.h> instead." ++#endif ++ ++#ifndef _PRFCHIINTRIN_H_INCLUDED ++#define _PRFCHIINTRIN_H_INCLUDED ++ ++#ifdef __x86_64__ ++ ++extern __inline void ++__attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_m_prefetchit0 (void* __P) ++{ ++ __builtin_ia32_prefetchi (__P, 3); ++} ++ ++extern __inline void ++__attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_m_prefetchit1 (void* __P) ++{ ++ __builtin_ia32_prefetchi (__P, 2); ++} ++ ++#endif ++ ++#endif /* _PRFCHIINTRIN_H_INCLUDED */ +diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h +index e0be01d5e..0768aa0d7 100644 +--- a/gcc/config/i386/x86gprintrin.h ++++ b/gcc/config/i386/x86gprintrin.h +@@ -72,6 +72,8 @@ + + #include <pkuintrin.h> + ++#include <prfchiintrin.h> ++ + #include <rdseedintrin.h> + + #include <rtmintrin.h> +diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h +index f1c704a2d..7fb179430 100644 +--- a/gcc/config/i386/xmmintrin.h ++++ b/gcc/config/i386/xmmintrin.h +@@ -36,6 +36,8 @@ + /* Constants for use with _mm_prefetch. */ + enum _mm_hint + { ++ _MM_HINT_IT0 = 19, ++ _MM_HINT_IT1 = 18, + /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. 
*/ + _MM_HINT_ET0 = 7, + _MM_HINT_ET1 = 6, +@@ -51,11 +53,12 @@ enum _mm_hint + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_prefetch (const void *__P, enum _mm_hint __I) + { +- __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3); ++ __builtin_ia32_prefetch (__P, (__I & 0x4) >> 2, ++ __I & 0x3, (__I & 0x10) >> 4); + } + #else + #define _mm_prefetch(P, I) \ +- __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3)) ++ __builtin_ia32_prefetch ((P), ((I) & 0x4) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4) + #endif + + #ifndef __SSE__ +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 4ba9d34cd..cb987f469 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -7043,6 +7043,11 @@ Enable/disable the generation of the AVXVNNI instructions. + @cindex @code{target("amx-fp16")} function attribute, x86 + Enable/disable the generation of the AMX-FP16 instructions. + ++@item prefetchi ++@itemx no-prefetchi ++@cindex @code{target("prefetchi")} function attribute, x86 ++Enable/disable the generation of the PREFETCHI instructions. ++ + @item cld + @itemx no-cld + @cindex @code{target("cld")} function attribute, x86 +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index d25f13217..211b970c0 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options. + -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol + -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol + -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol +--mavx512fp16 -mamx-fp16 @gol ++-mavx512fp16 -mamx-fp16 -mprefetchi @gol + -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol + -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol + -mkl -mwidekl @gol +@@ -32445,6 +32445,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. + @need 200 + @itemx -mamx-fp16 + @opindex mamx-fp16 ++@need 200 ++@itemx -mprefetchi ++@opindex mprefetchi + These switches enable the use of instructions in the MMX, SSE, + SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, + AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, +@@ -32455,7 +32458,7 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, + GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, + ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, + UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, +-AMX-FP16 or CLDEMOTE extended instruction sets. Each has a corresponding ++AMX-FP16, PREFETCHI or CLDEMOTE extended instruction sets. Each has a corresponding + @option{-mno-} option to disable use of these instructions. + + These extensions are also available as built-in functions: see +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index b64b62dee..c68e492dc 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2496,6 +2496,9 @@ Target does not require strict alignment. + @item pie_copyreloc + The x86-64 target linker supports PIE with copy reloc. + ++@item prefetchi ++Target supports the execution of @code{prefetchi} instructions. ++ + @item rdrand + Target supports x86 @code{rdrand} instruction. 
+ +diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C +index 57a6357aa..72ed5fed0 100644 +--- a/gcc/testsuite/g++.dg/other/i386-2.C ++++ b/gcc/testsuite/g++.dg/other/i386-2.C +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ ++/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C +index 1947547d6..9dd53653f 100644 +--- a/gcc/testsuite/g++.dg/other/i386-3.C ++++ b/gcc/testsuite/g++.dg/other/i386-3.C +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ ++/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */ + + /* Test that 
{,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c +index 154e7b3b1..2b46e1b87 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl" } */ ++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl -mprefetchi" } */ + /* { dg-add-options bind_pic_locally } */ + + #include <mm_malloc.h> +@@ -153,7 +153,7 @@ + #define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) + + /* xmmintrin.h */ +-#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, 0, _MM_HINT_NTA) ++#define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0) + #define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) + #define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index b00cfff03..9f073f78c 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -81,6 +81,7 @@ extern void test_widekl (void) __attribute__((__target__("widekl"))); + extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); + extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); + extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16"))); ++extern void test_prefetchi (void) __attribute__((__target__("prefetchi"))); + + extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); + extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); +@@ -163,6 +164,7 @@ extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); + extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); + extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16"))); + extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16"))); ++extern void test_no_prefetchi (void) __attribute__((__target__("no-prefetchi"))); + + extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); + extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); +diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1.c b/gcc/testsuite/gcc.target/i386/prefetchi-1.c +new file mode 100644 +index 000000000..80f25e70e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile { target { ! 
ia32 } } } */
++/* { dg-options "-mprefetchi -O2" } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+" 2 } } */
++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+" 2 } } */
++
++#include <x86intrin.h>
++
++int
++bar (int a)
++{
++  return a + 1;
++}
++
++int
++foo1 (int b)
++{
++  _mm_prefetch (bar, _MM_HINT_IT0);
++  return bar (b) + 1;
++}
++
++int
++foo2 (int b)
++{
++  _mm_prefetch (bar, _MM_HINT_IT1);
++  return bar (b) + 1;
++}
++
++int
++foo3 (int b)
++{
++  _m_prefetchit0 (bar);
++  return bar (b) + 1;
++}
++
++int
++foo4 (int b)
++{
++  _m_prefetchit1 (bar);
++  return bar (b) + 1;
++}
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-2.c b/gcc/testsuite/gcc.target/i386/prefetchi-2.c
+new file mode 100644
+index 000000000..e05ce9c73
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-2.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile { target { ia32 } } } */
++/* { dg-options "-mprefetchi -O2" } */
++/* { dg-final { scan-assembler-not "\[ \\t\]+prefetchit0" } } */
++/* { dg-final { scan-assembler-not "\[ \\t\]+prefetchit1" } } */
++
++#include <x86intrin.h>
++
++int
++bar (int a)
++{
++  return a + 1;
++}
++
++int
++foo1 (int b)
++{
++  __builtin_ia32_prefetch (bar, 0, 3, 1); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
++  return bar (b) + 1;
++}
++
++int
++foo2 (int b)
++{
++  __builtin_ia32_prefetchi (bar, 2); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
++  return bar (b) + 1;
++}
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-3.c b/gcc/testsuite/gcc.target/i386/prefetchi-3.c
+new file mode 100644
+index 000000000..f0a4173d2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-3.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-mprefetchi -O2" } */
++/* { dg-final { scan-assembler-not "prefetchit0" } } */
++/* { dg-final { scan-assembler-not "prefetchit1" } } */
++
++#include <x86intrin.h>
++
++void* p;
++
++void extern
++prefetchi_test1 (void)
++{
++  __builtin_ia32_prefetchi (p, 2); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
++}
++
++void extern
++prefetchi_test2 (void)
++{
++  __builtin_ia32_prefetch (p, 0, 3, 1); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
++}
+diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-4.c b/gcc/testsuite/gcc.target/i386/prefetchi-4.c
+new file mode 100644
+index 000000000..73ae596d1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/prefetchi-4.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++
++#include <x86intrin.h>
++
++void* p;
++
++void extern
++prefetch_test (void)
++{
++  __builtin_ia32_prefetch (p, 0, 3, 0);
++  __builtin_ia32_prefetch (p, 0, 2, 0);
++  __builtin_ia32_prefetch (p, 0, 1, 0);
++  __builtin_ia32_prefetch (p, 0, 0, 0);
++  __builtin_ia32_prefetch (p, 1, 3, 0);
++  __builtin_ia32_prefetch (p, 1, 2, 0);
++  __builtin_ia32_prefetch (p, 1, 1, 0);
++  __builtin_ia32_prefetch (p, 1, 0, 0);
++}
+diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
+index a1e453a98..db7c0fc7a 100644
+--- a/gcc/testsuite/gcc.target/i386/sse-13.c
++++ b/gcc/testsuite/gcc.target/i386/sse-13.c
+@@ -1,5 +1,5 @@
+ /* {
dg-do compile } */ +-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ ++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */ + /* { dg-add-options bind_pic_locally } */ + + #include <mm_malloc.h> +@@ -125,7 +125,7 @@ + #define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) + + /* xmmintrin.h */ +-#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, 0, _MM_HINT_NTA) ++#define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0) + #define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) + #define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c +index 151201d97..741694e87 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-23.c ++++ b/gcc/testsuite/gcc.target/i386/sse-23.c +@@ -94,7 +94,7 @@ + #define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) + + /* xmmintrin.h */ +-#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, 0, _MM_HINT_NTA) ++#define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0) + #define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) + #define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +@@ -843,6 +843,6 @@ + #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) + #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16") ++#pragma GCC target 
("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi") + + #include <x86intrin.h> +diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c +index 293be094b..efe7df13b 100644 +--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c ++++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c +@@ -1,7 +1,7 @@ + /* Test that <x86gprintrin.h> is usable with -O -std=c89 -pedantic-errors. */ + /* { dg-do compile } */ + /* { dg-options "-O -std=c89 -pedantic-errors -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */ +-/* { dg-additional-options "-muintr" { target { ! ia32 } } } */ ++/* { dg-additional-options "-muintr -mprefetchi" { target { ! ia32 } } } */ + + #include <x86gprintrin.h> + +diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c +index c63302757..5f6970df6 100644 +--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c ++++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -Werror-implicit-function-declaration -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */ + /* { dg-add-options bind_pic_locally } */ +-/* { dg-additional-options "-muintr" { target { ! ia32 } } } */ ++/* { dg-additional-options "-muintr -mprefetchi" { target { ! ia32 } } } */ + + /* Test that the intrinsics in <x86gprintrin.h> compile with optimization. + All of them are defined as inline functions that reference the proper +diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c +index 3a7e1f4a1..5c075c375 100644 +--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c ++++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O0 -Werror-implicit-function-declaration -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */ + /* { dg-add-options bind_pic_locally } */ +-/* { dg-additional-options "-muintr" { target { ! ia32 } } } */ ++/* { dg-additional-options "-muintr -mprefetchi" { target { ! ia32 } } } */ + + /* Test that the intrinsics in <x86gprintrin.h> compile without optimization. 
+ All of them are defined as inline functions that reference the proper +diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c +index d8a6126e5..bda4ecea3 100644 +--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c ++++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c +@@ -15,7 +15,7 @@ + + #ifndef DIFFERENT_PRAGMAS + #ifdef __x86_64__ +-#pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,uintr,xsaveopt") ++#pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,prefetchi,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,uintr,xsaveopt") + #else + #pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,xsaveopt") + #endif +diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c +index 9ef66fdad..4aadfd0b3 100644 +--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c ++++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c +@@ -28,7 +28,7 @@ + #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1) + + #ifdef __x86_64__ +-#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,uintr,xsavec,xsaveopt,xsaves,wbnoinvd") ++#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,prefetchi,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,uintr,xsavec,xsaveopt,xsaves,wbnoinvd") + #else + #pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,xsavec,xsaveopt,xsaves,wbnoinvd") + #endif +-- +2.31.1 +
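The user-facing surface of this patch is the _m_prefetchit0/_m_prefetchit1 intrinsics and the two new _mm_prefetch hints _MM_HINT_IT0/_MM_HINT_IT1. A minimal sketch mirroring prefetchi-1.c above (illustrative, not from the patch; the builtins emit prefetchit0/t1 only in 64-bit code with RIP-relative addressing under -mprefetchi, and degrade to NOPs with a warning otherwise):

/* Hypothetical example; compile with: gcc -O2 -mprefetchi (x86-64 only) */
#include <x86intrin.h>

int bar (int a) { return a + 1; }

int foo (int b)
{
  /* Prefetch the code at bar's address into the instruction cache.  */
  _m_prefetchit0 (bar);              /* direct intrinsic, T0 hint */
  _mm_prefetch (bar, _MM_HINT_IT0);  /* same effect via _mm_prefetch */
  return bar (b) + 1;
}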
View file
_service:tar_scm:0274-Initial-Granite-Rapids-Support.patch
Added
@@ -0,0 +1,277 @@
+From 7f0f8b585cf60b4c09bca42b5339995c2cc74633 Mon Sep 17 00:00:00 2001
+From: Haochen Jiang <haochen.jiang@intel.com>
+Date: Mon, 7 Nov 2022 11:04:57 +0800
+Subject: [PATCH 20/28] Initial Granite Rapids Support
+
+gcc/ChangeLog:
+
+	* common/config/i386/cpuinfo.h
+	(get_intel_cpu): Handle Granite Rapids.
+	* common/config/i386/i386-common.cc:
+	(processor_names): Add graniterapids.
+	(processor_alias_table): Ditto.
+	* common/config/i386/i386-cpuinfo.h
+	(enum processor_subtypes): Add INTEL_GRANTIERAPIDS.
+	* config.gcc: Add -march=graniterapids.
+	* config/i386/driver-i386.cc (host_detect_local_cpu):
+	Handle graniterapids.
+	* config/i386/i386-c.cc (ix86_target_macros_internal):
+	Ditto.
+	* config/i386/i386-options.cc (m_GRANITERAPIDS): New.
+	(processor_cost_table): Add graniterapids.
+	* config/i386/i386.h (enum processor_type):
+	Add PROCESSOR_GRANITERAPIDS.
+	(PTA_GRANITERAPIDS): Ditto.
+	* doc/extend.texi: Add graniterapids.
+	* doc/invoke.texi: Ditto.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/i386/mv16.C: Add graniterapids.
+	* gcc.target/i386/funcspec-56.inc: Handle new march.
+
+(cherry picked from commit 339ffc5a792dd66647392a235f2f7f6344c5359e)
+---
+ gcc/common/config/i386/cpuinfo.h              |  9 +++++++++
+ gcc/common/config/i386/i386-common.cc         |  3 +++
+ gcc/common/config/i386/i386-cpuinfo.h         |  1 +
+ gcc/config.gcc                                |  2 +-
+ gcc/config/i386/driver-i386.cc                |  5 ++++-
+ gcc/config/i386/i386-c.cc                     |  7 +++++++
+ gcc/config/i386/i386-options.cc               |  4 +++-
+ gcc/config/i386/i386.h                        |  3 +++
+ gcc/doc/extend.texi                           |  3 +++
+ gcc/doc/invoke.texi                           | 11 +++++++++++
+ gcc/testsuite/g++.target/i386/mv16.C          |  6 ++++++
+ gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
+ 12 files changed, 52 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
+index f17e88144..1f75ff1ca 100644
+--- a/gcc/common/config/i386/cpuinfo.h
++++ b/gcc/common/config/i386/cpuinfo.h
+@@ -528,6 +528,15 @@ get_intel_cpu (struct __processor_model *cpu_model,
+       cpu_model->__cpu_type = INTEL_COREI7;
+       cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+       break;
++    case 0xad:
++    case 0xae:
++      /* Granite Rapids.  */
++      cpu = "graniterapids";
++      CHECK___builtin_cpu_is ("corei7");
++      CHECK___builtin_cpu_is ("graniterapids");
++      cpu_model->__cpu_type = INTEL_COREI7;
++      cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
++      break;
+     case 0x17:
+     case 0x1d:
+       /* Penryn.
*/
+diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
+index c8cf532cf..1aa163463 100644
+--- a/gcc/common/config/i386/i386-common.cc
++++ b/gcc/common/config/i386/i386-common.cc
+@@ -1855,6 +1855,7 @@ const char *const processor_names[] =
+   "sapphirerapids",
+   "alderlake",
+   "rocketlake",
++  "graniterapids",
+   "intel",
+   "geode",
+   "k6",
+@@ -1973,6 +1974,8 @@ const pta processor_alias_table[] =
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+   {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+     M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
++  {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
++    M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
+   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+     M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
+   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
+diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
+index 95b078acf..7b2d4d242 100644
+--- a/gcc/common/config/i386/i386-cpuinfo.h
++++ b/gcc/common/config/i386/i386-cpuinfo.h
+@@ -92,6 +92,7 @@ enum processor_subtypes
+   AMDFAM19H_ZNVER3,
+   INTEL_COREI7_ROCKETLAKE,
+   AMDFAM19H_ZNVER4,
++  INTEL_COREI7_GRANITERAPIDS,
+   CPU_SUBTYPE_MAX
+ };
+ 
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 81012c651..9bad238e3 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -670,7 +670,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
+ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
+ skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
+ sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
+-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native"
++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
+ 
+ # Additional x86 processors supported by --with-cpu=.  Each processor
+ # MUST be separated by exactly one space.
+diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
+index 3b5161aed..ea8c3d8d1 100644
+--- a/gcc/config/i386/driver-i386.cc
++++ b/gcc/config/i386/driver-i386.cc
+@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
+       /* This is unknown family 0x6 CPU.  */
+       if (has_feature (FEATURE_AVX))
+ 	{
++	  /* Assume Granite Rapids.  */
++	  if (has_feature (FEATURE_AMX_FP16))
++	    cpu = "graniterapids";
+ 	  /* Assume Tiger Lake */
+-	  if (has_feature (FEATURE_AVX512VP2INTERSECT))
++	  else if (has_feature (FEATURE_AVX512VP2INTERSECT))
+ 	    cpu = "tigerlake";
+ 	  /* Assume Sapphire Rapids.
*/
+ 	  else if (has_feature (FEATURE_TSXLDTRK))
+diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
+index 00880bd17..04f1dd682 100644
+--- a/gcc/config/i386/i386-c.cc
++++ b/gcc/config/i386/i386-c.cc
+@@ -242,6 +242,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ 	  def_or_undef (parse_in, "__sapphirerapids");
+ 	  def_or_undef (parse_in, "__sapphirerapids__");
+ 	  break;
++	case PROCESSOR_GRANITERAPIDS:
++	  def_or_undef (parse_in, "__graniterapids");
++	  def_or_undef (parse_in, "__graniterapids__");
++	  break;
+ 	case PROCESSOR_ALDERLAKE:
+ 	  def_or_undef (parse_in, "__alderlake");
+ 	  def_or_undef (parse_in, "__alderlake__");
+@@ -419,6 +423,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
+ 	case PROCESSOR_ROCKETLAKE:
+ 	  def_or_undef (parse_in, "__tune_rocketlake__");
+ 	  break;
++	case PROCESSOR_GRANITERAPIDS:
++	  def_or_undef (parse_in, "__tune_graniterapids__");
++	  break;
+ 	case PROCESSOR_INTEL:
+ 	case PROCESSOR_GENERIC:
+ 	  break;
+diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
+index 724375f02..6645e3259 100644
+--- a/gcc/config/i386/i386-options.cc
++++ b/gcc/config/i386/i386-options.cc
+@@ -127,10 +127,11 @@ along with GCC; see the file COPYING3.  If not see
+ #define m_SAPPHIRERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_SAPPHIRERAPIDS)
+ #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
+ #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
++#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
+ #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
+ 		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
+ 		       | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
+-		       | m_ROCKETLAKE)
++		       | m_ROCKETLAKE | m_GRANITERAPIDS)
+ #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
+ #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
+ #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
+@@ -761,6 +762,7 @@ static const struct processor_costs *processor_cost_table[] =
+   &icelake_cost,
+   &alderlake_cost,
+   &icelake_cost,
++  &icelake_cost,
+   &intel_cost,
+   &geode_cost,
+   &k6_cost,
+diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
+index aaa136ba0..75953defc 100644
+--- a/gcc/config/i386/i386.h
++++ b/gcc/config/i386/i386.h
+@@ -2250,6 +2250,7 @@ enum processor_type
+   PROCESSOR_SAPPHIRERAPIDS,
+   PROCESSOR_ALDERLAKE,
+   PROCESSOR_ROCKETLAKE,
++  PROCESSOR_GRANITERAPIDS,
+   PROCESSOR_INTEL,
+   PROCESSOR_GEODE,
+   PROCESSOR_K6,
+@@ -2356,6 +2357,8 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+   | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
+   | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
+   | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
++constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
++  | PTA_PREFETCHI;
+ constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
+   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
+ constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index cb987f469..ba9faf4b2 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -21829,6 +21829,9 @@ Intel Core i7 Alderlake CPU.
+ @item rocketlake
+ Intel Core i7 Rocketlake CPU.
+ 
++@item graniterapids
++Intel Core i7 graniterapids CPU.
++
+ @item bonnell
+ Intel Atom Bonnell CPU.
+ +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 211b970c0..8ca831dc1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31615,6 +31615,17 @@ CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD + PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. + ++@item graniterapids ++Intel graniterapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, ++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, ++AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, ++AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, ++VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, ++MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, ++SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, ++AVX512BF16, AMX-FP16 and PREFETCHI instruction set support. ++ + @item k6 + AMD K6 CPU with MMX instruction set support. + +diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C +index 683928729..65cc24f32 100644 +--- a/gcc/testsuite/g++.target/i386/mv16.C ++++ b/gcc/testsuite/g++.target/i386/mv16.C +@@ -92,6 +92,10 @@ int __attribute__ ((target("arch=rocketlake"))) foo () { + return 24; + } + ++int __attribute__ ((target("arch=graniterapids"))) foo () { ++ return 26; ++} ++ + int main () + { + int val = foo (); +@@ -130,6 +134,8 @@ int main () + assert (val == 23); + else if (__builtin_cpu_is ("rocketlake")) + assert (val == 24); ++ else if (__builtin_cpu_is ("graniterapids")) ++ assert (val == 25); + else + assert (val == 0); + +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index 9f073f78c..bdcfdbc88 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -188,6 +188,7 @@ extern void test_arch_cooperlake (void) __attribute__((__target__("arch= + extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapphirerapids"))); + extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake"))); + extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake"))); ++extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids"))); + extern void test_arch_k8 (void) __attribute__((__target__("arch=k8"))); + extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3"))); + extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron"))); +-- +2.31.1 +
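The patch above routes the new target through three user-visible surfaces: -march=graniterapids detection in the driver, the __graniterapids__/__tune_graniterapids__ macros, and the "graniterapids" string for the cpuinfo builtins. A minimal sketch of how a program can observe all three once this revision is installed; the program itself is illustrative, and only the quoted names come from the hunks above:

#include <stdio.h>

int main (void)
{
  __builtin_cpu_init ();                     /* harmless in main; required in early ctors */
#ifdef __graniterapids__
  puts ("built with -march=graniterapids");  /* macro defined by the i386-c.cc hunk */
#endif
  if (__builtin_cpu_is ("graniterapids"))    /* matches INTEL_COREI7_GRANITERAPIDS */
    puts ("running on Granite Rapids");
  return 0;
}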
_service:tar_scm:0275-Support-Intel-AMX-COMPLEX.patch
Added
@@ -0,0 +1,722 @@ +From 4f1aff10d93cabe8dfbaf076b6d826a142efb6e1 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang <haochen.jiang@intel.com> +Date: Wed, 31 May 2023 10:45:00 +0800 +Subject: [PATCH 21/28] Support Intel AMX-COMPLEX + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_available_features): + Detect AMX-COMPLEX. + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA2_AMX_COMPLEX_SET, + OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New. + (ix86_handle_option): Handle -mamx-complex. + * common/config/i386/i386-cpuinfo.h (enum processor_features): + Add FEATURE_AMX_COMPLEX. + * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for + amx-complex. + * config.gcc: Add amxcomplexintrin.h. + * config/i386/cpuid.h (bit_AMX_COMPLEX): New. + * config/i386/i386-c.cc (ix86_target_macros_internal): Define + __AMX_COMPLEX__. + * config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX). + * config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p): + Handle amx-complex. + * config/i386/i386.opt: Add option -mamx-complex. + * config/i386/immintrin.h: Include amxcomplexintrin.h. + * doc/extend.texi: Document amx-complex. + * doc/invoke.texi: Document -mamx-complex. + * doc/sourcebuild.texi: Document target amx-complex. + * config/i386/amxcomplexintrin.h: New file. + +gcc/testsuite/ChangeLog: + + * g++.dg/other/i386-2.C: Add -mamx-complex. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX. + * gcc.target/i386/amx-helper.h: Add amx-complex support. + * gcc.target/i386/funcspec-56.inc: Add new target attribute. + * gcc.target/i386/sse-12.c: Add -mamx-complex. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-22.c: Add amx-complex. + * gcc.target/i386/sse-23.c: Ditto. + * lib/target-supports.exp (check_effective_target_amx_complex): New. + * gcc.target/i386/amxcomplex-asmatt-1.c: New test. + * gcc.target/i386/amxcomplex-asmintel-1.c: Ditto. + * gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto. + * gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto. 
+--- + gcc/common/config/i386/cpuinfo.h | 2 + + gcc/common/config/i386/i386-common.cc | 19 +++++- + gcc/common/config/i386/i386-cpuinfo.h | 1 + + gcc/common/config/i386/i386-isas.h | 2 + + gcc/config.gcc | 2 +- + gcc/config/i386/amxcomplexintrin.h | 59 +++++++++++++++++++ + gcc/config/i386/cpuid.h | 1 + + gcc/config/i386/i386-c.cc | 2 + + gcc/config/i386/i386-isa.def | 1 + + gcc/config/i386/i386-options.cc | 4 +- + gcc/config/i386/i386.opt | 4 ++ + gcc/config/i386/immintrin.h | 2 + + gcc/doc/extend.texi | 5 ++ + gcc/doc/invoke.texi | 7 ++- + gcc/doc/sourcebuild.texi | 3 + + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- + gcc/testsuite/g++.dg/other/i386-3.C | 2 +- + gcc/testsuite/gcc.target/i386/amx-check.h | 3 + + gcc/testsuite/gcc.target/i386/amx-helper.h | 4 +- + .../gcc.target/i386/amxcomplex-asmatt-1.c | 15 +++++ + .../gcc.target/i386/amxcomplex-asmintel-1.c | 12 ++++ + .../i386/amxcomplex-cmmimfp16ps-2.c | 53 +++++++++++++++++ + .../i386/amxcomplex-cmmrlfp16ps-2.c | 53 +++++++++++++++++ + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- + gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- + gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- + gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- + gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- + gcc/testsuite/lib/target-supports.exp | 11 ++++ + 30 files changed, 268 insertions(+), 15 deletions(-) + create mode 100644 gcc/config/i386/amxcomplexintrin.h + create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c + create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c + create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c + create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 1f75ff1ca..39d3351db 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -798,6 +798,8 @@ get_available_features (struct __processor_model *cpu_model, + { + if (eax & bit_AMX_FP16) + set_feature (FEATURE_AMX_FP16); ++ if (edx & bit_AMX_COMPLEX) ++ set_feature (FEATURE_AMX_COMPLEX); + } + } + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index 1aa163463..87e8afe9b 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -109,6 +109,8 @@ along with GCC; see the file COPYING3. If not see + #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16 + #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16 + #define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI ++#define OPTION_MASK_ISA2_AMX_COMPLEX_SET \ ++ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX) + + /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same + as -msse4.2. */ +@@ -269,7 +271,8 @@ along with GCC; see the file COPYING3. 
If not see + #define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE + #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT + #define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK +-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE ++#define OPTION_MASK_ISA2_AMX_TILE_UNSET \ ++ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET) + #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8 + #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16 + #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR +@@ -279,6 +282,7 @@ along with GCC; see the file COPYING3. If not see + #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL + #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16 + #define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI ++#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX + + /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same + as -mno-sse4.1. */ +@@ -1155,6 +1159,19 @@ ix86_handle_option (struct gcc_options *opts, + } + return true; + ++ case OPT_mamx_complex: ++ if (value) ++ { ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_COMPLEX_SET; ++ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_SET; ++ } ++ else ++ { ++ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_COMPLEX_UNSET; ++ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_UNSET; ++ } ++ return true; ++ + case OPT_mfma: + if (value) + { +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 7b2d4d242..56020faac 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -243,6 +243,7 @@ enum processor_features + FEATURE_X86_64_V4, + FEATURE_AMX_FP16, + FEATURE_PREFETCHI, ++ FEATURE_AMX_COMPLEX, + CPU_FEATURE_MAX + }; + +diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h +index 6caf06249..cbef68479 100644 +--- a/gcc/common/config/i386/i386-isas.h ++++ b/gcc/common/config/i386/i386-isas.h +@@ -177,4 +177,6 @@ ISA_NAMES_TABLE_START + ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL) + ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16") + ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi") ++ ISA_NAMES_TABLE_ENTRY("amx-complex", FEATURE_AMX_COMPLEX, ++ P_NONE, "-mamx-complex") + ISA_NAMES_TABLE_END +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 9bad238e3..ca5c8f8a0 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -424,7 +424,7 @@ i3456786-*-* | x86_64-*-*) + amxbf16intrin.h x86gprintrin.h uintrintrin.h + hresetintrin.h keylockerintrin.h avxvnniintrin.h + mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h +- amxfp16intrin.h prfchiintrin.h" ++ amxfp16intrin.h prfchiintrin.h amxcomplexintrin.h" + ;; + ia64-*-*) + extra_headers=ia64intrin.h +diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h +new file mode 100644 +index 000000000..6ea1eca04 +--- /dev/null ++++ b/gcc/config/i386/amxcomplexintrin.h +@@ -0,0 +1,59 @@ ++/* Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. 
++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#if !defined _IMMINTRIN_H_INCLUDED ++#error "Never use <amxcomplexintrin.h> directly; include <immintrin.h> instead." ++#endif ++ ++#ifndef _AMXCOMPLEXINTRIN_H_INCLUDED ++#define _AMXCOMPLEXINTRIN_H_INCLUDED ++ ++#if !defined(__AMX_COMPLEX__) ++#pragma GCC push_options ++#pragma GCC target("amx-complex") ++#define __DISABLE_AMX_COMPLEX__ ++#endif /* __AMX_COMPLEX__ */ ++ ++#if defined(__x86_64__) ++#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3) \ ++ __asm__ volatile\ ++ ("{tcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) ++ ++#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) \ ++ __asm__ volatile\ ++ ("{tcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) ++ ++#define _tile_cmmimfp16ps(src1_dst,src2,src3) \ ++ _tile_cmmimfp16ps_internal (src1_dst, src2, src3) ++ ++#define _tile_cmmrlfp16ps(src1_dst,src2,src3) \ ++ _tile_cmmrlfp16ps_internal (src1_dst, src2, src3) ++ ++#endif ++ ++#ifdef __DISABLE_AMX_COMPLEX__ ++#undef __DISABLE_AMX_COMPLEX__ ++#pragma GCC pop_options ++#endif /* __DISABLE_AMX_COMPLEX__ */ ++ ++#endif /* _AMXCOMPLEXINTRIN_H_INCLUDED */ +diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h +index 21100149a..530a45fad 100644 +--- a/gcc/config/i386/cpuid.h ++++ b/gcc/config/i386/cpuid.h +@@ -136,6 +136,7 @@ + #define bit_AMX_BF16 (1 << 22) + #define bit_AMX_TILE (1 << 24) + #define bit_AMX_INT8 (1 << 25) ++#define bit_AMX_COMPLEX (1 << 8) + + /* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */ + #define bit_XSAVEOPT (1 << 0) +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 04f1dd682..5e0ac278c 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -644,6 +644,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__AMX_FP16__"); + if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI) + def_or_undef (parse_in, "__PREFETCHI__"); ++ if (isa_flag2 & OPTION_MASK_ISA2_AMX_COMPLEX) ++ def_or_undef (parse_in, "__AMX_COMPLEX__"); + if (TARGET_IAMCU) + { + def_or_undef (parse_in, "__iamcu"); +diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def +index 744a7df85..7445b1bf7 100644 +--- a/gcc/config/i386/i386-isa.def ++++ b/gcc/config/i386/i386-isa.def +@@ -111,3 +111,4 @@ DEF_PTA(AVXVNNI) + DEF_PTA(AVX512FP16) + DEF_PTA(AMX_FP16) + DEF_PTA(PREFETCHI) ++DEF_PTA(AMX_COMPLEX) +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 6645e3259..7efd25084 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -233,7 +233,8 @@ static struct ix86_target_opts isa2_opts = + { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, + { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, + { "-mamx-fp16", 
OPTION_MASK_ISA2_AMX_FP16 }, +- { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI } ++ { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI }, ++ { "-mamx-complex", OPTION_MASK_ISA2_AMX_COMPLEX } + }; + static struct ix86_target_opts isa_opts = + { +@@ -1080,6 +1081,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings, + IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), + IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16), + IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi), ++ IX86_ATTR_ISA ("amx-complex", OPT_mamx_complex), + + /* enum options */ + IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index 50cd114f6..fba94f3f6 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1234,3 +1234,7 @@ Support AMX-FP16 built-in functions and code generation. + mprefetchi + Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save + Support PREFETCHI built-in functions and code generation. ++ ++mamx-complex ++Target Mask(ISA2_AMX_COMPLEX) Var(ix86_isa_flags2) Save ++Support AMX-COMPLEX built-in functions and code generation. +diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h +index 0447ca4b2..bd819c7f4 100644 +--- a/gcc/config/i386/immintrin.h ++++ b/gcc/config/i386/immintrin.h +@@ -124,6 +124,8 @@ + + #include <amxbf16intrin.h> + ++#include <amxcomplexintrin.h> ++ + #include <prfchwintrin.h> + + #include <keylockerintrin.h> +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index ba9faf4b2..d7b0bc802 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -7048,6 +7048,11 @@ Enable/disable the generation of the AMX-FP16 instructions. + @cindex @code{target("prefetchi")} function attribute, x86 + Enable/disable the generation of the PREFETCHI instructions. + ++@cindex @code{target("amx-complex")} function attribute, x86 ++@item amx-complex ++@itemx no-amx-complex ++Enable/disable the generation of the AMX-COMPLEX instructions. ++ + @item cld + @itemx no-cld + @cindex @code{target("cld")} function attribute, x86 +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 8ca831dc1..186b33481 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options. + -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol + -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol + -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol +--mavx512fp16 -mamx-fp16 -mprefetchi @gol ++-mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex @gol + -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol + -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol + -mkl -mwidekl @gol +@@ -32459,6 +32459,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. + @need 200 + @itemx -mprefetchi + @opindex mprefetchi ++@need 200 ++@opindex mamx-complex ++@itemx -mamx-complex + These switches enable the use of instructions in the MMX, SSE, + SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, + AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, +@@ -32469,7 +32472,7 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, + GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, + ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, + UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, +-AMX-FP16, PREFETCHI or CLDEMOTE extended instruction sets. 
Each has a corresponding ++AMX-FP16, PREFETCHI, AMX-COMPLEX or CLDEMOTE extended instruction sets. Each has a corresponding + @option{-mno-} option to disable use of these instructions. + + These extensions are also available as built-in functions: see +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index c68e492dc..454fae11a 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2472,6 +2472,9 @@ Target supports the execution of @code{amx-int8} instructions. + @item amx_bf16 + Target supports the execution of @code{amx-bf16} instructions. + ++@item amx_complex ++Target supports the execution of @code{amx-complex} instructions. ++ + @item amx_fp16 + Target supports the execution of @code{amx-fp16} instructions. + +diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C +index 72ed5fed0..ae1b8f632 100644 +--- a/gcc/testsuite/g++.dg/other/i386-2.C ++++ b/gcc/testsuite/g++.dg/other/i386-2.C +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */ ++/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C +index 9dd53653f..783e35774 100644 +--- a/gcc/testsuite/g++.dg/other/i386-3.C ++++ b/gcc/testsuite/g++.dg/other/i386-3.C +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni 
-mavx512fp16 -mamx-fp16 -mprefetchi" } */ ++/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h +index 27dd37bf9..f1a04cf1f 100644 +--- a/gcc/testsuite/gcc.target/i386/amx-check.h ++++ b/gcc/testsuite/gcc.target/i386/amx-check.h +@@ -216,6 +216,9 @@ main () + #ifdef AMX_FP16 + && __builtin_cpu_supports ("amx-fp16") + #endif ++#ifdef AMX_COMPLEX ++ && __builtin_cpu_supports ("amx-complex") ++#endif + #ifdef __linux__ + && request_perm_xtile_data () + #endif +diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h +index fe24d7067..6ed9f5eb3 100644 +--- a/gcc/testsuite/gcc.target/i386/amx-helper.h ++++ b/gcc/testsuite/gcc.target/i386/amx-helper.h +@@ -1,6 +1,6 @@ + #ifndef AMX_HELPER_H_INCLUDED + #define AMX_HELPER_H_INCLUDED +-#if defined(AMX_FP16) ++#if defined(AMX_FP16) || defined(AMX_COMPLEX) + #include <immintrin.h> + #include <xmmintrin.h> + #endif +@@ -12,7 +12,7 @@ typedef union + uint16_t u; + } union16f_uw; + +-#if defined(AMX_FP16) ++#if defined(AMX_FP16) || defined(AMX_COMPLEX) + /* Transformation functions between fp16/float */ + static uint16_t make_f32_fp16 (float f) + { +diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c +new file mode 100644 +index 000000000..b6745e34b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -mamx-complex" } */ ++/* { dg-final { scan-assembler "tcmmimfp16ps\ \\t+\^\n\*%tmm3+\^\n\*%tmm2+\^\n\*%tmm1" } } */ ++/* { dg-final { scan-assembler "tcmmrlfp16ps\ \\t+\^\n\*%tmm3+\^\n\*%tmm2+\^\n\*%tmm1" } } */ ++#include <immintrin.h> ++ ++#define TMM1 1 ++#define TMM2 2 ++#define TMM3 3 ++ ++void TEST() ++{ ++ _tile_cmmimfp16ps (TMM1, TMM2, TMM3); ++ _tile_cmmrlfp16ps (TMM1, TMM2, TMM3); ++} +diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c +new file mode 100644 +index 000000000..305465e88 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile { target { ! 
ia32 } } } */ ++/* { dg-require-effective-target masm_intel } */ ++/* { dg-options "-O2 -mamx-complex -masm=intel" } */ ++/* { dg-final { scan-assembler "tcmmimfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ ++/* { dg-final { scan-assembler "tcmmrlfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ ++#include <immintrin.h> ++ ++void TEST() ++{ ++ _tile_cmmimfp16ps (1, 2, 3); ++ _tile_cmmrlfp16ps (1, 2, 3); ++} +diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c +new file mode 100644 +index 000000000..6e3762c9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c +@@ -0,0 +1,53 @@ ++/* { dg-do run { target { ! ia32 } } } */ ++/* { dg-require-effective-target amx_complex } */ ++/* { dg-require-effective-target avx512fp16 } */ ++/* { dg-options "-O2 -mamx-complex -mavx512fp16" } */ ++#define AMX_COMPLEX ++#define DO_TEST test_amx_complex_cmmimfp16ps ++void test_amx_complex_cmmimfp16ps (); ++#include "amx-helper.h" ++ ++void calc_matrix_cmmimfp16ps (__tile *dst, __tile *src1, __tile *src2) ++{ ++ uint16_t *src1_buf = (uint16_t *) src1->buf; ++ uint16_t *src2_buf = (uint16_t *) src2->buf; ++ float *dst_buf = (float *) dst->buf; ++ ++ int M = src1->rows; ++ int N = src1->colsb / 4; ++ int K = src2->colsb / 4; ++ int i, j, k, t; ++ ++ for (i = 0; i < M; i++) ++ for (j = 0; j < N; j++) ++ for (k = 0; k < K; k++) ++ for (t = 0; t < 2; t+=2) ++ dst_buf[i * N + k] += ++ (make_fp16_f32(src1_buf[i * 2 * N + 2 * j + t]) * ++ make_fp16_f32(src2_buf[j * 2 * K + 2 * k + t + 1])) + ++ (make_fp16_f32(src1_buf[i * 2 * N + 2 * j + t + 1]) * ++ make_fp16_f32(src2_buf[j * 2 * K + 2 * k + t])); ++} ++ ++void test_amx_complex_cmmimfp16ps () ++{ ++ __tilecfg_u cfg; ++ __tile dst, dst_ref, src1, src2; ++ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024]; ++ ++ init_fp16_max_tile_buffer (tmp_dst_buf); ++ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf); ++ ++ init_tile_config (&cfg); ++ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf); ++ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf); ++ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf); ++ ++ calc_matrix_cmmimfp16ps (&dst, &src1, &src2); ++ ++ _tile_cmmimfp16ps (1, 2, 3); ++ _tile_stored (1, dst_ref.buf, _STRIDE); ++ ++ if (!check_tile_register (&dst_ref, &dst)) ++ abort (); ++} +diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c +new file mode 100644 +index 000000000..15940708a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c +@@ -0,0 +1,53 @@ ++/* { dg-do run { target { ! ia32 } } } */ ++/* { dg-require-effective-target amx_complex } */ ++/* { dg-require-effective-target avx512fp16 } */ ++/* { dg-options "-O2 -mamx-complex -mavx512fp16" } */ ++#define AMX_COMPLEX ++#define DO_TEST test_amx_complex_cmmrlfp16ps ++void test_amx_complex_cmmrlfp16ps(); ++#include "amx-helper.h" ++ ++void calc_matrix_cmmrlfp16ps (__tile *dst, __tile *src1, __tile *src2) ++{ ++ uint16_t *src1_buf = (uint16_t *) src1->buf; ++ uint16_t *src2_buf = (uint16_t *) src2->buf; ++ float *dst_buf = (float *) dst->buf; ++ ++ int M = src1->rows; ++ int N = src1->colsb / 4; ++ int K = src2->colsb / 4; ++ int i, j, k, t; ++ ++ for (i = 0; i < M; i++) ++ for (j = 0; j < N; j++) ++ for (k = 0; k < K; k++) ++ for (t = 0; t < 2; t+=2) ++ dst_buf[i * N + k] += ++ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) * ++ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) - ++ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) * ++ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1])); ++} ++ ++void test_amx_complex_cmmrlfp16ps () ++{ ++ __tilecfg_u cfg; ++ __tile dst, dst_ref, src1, src2; ++ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024]; ++ ++ init_fp16_max_tile_buffer (tmp_dst_buf); ++ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf); ++ ++ init_tile_config (&cfg); ++ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf); ++ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf); ++ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf); ++ ++ calc_matrix_cmmrlfp16ps (&dst, &src1, &src2); ++ ++ _tile_cmmrlfp16ps (1, 2, 3); ++ _tile_stored (1, dst_ref.buf, _STRIDE); ++ ++ if (!check_tile_register (&dst_ref, &dst)) ++ abort (); ++} +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index bdcfdbc88..1a2f3b83d 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -82,6 +82,7 @@ extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); + extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); + extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16"))); + extern void test_prefetchi (void) __attribute__((__target__("prefetchi"))); ++extern void test_amx_complex (void) __attribute__((__target__("amx-complex"))); + + extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); + extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); +@@ -165,6 +166,7 @@ extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); + extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16"))); + extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16"))); + extern void test_no_prefetchi (void) __attribute__((__target__("no-prefetchi"))); ++extern void test_no_amx_complex (void) __attribute__((__target__("no-amx-complex"))); + + extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); + extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); +diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c +index 9ab4a7e0c..d2aadd506 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-12.c ++++ b/gcc/testsuite/gcc.target/i386/sse-12.c +@@ -3,7 +3,7 @@ + popcntintrin.h gfniintrin.h and mm_malloc.h are usable + with -O -std=c89 -pedantic-errors. 
*/ + /* { dg-do compile } */ +-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16" } */ ++/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16 -mamx-complex" } */ + + #include <x86intrin.h> + +diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c +index db7c0fc7a..c39382836 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-13.c ++++ b/gcc/testsuite/gcc.target/i386/sse-13.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */ ++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */ + /* { dg-add-options bind_pic_locally } */ + + #include <mm_malloc.h> +diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c +index eaa1a8d81..c34ac1aec 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-14.c ++++ b/gcc/testsuite/gcc.target/i386/sse-14.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O0 
-Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */ ++/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mamx-complex" } */ + /* { dg-add-options bind_pic_locally } */ + + #include <mm_malloc.h> +diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c +index 19afe639d..c3667b829 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-22.c ++++ b/gcc/testsuite/gcc.target/i386/sse-22.c +@@ -103,7 +103,7 @@ + + + #ifndef DIFFERENT_PRAGMAS +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16") ++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,amx-complex") + #endif + + /* Following intrinsics require immediate arguments. 
They +@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) + + /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ + #ifdef DIFFERENT_PRAGMAS +-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16") ++#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,amx-complex") + #endif + #include <immintrin.h> + test_1 (_cvtss_sh, unsigned short, float, 1) +diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c +index 741694e87..756b6eb9c 100644 +--- a/gcc/testsuite/gcc.target/i386/sse-23.c ++++ b/gcc/testsuite/gcc.target/i386/sse-23.c +@@ -843,6 +843,6 @@ + #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) + #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi") ++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi,amx-complex") + + #include <x86intrin.h> +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 0d83c780c..d404058fd 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -9421,6 +9421,17 @@ proc check_effective_target_avxvnni { } { + } "-mavxvnni" + } + ++# Return 1 if amx-complex instructions can be compiled. ++proc check_effective_target_amx_complex { } { ++ return check_no_compiler_messages amx_complex object { ++ void ++ foo () ++ { ++ __asm__ volatile ("tcmmimfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::); ++ } ++ } "-mamx-complex" ++} ++ + # Return 1 if sse instructions can be compiled. + proc check_effective_target_sse { } { + return check_no_compiler_messages sse object { +-- +2.31.1 +
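Beyond the option plumbing, the user-facing pieces of this patch are the -mamx-complex flag (whose SET mask also pulls in AMX-TILE), the __AMX_COMPLEX__ macro, and the two intrinsics in amxcomplexintrin.h. A minimal compile-time sketch of the intrinsics; tile configuration and tile loads are elided (a real program must configure and load tmm1-tmm3 first, as the runtime tests above do through amx-helper.h), and the function name is illustrative:

/* build: gcc -O2 -mamx-complex demo.c  (64-bit only) */
#include <immintrin.h>

void complex_fp16_mma_step (void)
{
  /* Tile numbers must be literal constants: the macros paste them
     directly into the asm template.  */
  _tile_cmmrlfp16ps (1, 2, 3);  /* tmm1 += real part of tmm2 * tmm3 */
  _tile_cmmimfp16ps (1, 2, 3);  /* tmm1 += imaginary part of tmm2 * tmm3 */
}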
_service:tar_scm:0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
Added
@@ -0,0 +1,30 @@ +From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang <haochen.jiang@intel.com> +Date: Fri, 31 Mar 2023 10:49:14 +0800 +Subject: [PATCH 22/28] i386: Add AMX-COMPLEX to Granite Rapids + +gcc/ChangeLog: + + * config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX. + +(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e) +--- + gcc/config/i386/i386.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 75953defc..56d7794dc 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX + | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE + | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; + constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 +- | PTA_PREFETCHI; ++ | PTA_PREFETCHI | PTA_AMX_COMPLEX; + constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW + | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; + constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 +-- +2.31.1 +
_service:tar_scm:0277-Initial-Granite-Rapids-D-Support.patch
Added
@@ -0,0 +1,212 @@ +From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001 +From: "Mo, Zewei" <zewei.mo@intel.com> +Date: Mon, 3 Jul 2023 11:00:26 +0800 +Subject: [PATCH 23/28] Initial Granite Rapids D Support + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h + (get_intel_cpu): Handle Granite Rapids D. + * common/config/i386/i386-common.cc: + (processor_alias_table): Add graniterapids-d. + * common/config/i386/i386-cpuinfo.h + (enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D. + * config.gcc: Add -march=graniterapids-d. + * config/i386/driver-i386.cc (host_detect_local_cpu): + Handle graniterapids-d. + * config/i386/i386.h: (PTA_GRANITERAPIDS_D): New. + * doc/extend.texi: Add graniterapids-d. + * doc/invoke.texi: Ditto. + +gcc/testsuite/ChangeLog: + + * g++.target/i386/mv16.C: Add graniterapids-d. + * gcc.target/i386/funcspec-56.inc: Handle new march. + +(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d) +--- + gcc/common/config/i386/cpuinfo.h | 9 ++++++++- + gcc/common/config/i386/i386-common.cc | 2 ++ + gcc/common/config/i386/i386-cpuinfo.h | 1 + + gcc/config.gcc | 3 ++- + gcc/config/i386/driver-i386.cc | 5 ++++- + gcc/config/i386/i386.h | 4 +++- + gcc/doc/extend.texi | 3 +++ + gcc/doc/invoke.texi | 11 +++++++++++ + gcc/testsuite/g++.target/i386/mv16.C | 6 ++++++ + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 + + 10 files changed, 41 insertions(+), 4 deletions(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 39d3351db..1e53248ef 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS; + break; + case 0xad: +- case 0xae: + /* Granite Rapids. */ + cpu = "graniterapids"; + CHECK___builtin_cpu_is ("corei7"); +@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_type = INTEL_COREI7; + cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS; + break; ++ case 0xae: ++ /* Granite Rapids D. */ ++ cpu = "graniterapids-d"; ++ CHECK___builtin_cpu_is ("corei7"); ++ CHECK___builtin_cpu_is ("graniterapids-d"); ++ cpu_model->__cpu_type = INTEL_COREI7; ++ cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D; ++ break; + case 0x17: + case 0x1d: + /* Penryn. 
*/ +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index 87e8afe9b..28f468f48 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1993,6 +1993,8 @@ const pta processor_alias_table = + M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2}, + {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS, + M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F}, ++ {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D, ++ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F}, + {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, + M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3}, + {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 56020faac..a32f32c97 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -93,6 +93,7 @@ enum processor_subtypes + INTEL_COREI7_ROCKETLAKE, + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, ++ INTEL_COREI7_GRANITERAPIDS_D, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index ca5c8f8a0..3108ac4eb 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ + silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ +-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native" ++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \ ++graniterapids-d native" + + # Additional x86 processors supported by --with-cpu=. Each processor + # MUST be separated by exactly one space. +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index ea8c3d8d1..e3bca4b49 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv) + /* This is unknown family 0x6 CPU. */ + if (has_feature (FEATURE_AVX)) + { ++ /* Assume Granite Rapids D. */ ++ if (has_feature (FEATURE_AMX_COMPLEX)) ++ cpu = "graniterapids-d"; + /* Assume Granite Rapids. 
*/ +- if (has_feature (FEATURE_AMX_FP16)) ++ else if (has_feature (FEATURE_AMX_FP16)) + cpu = "graniterapids"; + /* Assume Tiger Lake */ + else if (has_feature (FEATURE_AVX512VP2INTERSECT)) +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 56d7794dc..eda3e5e5b 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX + | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE + | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; + constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 +- | PTA_PREFETCHI | PTA_AMX_COMPLEX; ++ | PTA_PREFETCHI; ++constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS ++ | PTA_AMX_COMPLEX; + constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW + | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; + constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index d7b0bc802..674db2f1a 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU. + @item graniterapids + Intel Core i7 graniterapids CPU. + ++@item graniterapids-d ++Intel Core i7 graniterapids D CPU. ++ + @item bonnell + Intel Atom Bonnell CPU. + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 186b33481..a2ec060fd 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, + SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, + AVX512BF16, AMX-FP16 and PREFETCHI instruction set support. + ++@item graniterapids-d ++Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, ++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, ++AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, ++AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, ++VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, ++MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, ++SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, ++AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support. ++ + @item k6 + AMD K6 CPU with MMX instruction set support. 
+ +diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C +index 65cc24f32..17b1fc722 100644 +--- a/gcc/testsuite/g++.target/i386/mv16.C ++++ b/gcc/testsuite/g++.target/i386/mv16.C +@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () { + return 26; + } + ++int __attribute__ ((target("arch=graniterapids-d"))) foo () { ++ return 28; ++} ++ + int main () + { + int val = foo (); +@@ -136,6 +140,8 @@ int main () + assert (val == 24); + else if (__builtin_cpu_is ("graniterapids")) + assert (val == 25); ++ else if (__builtin_cpu_is ("graniterapids-d")) ++ assert (val == 26); + else + assert (val == 0); + +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index 1a2f3b83d..f0f3397a7 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -191,6 +191,7 @@ extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapp + extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake"))); + extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake"))); + extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids"))); ++extern void test_arch_graniterapids_d (void) __attribute__((__target__("arch=graniterapids-d"))); + extern void test_arch_k8 (void) __attribute__((__target__("arch=k8"))); + extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3"))); + extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron"))); +-- +2.31.1 +
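With the separate INTEL_COREI7_GRANITERAPIDS_D subtype in place, the new name also works in function multiversioning, which is what the mv16.C hunk above checks. A condensed sketch of that dispatch pattern (C++, like the testsuite; the function and its return values are arbitrary):

__attribute__ ((target ("arch=graniterapids")))   int foo () { return 1; }
__attribute__ ((target ("arch=graniterapids-d"))) int foo () { return 2; }
__attribute__ ((target ("default")))              int foo () { return 0; }

int dispatch () { return foo (); }  /* resolver selects a version via __builtin_cpu_is */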
_service:tar_scm:0278-Correct-Granite-Rapids-D-documentation.patch
Added
@@ -0,0 +1,48 @@ +From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang <haochen.jiang@intel.com> +Date: Thu, 20 Jul 2023 10:47:18 +0800 +Subject: [PATCH 24/28] Correct Granite Rapids{, D} documentation + +gcc/ChangeLog: + + * doc/invoke.texi: Remove AVX512VP2INTERSECT in + Granite Rapids{, D} from documentation. + +(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9) +--- + gcc/doc/invoke.texi | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index a2ec060fd..4d3eccdb2 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, + AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, +-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, +-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, +-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support. ++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, ++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16 ++and PREFETCHI instruction set support. + + @item graniterapids-d + Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, + AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, +-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, +-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, +-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support. ++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, ++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16, ++PREFETCHI and AMX-COMPLEX instruction set support. + + @item k6 + AMD K6 CPU with MMX instruction set support. +-- +2.31.1 +
_service:tar_scm:0279-i386-Remove-Meteorlake-s-family_model.patch
Added
@@ -0,0 +1,30 @@ +From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001 +From: "Hu, Lin1" <lin1.hu@intel.com> +Date: Thu, 15 Dec 2022 15:51:18 +0800 +Subject: [PATCH 25/28] i386: Remove Meteorlake's family_model + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5 + for meteorlake. + +(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b) +--- + gcc/common/config/i386/cpuinfo.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 1e53248ef..348bc0c12 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model, + /* Alder Lake. */ + case 0xb7: + /* Raptor Lake. */ +- case 0xb5: + case 0xaa: + case 0xac: + /* Meteor Lake. */ +-- +2.31.1 +
_service:tar_scm:0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch
Added
@@ -0,0 +1,33 @@ +From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001 +From: "Cui, Lili" <lili.cui@intel.com> +Date: Thu, 29 Jun 2023 03:10:35 +0000 +Subject: [PATCH 26/28] x86: Update model values for Alderlake, Rocketlake and + Raptorlake. + +Update model values for Alderlake, Rocketlake and Raptorlake according to SDM. + +gcc/ChangeLog + + * common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8 + from Rocketlake, move model value 0xbf from Alderlake to Raptorlake. + +(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335) +--- + gcc/common/config/i386/cpuinfo.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 348bc0c12..f9bcb6fad 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model, + case 0x9a: + /* Alder Lake. */ + case 0xb7: ++ case 0xbf: + /* Raptor Lake. */ + case 0xaa: + case 0xac: +-- +2.31.1 +
_service:tar_scm:0281-x86-Update-model-values-for-Raptorlake.patch
Added
@@ -0,0 +1,32 @@ +From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001 +From: "Cui, Lili" <lili.cui@intel.com> +Date: Mon, 14 Aug 2023 02:06:00 +0000 +Subject: [PATCH 27/28] x86: Update model values for Raptorlake. + +Update model values for Raptorlake according to SDM. + +gcc/ChangeLog + + * common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba + to Raptorlake. + +(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316) +--- + gcc/common/config/i386/cpuinfo.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index f9bcb6fad..da1568fd1 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model, + case 0x9a: + /* Alder Lake. */ + case 0xb7: ++ case 0xba: + case 0xbf: + /* Raptor Lake. */ + case 0xaa: +-- +2.31.1 +
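The three patches above only move model numbers between the case labels of get_intel_cpu. For reference, a sketch of where those numbers come from; this mirrors the CPUID leaf 1 decode that feeds get_intel_cpu (the helper name is illustrative). A Raptor Lake part reporting EAX = 0xb0671, for example, decodes to family 0x6, model 0xb7:

#include <cpuid.h>

static unsigned intel_model (void)
{
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;
  unsigned family = (eax >> 8) & 0x0f;
  unsigned model  = (eax >> 4) & 0x0f;
  if (family == 0x06)
    model += ((eax >> 16) & 0x0f) << 4;  /* fold in the extended-model nibble */
  return model;
}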
_service:tar_scm:0282-Fix-target_clone-arch-graniterapids-d.patch
Added
@@ -0,0 +1,159 @@ +From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001 +From: liuhongt <hongtao.liu@intel.com> +Date: Tue, 22 Aug 2023 18:18:31 +0800 +Subject: PATCH 28/28 Fix target_clone ("arch=graniterapids-d") + +Both "graniterapid-d" and "graniterapids" are attached with +PROCESSOR_GRANITERAPID in processor_alias_table but mapped to +different __cpu_subtype in get_intel_cpu. + +And get_builtin_code_for_version will try to match the first +PROCESSOR_GRANITERAPIDS in processor_alias_table which maps to +"granitepraids" here. + +861 else if (new_target->arch_specified && new_target->arch > 0) +1862 for (i = 0; i < pta_size; i++) +1863 if (processor_alias_tablei.processor == new_target->arch) +1864 { +1865 const pta *arch_info = &processor_alias_tablei; +1866 switch (arch_info->priority) +1867 { +1868 default: +1869 arg_str = arch_info->name; + +This mismatch makes dispatch_function_versions check the preidcate +of__builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes +the issue. +The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction. + +For "alderlake","raptorlake", "meteorlake" they share same isa, cost, +tuning, and mapped to the same __cpu_type/__cpu_subtype in +get_intel_cpu, so no need to add PROCESSOR_RAPTORLAKE and others. + +gcc/ChangeLog: + + * common/config/i386/i386-common.cc (processor_names): Add new + member graniterapids-s. + * config/i386/i386-options.cc (processor_alias_table): Update + table with and PROCESSOR_GRANITERAPIDS_D. + (m_GRANITERAPID_D): New macro. + (m_CORE_AVX512): Add m_GRANITERAPIDS_D. + (processor_cost_table): Add icelake_cost for + PROCESSOR_GRANITERAPIDS_D. + * config/i386/i386.h (enum processor_type): Add new member + PROCESSOR_GRANITERAPIDS_D. + * config/i386/i386-c.cc (ix86_target_macros_internal): Handle + PROCESSOR_GRANITERAPIDS_D +--- + gcc/common/config/i386/i386-common.cc | 6 ++++-- + gcc/config/i386/i386-c.cc | 8 ++++++++ + gcc/config/i386/i386-options.cc | 4 +++- + gcc/config/i386/i386.h | 3 ++- + 4 files changed, 17 insertions(+), 4 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index 28f468f48..bec6801ce 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1873,6 +1873,7 @@ const char *const processor_names = + "alderlake", + "rocketlake", + "graniterapids", ++ "graniterapids-d", + "intel", + "geode", + "k6", +@@ -1993,8 +1994,9 @@ const pta processor_alias_table = + M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2}, + {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS, + M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F}, +- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D, +- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F}, ++ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL, ++ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), ++ P_PROC_AVX512F}, + {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, + M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3}, + {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 5e0ac278c..49f0db2b8 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__graniterapids"); + def_or_undef (parse_in, "__graniterapids__"); + break; ++ case PROCESSOR_GRANITERAPIDS_D: ++ 
def_or_undef (parse_in, "__graniterapids_d"); ++ def_or_undef (parse_in, "__graniterapids_d__"); ++ break; + case PROCESSOR_ALDERLAKE: + def_or_undef (parse_in, "__alderlake"); + def_or_undef (parse_in, "__alderlake__"); + break; +@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__rocketlake"); + def_or_undef (parse_in, "__rocketlake__"); + break; ++ + /* use PROCESSOR_max to not set/unset the arch macro. */ + case PROCESSOR_max: + break; +@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_GRANITERAPIDS: + def_or_undef (parse_in, "__tune_graniterapids__"); + break; ++ case PROCESSOR_GRANITERAPIDS_D: ++ def_or_undef (parse_in, "__tune_graniterapids_d__"); ++ break; + case PROCESSOR_INTEL: + case PROCESSOR_GENERIC: + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 7efd25084..86932d719 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see + #define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE) + #define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE) + #define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS) ++#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D) + #define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \ + | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ + | m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \ +- | m_ROCKETLAKE | m_GRANITERAPIDS) ++ | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D) + #define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512) + #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2) + #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT) +@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table[] = + &alderlake_cost, + &icelake_cost, + &icelake_cost, ++ &icelake_cost, + &intel_cost, + &geode_cost, + &k6_cost, +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index eda3e5e5b..5052f878d 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER]; + #define DEFAULT_LARGE_SECTION_THRESHOLD 65536 + + /* Which processor to tune code generation for. These must be in sync +- with processor_target_table in i386.cc. */ ++ with processor_cost_table in i386-options.cc. */ + + enum processor_type + { +@@ -2251,6 +2251,7 @@ enum processor_type + PROCESSOR_ALDERLAKE, + PROCESSOR_ROCKETLAKE, + PROCESSOR_GRANITERAPIDS, ++ PROCESSOR_GRANITERAPIDS_D, + PROCESSOR_INTEL, + PROCESSOR_GEODE, + PROCESSOR_K6, +-- +2.31.1 +
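To see what the distinct PROCESSOR_GRANITERAPIDS_D entry buys, consider a multiversioned function. The sketch below is illustrative only (the function name is invented, not from the patch); before the fix, the resolver emitted via get_builtin_code_for_version guarded the "arch=graniterapids-d" clone with __builtin_cpu_is ("graniterapids"), because both alias-table entries shared one processor_type, so the clone could be dispatched incorrectly.

__attribute__((target_clones ("default", "arch=graniterapids-d")))
int
compute (int x)
{
  /* The runtime resolver picks this clone from a CPU check; with the
     patch it tests the graniterapids-d subtype rather than plain
     graniterapids.  */
  return x + 1;
}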
View file
_service:tar_scm:0283-i386-Change-prefetchi-output-template.patch
Added
@@ -0,0 +1,62 @@ +From 59e07c6c77dcc92d274ca6156b8916f66035dce8 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang <haochen.jiang@intel.com> +Date: Mon, 22 Jul 2024 14:06:18 +0800 +Subject: [PATCH 1/2] i386: Change prefetchi output template + +For prefetchi instructions, a RIP-relative address is explicitly required +for the operand, and the assembler enforces that rule strictly. This makes +an instruction like: + + prefetchit0 bar + +illegal for the assembler, even though it should be a common usage of +prefetchi. + +Change the output template to %a to explicitly add (%rip) after the +function label, making it legal for the assembler and letting the linker +resolve the real address. + +gcc/ChangeLog: + + * config/i386/i386.md (prefetchi): Change to %a. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/prefetchi-1.c: Check (%rip). + +Reference: +https://gcc.gnu.org/git/?p=gcc.git;a=commit;h= +062e46a813799684c6f900815fd22451d6294ae1 +--- + gcc/config/i386/i386.md | 2 +- + gcc/testsuite/gcc.target/i386/prefetchi-1.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index f08c2cfb1..1b733008e 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -22917,7 +22917,7 @@ + "TARGET_PREFETCHI && TARGET_64BIT" + { + static const char * const patterns[2] = { +- "prefetchit1\t%0", "prefetchit0\t%0" ++ "prefetchit1\t%a0", "prefetchit0\t%a0" + }; + + int locality = INTVAL (operands[1]); +diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1.c b/gcc/testsuite/gcc.target/i386/prefetchi-1.c +index 80f25e70e..03dfdc55e 100644 +--- a/gcc/testsuite/gcc.target/i386/prefetchi-1.c ++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1.c +@@ -1,7 +1,7 @@ + /* { dg-do compile { target { ! ia32 } } } */ + /* { dg-options "-mprefetchi -O2" } */ +-/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+" 2 } } */ +-/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+" 2 } } */ ++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 2 } } */ ++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 2 } } */ + + #include <x86intrin.h> + +-- +2.31.1 +
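The effect of the %a modifier is easiest to see from a small use of the intrinsic. A minimal sketch, assuming a 64-bit target compiled with -mprefetchi (the function names are invented):

#include <x86intrin.h>

extern int bar (int);

void
warm_icache (void)
{
  /* With the old "%0" template this emitted "prefetchit0 bar", which
     assemblers reject; with "%a0" it emits "prefetchit0 bar(%rip)".  */
  _m_prefetchit0 ((void *) bar);
}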
View file
_service:tar_scm:0284-i386-Add-non-optimize-prefetchi-intrins.patch
Added
@@ -0,0 +1,92 @@ +From c19afda0ee549d294fd5714c63db24bcd4570d03 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang <haochen.jiang@intel.com> +Date: Thu, 25 Jul 2024 16:16:05 +0800 +Subject: [PATCH 2/2] i386: Add non-optimize prefetchi intrins + +Under -O0, with the newly introduced intrins, the variable is +transformed into a mem instead of the original symbol_ref. The compiler +then treats the operand as invalid and turns the operation into a nop, +which is not expected. Use a macro for the non-optimized case to keep the +variable a symbol_ref, just as the prefetch intrin does. + +gcc/ChangeLog: + + * config/i386/prfchiintrin.h + (_m_prefetchit0): Add macro for non-optimized option. + (_m_prefetchit1): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/prefetchi-1b.c: New test. + +Reference: +https://gcc.gnu.org/git/?p=gcc.git;a=commit;h= +b4524c4430ba9771265bd9fc31e69a3f35dfe117 +--- + gcc/config/i386/prfchiintrin.h | 9 +++++++ + gcc/testsuite/gcc.target/i386/prefetchi-1b.c | 26 ++++++++++++++++++++ + 2 files changed, 35 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1b.c + +diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h +index 06deef488..1e3d42dc3 100644 +--- a/gcc/config/i386/prfchiintrin.h ++++ b/gcc/config/i386/prfchiintrin.h +@@ -30,6 +30,7 @@ + + #ifdef __x86_64__ + ++#ifdef __OPTIMIZE__ + extern __inline void + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_prefetchit0 (void* __P) +@@ -43,6 +44,14 @@ _m_prefetchit1 (void* __P) + { + __builtin_ia32_prefetchi (__P, 2); + } ++#else ++#define _m_prefetchit0(P) \ ++ __builtin_ia32_prefetchi(P, 3) ++ ++#define _m_prefetchit1(P) \ ++ __builtin_ia32_prefetchi(P, 2) ++ ++#endif + + #endif + +diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1b.c b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c +new file mode 100644 +index 000000000..93139554d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-mprefetchi -O0" } */ ++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 1 } } */ ++/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 1 } } */ ++ ++#include <x86intrin.h> ++ ++int ++bar (int a) ++{ ++ return a + 1; ++} ++ ++int ++foo1 (int b) ++{ ++ _m_prefetchit0 (bar); ++ return bar (b) + 1; ++} ++ ++int ++foo2 (int b) ++{ ++ _m_prefetchit1 (bar); ++ return bar (b) + 1; ++} +-- +2.31.1 +
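A minimal sketch of the case the macro form rescues, mirroring the new prefetchi-1b.c test (my reading of the mechanism, not wording from the patch): at -O0 the inline wrapper's argument passes through a stack temporary, the builtin no longer sees the function's symbol_ref, and the prefetch silently becomes a nop; the macro substitutes bar directly into __builtin_ia32_prefetchi, so the symbol survives.

#include <x86intrin.h>

extern int bar (int);

void
f (void)
{
  _m_prefetchit1 (bar);  /* still emits prefetchit1 bar(%rip) at -O0 */
}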
View file
_service:tar_scm:0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch
Added
@@ -0,0 +1,32 @@ +From 239f0637307ff2f6afb1473e99d0bb0eaf8946b2 Mon Sep 17 00:00:00 2001 +From: xiezhiheng <xiezhiheng@huawei.com> +Date: Fri, 23 Aug 2024 15:37:17 +0800 +Subject: [PATCH 154/157] [SME] Recover hip09 and hip11 in aarch64-cores.def + +--- + gcc/config/aarch64/aarch64-cores.def | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index f069c81cf..3337fd1a0 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +@@ -171,6 +172,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) +-- +2.33.0 +
View file
_service:tar_scm:0286-Try-to-use-AI-model-to-guide-optimization.patch
Added
@@ -0,0 +1,671 @@ +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index fcfa54697..f42aeb8e8 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1449,6 +1449,7 @@ OBJS = \ + inchash.o \ + incpath.o \ + init-regs.o \ ++ ipa-hardware-detection.o \ + internal-fn.o \ + ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index fd98382fa..99e626641 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -185,6 +185,9 @@ const char *main_input_basename + Variable + int main_input_baselength + ++Variable ++bool optimize_maximum ++ + ; The base name used for auxiliary output files. + ; dump_base_name minus dump_base_ext. + +@@ -469,6 +472,10 @@ Ofast + Common Optimization + Optimize for speed disregarding exact standards compliance. + ++Om ++Common Optimization ++Optimize for maximizing radical optimization. ++ + Og + Common Optimization + Optimize for debugging experience rather than speed or size. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 309ecc3d9..ad853af9a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18637,6 +18637,134 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, + return stmt_cost; + } + ++/* Check whether in C language or LTO with only C language. */ ++extern bool lang_c_p (void); ++ ++static void ++override_C_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_ipa_reorder_fields = 1; ++ opts->x_flag_ipa_struct_reorg = 6; ++ opts->x_struct_layout_optimize_level = 6; ++ opts->x_flag_gnu89_inline = 1; ++ opts->x_flag_ccmp2 = 1; ++ opts->x_flag_array_widen_compare = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_flag_tree_slp_transpose_vectorize = 1; ++ opts->x_param_max_inline_insns_auto = 64; ++ opts->x_param_inline_unit_growth = 96; ++} ++ ++/* Check whether in CPP language or LTO with only CPP language. 
*/ ++static bool ++lang_cpp_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ if (!language_string) ++ { ++ return false; ++ } ++ if (lang_GNU_CXX ()) ++ { ++ return true; ++ } ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check ++ { ++ unsigned i = 0; ++ tree t = NULL_TREE; ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (lang_hooks.name, "GNU C++", 7)) ++ { ++ return false; ++ } ++ } ++ return true; ++ } ++ return false; ++} ++ ++static void ++override_CPP_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_finite_loops = 1; ++ opts->x_flag_omit_frame_pointer = 1; ++ opts->x_flag_sized_deallocation = 0; ++ opts->x_flag_loop_elim = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_param_early_inlining_insns = 256; ++ opts->x_param_max_inline_insns_auto = 128; ++ opts->x_param_inline_unit_growth = 256; ++ opts->x_flag_cmlt_arith = 1; ++} ++ ++static void ++override_optimize_options_1 (struct gcc_options *opts) ++{ ++ opts->x_flag_split_ldp_stp = 1; ++ opts->x_flag_if_conversion_gimple = 1; ++ opts->x_flag_ifcvt_allow_complicated_cmps = 1; ++ opts->x_param_ifcvt_allow_register_renaming = 2; ++ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48; ++ opts->x_param_max_rtl_if_conversion_predictable_cost = 48; ++} ++ ++static void ++override_Fortran_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_unroll_loops = 1; ++ opts->x_flag_unconstrained_commons = 1; ++ opts->x_param_ipa_cp_eval_threshold = 1; ++ opts->x_param_ipa_cp_unit_growth = 80; ++ opts->x_param_ipa_cp_max_recursive_depth = 8; ++ opts->x_param_large_unit_insns = 30000; ++ opts->x_flag_ira_loop_pressure = 1; ++ opts->x_flag_inline_functions_called_once = 0; ++ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY; ++ opts->x_flag_delayed_branch = 1; ++ opts->x_flag_gcse_las = 1; ++ opts->x_flag_gcse_sm = 1; ++ opts->x_flag_ipa_pta = 1; ++ opts->x_flag_reorder_blocks_and_partition = 1; ++ opts->x_flag_reorder_blocks = 1; ++ opts->x_flag_crypto_accel_aes = 1; ++ opts->x_param_flexible_seg_len = 1; ++} ++ ++/* Reset the optimize option. ++ After checking the model result, this function can ++ reset the more appropriate options. */ ++static void ++reset_machine_option (struct gcc_options *opts) ++{ ++ if (!(opts->x_optimize_maximum) ++ || strstr (opts->x_aarch64_tune_string, "hip09") == NULL) ++ { ++ return; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ override_optimize_options_1 (opts); ++ if (lang_c_p ()) ++ { ++ override_C_optimize_options (opts); ++ } ++ else if (lang_cpp_p ()) ++ { ++ override_CPP_optimize_options (opts); ++ } ++ else if (lang_GNU_Fortran ()) ++ { ++ override_Fortran_optimize_options (opts); ++ } ++ } ++} ++ ++ + /* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND + and which when vectorized would operate on vector type VECTYPE. Add the + cost of any embedded operations. 
*/ +@@ -20089,6 +20217,7 @@ aarch64_override_options_internal (struct gcc_options *opts) + && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) + opts->x_flag_prefetch_loop_arrays = 1; + ++ reset_machine_option (opts); + aarch64_override_options_after_change_1 (opts); + } + +diff --git a/gcc/ipa-hardware-detection.cc b/gcc/ipa-hardware-detection.cc +new file mode 100644 +index 000000000..8085a8c65 +--- /dev/null ++++ b/gcc/ipa-hardware-detection.cc +@@ -0,0 +1,243 @@ ++/* Hardware Detection. ++ Copyright (C) 2024-2024 Free Software Foundation, Inc. ++This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++#include "gimplify-me.h" ++ ++namespace { ++ ++/* Build a binary operation and gimplify it. Emit code before GSI. ++ Return the gimple_val holding the result. */ ++ ++static tree ++gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code, ++ tree type, tree a, tree b) ++{ ++ tree ret; ++ ++ ret = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), code, type, a, b); ++ return force_gimple_operand_gsi (gsi, ret, true, NULL, true, ++ GSI_SAME_STMT); ++} ++ ++static basic_block ++create_abort_bb (basic_block last_bb) ++{ ++ basic_block bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ tree fn = builtin_decl_implicit (BUILT_IN_ABORT); ++ gimple *g = gimple_build_call (fn, 0); ++ gsi_insert_after (&gsi, g, GSI_NEW_STMT); ++ return bb; ++} ++ ++static basic_block ++create_part_bb (basic_block last_bb, tree part_base) ++{ ++ basic_block bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ /* This number is used to efficiently identify the supported part range. 
*/ ++ tree part_cond = gimplify_build2 ( ++ &gsi, PLUS_EXPR, unsigned_type_node, part_base, ++ build_int_cst (unsigned_type_node, 4294963967)); ++ gcond *cond = gimple_build_cond (LE_EXPR, part_cond, ++ build_int_cst (unsigned_type_node, 2), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, input_location); ++ gsi_insert_before (&gsi, cond, GSI_SAME_STMT); ++ gsi_remove (&gsi, true); ++ return bb; ++} ++ ++static void ++create_detection_bb () ++{ ++ edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ basic_block ret_bb = old_e->dest; ++ ++ basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL) ++ { ++ add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ tree cpuid_decl = build_decl (input_location, VAR_DECL, ++ get_identifier ("cpuid"), unsigned_type_node); ++ add_local_decl (cfun, cpuid_decl); ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (detection_bb); ++ vec<tree, va_gc> *outputs = NULL; ++ tree purpose = build_string (strlen ("=r"), "=r"); ++ tree output = build_tree_list ( ++ build_tree_list (NULL_TREE, purpose), cpuid_decl); ++ vec_safe_push (outputs, output); ++ gasm *asm_stmt = gimple_build_asm_vec ( ++ "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL); ++ gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ ++ tree implementer = gimplify_build2 ( ++ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl, ++ build_int_cst (unsigned_type_node, 24)); ++ tree part_base = gimplify_build2 ( ++ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl, ++ build_int_cst (unsigned_type_node, 4)); ++ tree part = gimplify_build2 ( ++ &gsi, BIT_AND_EXPR, unsigned_type_node, part_base, ++ build_int_cst (unsigned_type_node, 4095)); ++ gcond *implementer_cond = gimple_build_cond ( ++ EQ_EXPR, implementer, ++ build_int_cst (unsigned_type_node, 72), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (implementer_cond, input_location); ++ gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT); ++ gsi_remove (&gsi, true); ++ ++ basic_block part_bb = create_part_bb (detection_bb, part); ++ basic_block abort_bb = create_abort_bb (part_bb); ++ ++ remove_edge_raw (old_e); ++ make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), ++ detection_bb, EDGE_FALLTHRU); ++ edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::likely (); ++ edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::unlikely (); ++ edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE); ++ part_true->probability = profile_probability::likely (); ++ edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE); ++ part_false->probability = profile_probability::unlikely (); ++ make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU); ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb); ++ set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb); ++ set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb); ++ } ++} ++ ++const pass_data pass_data_ipa_hardware_detection = ++{ ++ SIMPLE_IPA_PASS, ++ "hardware_detection", ++ OPTGROUP_NONE, ++ TV_IPA_HARDWARE_DETECTION, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_ipa_hardware_detection : public simple_ipa_opt_pass ++{ 
++public: ++ pass_ipa_hardware_detection (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_hardware_detection, ctxt) ++ {} ++ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++}; // class pass_ipa_hardware_detection ++ ++bool ++pass_ipa_hardware_detection::gate (function *) ++{ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ return (ai_infer_level ++ && optimize_maximum > 0 ++ /* Only enable in lto or whole_program. */ ++ && (in_lto_p || flag_whole_program)); ++} ++ ++unsigned int ++pass_ipa_hardware_detection::execute (function *) ++{ ++ unsigned int ret = 0; ++ cgraph_node *cnode; ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p ()) ++ { ++ continue; ++ } ++ if (cnode->definition) ++ { ++ if (!cnode->has_gimple_body_p () || cnode->inlined_to) ++ continue; ++ ++ cnode->get_body (); ++ function *fn = DECL_STRUCT_FUNCTION (cnode->decl); ++ if (!fn) ++ continue; ++ ++ if (DECL_NAME (cnode->decl) ++ && MAIN_NAME_P (DECL_NAME (cnode->decl))) ++ { ++ push_cfun (fn); ++ calculate_dominance_info (CDI_DOMINATORS); ++ ++ create_detection_bb (); ++ ++ cgraph_edge::rebuild_edges (); ++ free_dominance_info (CDI_DOMINATORS); ++ pop_cfun (); ++ } ++ } ++ } ++ return ret; ++} ++} // anon namespace ++ ++simple_ipa_opt_pass * ++make_pass_ipa_hardware_detection (gcc::context *ctxt) ++{ ++ return new pass_ipa_hardware_detection (ctxt); ++} +diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc +index 489a6e02a..12c3f7299 100644 +--- a/gcc/opts-common.cc ++++ b/gcc/opts-common.cc +@@ -992,6 +992,158 @@ opts_concat (const char *first, ...) + return newstr; + } + ++typedef int64_t (*run_ai_model_func)(int, const char **, ++ const char *, int, int64_t *); ++#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; } ++#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q) ++#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq) ++ ++static int64_t ++ai_infer_optimization (int argc, const char **argv, ++ const char *mcpu_option, ++ int argc_hw, int64_t *argv_hw) ++{ ++ /* Load dependent AI-framework libraries. */ ++ void *onnxruntime_lib_handle = NULL; ++ const char *onnxruntime_lib_path = "libonnxruntime.so"; ++ ++ onnxruntime_lib_handle = dlopen (onnxruntime_lib_path, ++ RTLD_LAZY | RTLD_GLOBAL); ++ if (!onnxruntime_lib_handle) ++ { ++ return -1; ++ } ++ ++ void *ai4c_lib_handle = NULL; ++ const char *ai4c_lib_path = "libONNXRunner.so"; ++ ++ ai4c_lib_handle = dlopen (ai4c_lib_path, RTLD_LAZY | RTLD_GLOBAL); ++ if (!ai4c_lib_handle) ++ { ++ return -1; ++ } ++ ++ /* Clear any existing error. */ ++ dlerror (); ++ ++ /* Run AI4Compiler model. 
*/ ++ if (ai4c_lib_handle == NULL || onnxruntime_lib_handle == NULL) ++ { ++ return -1; ++ } ++ ++ run_ai_model_func run_ai_model; ++ PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union; ++ PTR_UNION_AS_VOID_PTR (run_ai_model_func_union) ++ = dlsym (ai4c_lib_handle, "runONNXModelOptimizer"); ++ run_ai_model = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union); ++ if (!run_ai_model) ++ { ++ dlclose (ai4c_lib_handle); ++ dlclose (onnxruntime_lib_handle); ++ return -1; ++ } ++ int64_t model_pred = (*run_ai_model) (argc, argv, ++ mcpu_option, argc_hw, argv_hw); ++ ++ if (ai4c_lib_handle) ++ dlclose (ai4c_lib_handle); ++ ++ if (onnxruntime_lib_handle) ++ dlclose (onnxruntime_lib_handle); ++ ++ if (model_pred == 1) ++ putenv ("AI_INFER_LEVEL=1"); ++ return model_pred; ++} ++ ++static int ++handle_lto_option (unsigned int lang_mask, ++ unsigned int num_decoded_options, ++ unsigned int argc, ++ const char **argv, ++ struct cl_decoded_option *&opt_array) ++{ ++ int ret = 0; ++ char *lan = ""; ++ char *compiler = xstrdup (argv[0]); ++ lan = strrchr (compiler, '/'); ++ if (lan != NULL) ++ lan ++; ++ else ++ lan = compiler; ++ if (strstr (lan, "gcc") != NULL) ++ { ++ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 2); ++ const char* lto_flag = "-flto=8"; ++ decode_cmdline_option (&lto_flag, lang_mask, ++ &opt_array[num_decoded_options]); ++ ret++; ++ const char* ltopartition_flag = "-flto-partition=one"; ++ decode_cmdline_option (&ltopartition_flag, lang_mask, ++ &opt_array[num_decoded_options + 1]); ++ ret++; ++ } ++ else if (strstr (lan, "g++") != NULL ++ || strstr (lan, "gfortran") != NULL) ++ { ++ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 1); ++ const char* lto_flag = "-flto=8"; ++ decode_cmdline_option (&lto_flag, lang_mask, ++ &opt_array[num_decoded_options]); ++ ret++; ++ } ++ if (compiler) ++ free (compiler); ++ return ret; ++} ++ ++static int ++handle_machine_option (unsigned int lang_mask, ++ unsigned int num_decoded_options, ++ unsigned int argc, ++ const char **argv, ++ struct cl_decoded_option *&opt_array) ++{ ++ int ret = 0; ++ bool flag_Om = false; ++ bool flag_hip09 = false; ++ for (unsigned i = 1; i < argc; i ++) ++ { ++ if (strcmp (argv[i], "-Om") == 0) ++ flag_Om = true; ++ if (strstr (argv[i], "mcpu=hip09") != NULL) ++ flag_hip09 = true; ++ } ++ if (!flag_hip09 || !flag_Om) ++ { ++ return ret; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ return ret; ++ } ++ int argc_hw = 6; ++ int64_t argv_hw[argc_hw] = { ++ global_options.x_param_simultaneous_prefetches, ++ global_options.x_param_l1_cache_size, ++ global_options.x_param_l1_cache_line_size, ++ global_options.x_param_l2_cache_size, ++ global_options.x_param_prefetch_latency, ++ global_options.x_param_ipa_prefetch_distance_factor}; ++ int64_t output_pred = ai_infer_optimization ( ++ argc, argv, "hip09", argc_hw, argv_hw); ++ if (output_pred != 1) ++ { ++ return ret; ++ } ++ ++ return handle_lto_option (lang_mask, num_decoded_options, ++ argc, argv, opt_array); ++} ++ + /* Decode command-line options (ARGC and ARGV being the arguments of + main) into an array, setting *DECODED_OPTIONS to a pointer to that + array and *DECODED_OPTIONS_COUNT to the number of entries in the +@@ -1090,6 +1242,9 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + num_decoded_options++; + } + ++ num_decoded_options += handle_machine_option (lang_mask, num_decoded_options, ++ argc, argv, opt_array); ++ + *decoded_options = opt_array; +
*decoded_options_count = num_decoded_options; + prune_options (decoded_options, decoded_options_count, lang_mask); +diff --git a/gcc/opts.cc b/gcc/opts.cc +index e34e5ee8e..d97f6079f 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -780,6 +780,14 @@ default_options_optimization (struct gcc_options *opts, + opts->x_optimize_debug = 1; + break; + ++ case OPT_Om: ++ /* -Om adds flags to -O3. */ ++ opts->x_optimize_size = 0; ++ opts->x_optimize = 3; ++ opts->x_optimize_maximum = true; ++ opts->x_optimize_debug = 0; ++ break; ++ + case OPT_fopenacc: + if (opt->value) + openacc_mode = true; +@@ -2733,6 +2741,8 @@ common_handle_option (struct gcc_options *opts, + &= ~(SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT); + break; + ++ case OPT_Om: ++ break; + case OPT_O: + case OPT_Os: + case OPT_Ofast: +diff --git a/gcc/passes.def b/gcc/passes.def +index 8797f166f..690d344c0 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -179,6 +179,7 @@ along with GCC; see the file COPYING3. If not see + passes are executed after partitioning and thus see just parts of the + compiled unit. */ + INSERT_PASSES_AFTER (all_late_ipa_passes) ++ NEXT_PASS (pass_ipa_hardware_detection); + NEXT_PASS (pass_ipa_pta); + /* FIXME: this should be a normal IP pass. */ + NEXT_PASS (pass_ipa_struct_reorg); +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 8e7510eb3..bd8c9a4f7 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_HARDWARE_DETECTION, "ipa detection") + DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 1c983ef71..ee873f0b2 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -528,6 +528,8 @@ extern ipa_opt_pass_d *make_pass_ipa_icp (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_hardware_detection (gcc::context * ++ ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
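The runtime guard that pass_ipa_hardware_detection builds in create_detection_bb is easier to read as C. The following equivalent sketch is hand-written here (not part of the patch); it shows the check injected at the entry of main when the pass is active, i.e. when -Om plus -mcpu=hip09 led to a successful model run (AI_INFER_LEVEL set) and LTO or whole-program mode is on. The magic constant 4294963967 is 2^32 - 3329, so the unsigned wrap-around comparison accepts MIDR part numbers 0xd01..0xd03:

static void
check_cpu (void)
{
  unsigned int cpuid;
  /* Read the CPU ID register, as the emitted inline asm does.  */
  __asm__ ("mrs %0, MIDR_EL1" : "=r" (cpuid));
  unsigned int implementer = cpuid >> 24;   /* 0x48 (72) = HiSilicon */
  unsigned int part = (cpuid >> 4) & 4095;
  /* part + 4294963967 <= 2 accepts parts 3329..3331 (0xd01..0xd03).  */
  if (implementer != 72 || part + 4294963967u > 2)
    __builtin_abort ();
}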
View file
_service:tar_scm:0287-Add-dynamic-memory-access-checks.patch
Added
@@ -0,0 +1,774 @@ +From 08fb60d0a0707af4004b20358f4a921e4ae6cca6 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Thu, 22 Aug 2024 15:23:36 +0800 +Subject: [PATCH 156/157] Add dynamic memory access checks + +Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +--- + gcc/ipa-prefetch.cc | 622 +++++++++++++++++++++++++++++++++++++------- + gcc/params.opt | 4 + + 2 files changed, 525 insertions(+), 101 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index 94290ea9c..b000d4d75 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -368,6 +368,7 @@ typedef std::map<memref_t *, tree> memref_tree_map; + typedef std::set<gimple *> stmt_set; + typedef std::set<tree> tree_set; + typedef std::map<tree, tree> tree_map; ++typedef std::map<tree, poly_offset_int> tree_poly_offset_map; + + tree_memref_map *tm_map; + funct_mrs_map *fmrs_map; +@@ -710,6 +711,20 @@ get_mem_ref_address_ssa_name (tree mem, tree base) + return NULL_TREE; + } + ++static void ++dump_base_addr (tree base_addr) ++{ ++ if (base_addr) ++ { ++ fprintf (dump_file, "Base addr (%s): ", ++ get_tree_code_name (TREE_CODE (base_addr))); ++ print_generic_expr (dump_file, base_addr); ++ } ++ else ++ fprintf (dump_file, "Base addr (%s): ", "null"); ++ fprintf (dump_file, "\n"); ++} ++ + static void + analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) + { +@@ -736,14 +751,7 @@ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) + { + tree base_addr = get_mem_ref_address_ssa_name (mem, base); + if (dump_file) +- { +- fprintf (dump_file, "Base addr (%s): ", +- base_addr ? get_tree_code_name (TREE_CODE (base_addr)) +- : "null"); +- if (base_addr) +- print_generic_expr (dump_file, base_addr); +- fprintf (dump_file, "\n"); +- } ++ dump_base_addr (base_addr); + if (base_addr) + { + mr->base = analyse_addr_eval (base_addr, mr); +@@ -1187,7 +1195,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec) + } + + static void +-find_nearest_common_dominator (memref_t *mr, basic_block &dom) ++find_nearest_common_post_dominator (memref_t *mr, basic_block &dom) + { + for (unsigned int i = 0; i < mr->stmts.length (); i++) + { +@@ -1196,7 +1204,7 @@ find_nearest_common_dominator (memref_t *mr, basic_block &dom) + if (dom == bb) + continue; + if (dom) +- dom = nearest_common_dominator (CDI_DOMINATORS, dom, bb); ++ dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, bb); + else + dom = bb; + } +@@ -1495,10 +1503,13 @@ gimple_copy_and_remap (gimple *stmt) + + static gimple * + gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, +- int last_idx, stmt_set &processed) ++ int first_idx, int last_idx, ++ stmt_set &processed) + { + gimple *last_stmt = NULL; +- for (int i = mr->stmts.length () - 1; i >= last_idx ; i--) ++ if (first_idx == 0) ++ first_idx = mr->stmts.length () - 1; ++ for (int i = first_idx; i >= last_idx; i--) + { + if (processed.count (mr->stmts[i])) + continue; +@@ -1515,6 +1526,436 @@ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, + return last_stmt; + } + ++/* Check if prefetch insertion may be always unsafe in this case. For now ++ reject cases with access to arrays with no domain or with no elements.
*/ ++ ++static bool ++check_prefetch_safety (vec<memref_t *> &mrs, memref_t *cmr) ++{ ++ for (unsigned int i = 0; i < mrs.length (); i++) ++ { ++ memref_t *mr = mrs[i]; ++ if (mr == cmr || mr->used_mrs.empty ()) ++ continue; ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL || TREE_CODE (*mem) != ARRAY_REF) ++ continue; ++ tree array = TREE_OPERAND (*mem, 0); ++ tree atype = TREE_TYPE (array); ++ gcc_assert (atype); ++ tree domain = TYPE_DOMAIN (atype); ++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Unsupported array type: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++ } ++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain)); ++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain)); ++ if (min_val == 0 && max_val == 0) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Unsupported array type's bounds: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Collect base addresses which we need to check. */ ++ ++static void ++collect_base_addresses (vec<memref_t *> &used_mr_vec, HOST_WIDE_INT dist_val, ++ memref_t *comp_mr, tree_poly_offset_map &offset_map) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Collect base addresses which we need to check.\n"); ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ { ++ memref_t *mr = used_mr_vec[i]; ++ if (mr == comp_mr || mr->used_mrs.empty ()) ++ continue; ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL || TREE_CODE (*mem) != MEM_REF) ++ continue; ++ tree base = get_base_address (*mem); ++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base); ++ if (!base_addr) ++ continue; ++ if (dump_file) ++ { ++ dump_base_addr (base_addr); ++ if (base) ++ { ++ fprintf (dump_file, "Base:"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ if (!TREE_OPERAND (base, 1)) ++ continue; ++ poly_offset_int curr_offset = mem_ref_offset (base); ++ poly_offset_int saved_offset = 0; ++ if (offset_map.count (base_addr)) ++ { ++ saved_offset = offset_map[base_addr]; ++ if ((dist_val > 0 && known_gt (curr_offset, saved_offset)) ++ || (dist_val < 0 && known_lt (curr_offset, saved_offset))) ++ offset_map[base_addr] = curr_offset; ++ else if (dump_file) ++ fprintf (dump_file, "Off: step=%ld gt=%d lt=%d\n", dist_val, ++ known_gt (curr_offset, saved_offset), ++ known_lt (curr_offset, saved_offset)); ++ } ++ else ++ offset_map[base_addr] = curr_offset; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Final list of base addresses:\n"); ++ for (tree_poly_offset_map::iterator it1 = offset_map.begin (); ++ it1 != offset_map.end (); ++it1) ++ { ++ tree base_addr = it1->first; ++ poly_offset_int off = it1->second; ++ fprintf (dump_file, "Base:"); ++ print_generic_expr (dump_file, base_addr); ++ HOST_WIDE_INT val = estimated_poly_value (off.force_shwi (), ++ POLY_VALUE_LIKELY); ++ fprintf (dump_file, "\nOff: %ld\n", val); ++ } ++ fprintf (dump_file, "Finish collecting base addresses.\n"); ++ } ++} ++ ++/* Return true if we need page check to access memory at this address.
*/ ++ ++static bool ++need_page_check (tree base_addr, tree_set &checked_base_addrs) ++{ ++ if (dump_file) ++ dump_base_addr (base_addr); ++ if (base_addr == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Base address not found\n"); ++ return false; ++ } ++ if (checked_base_addrs.count (base_addr)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Base address is already checked\n"); ++ return false; ++ } ++ return true; ++} ++ ++/* Insert instructions to check that the original address and the newly ++ evaluated address for prefetch correspond to the same page. */ ++ ++static gimple * ++insert_page_check (tree addr, tree_poly_offset_map &offset_map, ++ gimple_seq &stmts) ++{ ++ poly_offset_int offset = 0; ++ if (offset_map.count (addr)) ++ offset = offset_map[addr]; ++ tree addr_type = TREE_TYPE (addr); ++ tree utype = unsigned_type_for (addr_type); ++ tree new_addr = build_int_cst (addr_type, 0); ++ if (decl_map->count (addr)) ++ new_addr = (*decl_map)[addr]; ++ tree t1 = make_ssa_name (utype); ++ tree t2 = make_ssa_name (utype); ++ unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1); ++ tree pmask_cst = build_int_cst (utype, pmask); ++ tree off_tree = wide_int_to_tree (sizetype, offset); ++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE); ++ tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR, ++ addr_type, addr, off_tree); ++ tree conv_addr = make_ssa_name (utype); ++ tree conv_new_addr = make_ssa_name (utype); ++ gimple *conv1 = gimple_build_assign (conv_addr, ++ fold_convert (utype, addr_with_offset)); ++ gimple *conv2 = gimple_build_assign (conv_new_addr, ++ fold_convert (utype, new_addr)); ++ gimple *paddr = gimple_build_assign (t1, BIT_AND_EXPR, ++ conv_addr, pmask_cst); ++ gimple *new_paddr = gimple_build_assign (t2, BIT_AND_EXPR, ++ conv_new_addr, pmask_cst); ++ gcond *cond = gimple_build_cond (EQ_EXPR, t1, t2, NULL, NULL); ++ gimple_seq_add_stmt (&stmts, conv1); ++ gimple_seq_add_stmt (&stmts, paddr); ++ gimple_seq_add_stmt (&stmts, conv2); ++ gimple_seq_add_stmt (&stmts, new_paddr); ++ gimple_seq_add_stmt (&stmts, cond); ++ return cond; ++} ++ ++/* Check if this array access needs dynamic address verification. Support only ++ arrays with 1-d indexing. */ ++ ++static bool ++need_array_index_check (tree mem) ++{ ++ /* Check pattern: t1 = (type) t0; ld/st array[t1]. If any index of type (t0) ++ does not go beyond the bounds of the array, we don't need the check.
*/ ++ tree array = TREE_OPERAND (mem, 0); ++ tree atype = TREE_TYPE (array); ++ tree index = TREE_OPERAND (mem, 1); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Array ind: "); ++ print_generic_expr (dump_file, index); ++ fprintf (dump_file, "\nMem: "); ++ print_generic_expr (dump_file, array); ++ fprintf (dump_file, "\nInd type: "); ++ print_generic_expr (dump_file, TREE_TYPE (index)); ++ fprintf (dump_file, "\nMem type: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ tree domain = TYPE_DOMAIN (atype); ++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Unsupported array type domain.\n"); ++ return true; ++ } ++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain)); ++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain)); ++ if (dump_file) ++ fprintf (dump_file, "Array bounds (%ld, %ld)\n", min_val, max_val); ++ if (TREE_CODE (index) != SSA_NAME) ++ return true; ++ ++ gimple *stmt = SSA_NAME_DEF_STMT (index); ++ if (!is_gimple_assign (stmt)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Is not assign, stop analysis: "); ++ print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); ++ } ++ return true; ++ } ++ tree *lhs = gimple_assign_lhs_ptr (stmt); ++ tree *rhs = gimple_assign_rhs1_ptr (stmt); ++ tree lhs_type = TREE_TYPE (*lhs); ++ tree rhs_type = TREE_TYPE (*rhs); ++ tree ind_type = (TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)) ++ ? lhs_type : rhs_type; ++ if (!ind_type || !tree_fits_uhwi_p (TYPE_MIN_VALUE (ind_type)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (ind_type))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Unsupported index type.\n"); ++ return true; ++ } ++ int prec = tree_to_uhwi (TYPE_SIZE (ind_type)); ++ unsigned HOST_WIDE_INT t_max_val = tree_to_uhwi (TYPE_MAX_VALUE (ind_type)); ++ unsigned HOST_WIDE_INT t_min_val = tree_to_uhwi (TYPE_MIN_VALUE (ind_type)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Index type (%d, %ld, %ld): ", prec, ++ t_min_val, t_max_val); ++ print_generic_expr (dump_file, ind_type); ++ fprintf (dump_file, "\n"); ++ } ++ return !((t_max_val <= max_val) && (t_min_val >= min_val)); ++} ++ ++/* Insert instructions to check the new index is within the array bounds. */ ++ ++static gimple * ++insert_index_check (tree mem, gimple_seq &stmts) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Insert array index check\n"); ++ tree atype = TREE_TYPE (TREE_OPERAND (mem, 0)); ++ tree ind = TREE_OPERAND (mem, 1); ++ if (decl_map->count (ind)) ++ ind = (*decl_map)[ind]; ++ tree domain = TYPE_DOMAIN (atype); ++ gcc_assert (domain && tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ && tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))); ++ ++ tree ind_min_val = TYPE_MIN_VALUE (domain); ++ tree ind_max_val = TYPE_MAX_VALUE (domain); ++ tree t1 = make_ssa_name (boolean_type_node); ++ tree t2 = make_ssa_name (boolean_type_node); ++ tree t3 = make_ssa_name (boolean_type_node); ++ t1 = fold_build2 (LE_EXPR, boolean_type_node, ind, ind_max_val); ++ t2 = fold_build2 (GE_EXPR, boolean_type_node, ind, ind_min_val); ++ t3 = fold_build2 (TRUTH_ANDIF_EXPR, boolean_type_node, t1, t2); ++ gcond *cond = gimple_build_cond (EQ_EXPR, t3, boolean_true_node, NULL, NULL); ++ gimple_seq_add_stmt (&stmts, cond); ++ return cond; ++} ++ ++/* Insert safety checks for memory access stmts newly created to evaluate ++ prefetch addresses.
*/ ++ ++static void ++process_used_mr (memref_t *mr, tree_poly_offset_map &offset_map, ++ tree_set &checked_base_addrs, gimple_seq &stmts, ++ vec<gimple *> &bbends) ++{ ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL) ++ return; ++ if (dump_file) ++ { ++ fprintf (dump_file, "MR (%d) maybe need to insert address check: ", ++ mr->mr_id); ++ print_generic_expr (dump_file, *mem); ++ fprintf (dump_file, "\n"); ++ } ++ gimple *bbend = NULL; ++ if (TREE_CODE (*mem) == MEM_REF) ++ { ++ tree base = get_base_address (*mem); ++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base); ++ if (!need_page_check (base_addr, checked_base_addrs)) ++ return; ++ bbend = insert_page_check (base_addr, offset_map, stmts); ++ checked_base_addrs.insert (base_addr); ++ } ++ else if (TREE_CODE (*mem) == ARRAY_REF && need_array_index_check (*mem)) ++ bbend = insert_index_check (*mem, stmts); ++ if (bbend) ++ bbends.safe_push (bbend); ++} ++ ++/* Create new variables and insert new stmts to evaluate prefetch addresses. */ ++ ++static void ++create_stmts_for_used_mrs (vec<memref_t *> &used_mr_vec, vec<gimple *> &bbends, ++ gimple_seq &stmts, stmt_set &processed_stmts, ++ HOST_WIDE_INT dist_val, memref_t *comp_mr) ++{ ++ tree_poly_offset_map offset_map; ++ collect_base_addresses (used_mr_vec, dist_val, comp_mr, offset_map); ++ ++ /* Insert stmts to evaluate prefetch addresses. */ ++ tree_set checked_base_addrs; ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ { ++ memref_t *mr = used_mr_vec[i]; ++ if (mr == comp_mr) ++ continue; ++ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 1, ++ processed_stmts); ++ if (last_stmt && dump_file) ++ { ++ fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); ++ print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); ++ fprintf (dump_file, "\n"); ++ } ++ if (!mr->used_mrs.empty ()) ++ process_used_mr (mr, offset_map, checked_base_addrs, stmts, bbends); ++ last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 0, ++ processed_stmts); ++ } ++} ++ ++/* Insert prefetch instructions. */ ++ ++static void ++insert_prefetch_stmts (vec<gimple *> &pcalls, gimple_seq &stmts, ++ gimple *&last_pref, vec<memref_t *> &vmrs, ++ stmt_set &processed_stmts) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Evaluate addresses and insert prefetch insns.\n"); ++ ++ tree local; ++ switch (param_ipa_prefetch_locality) ++ { ++ case 0: ++ local = integer_zero_node; ++ break; ++ case 1: ++ local = integer_one_node; ++ break; ++ case 2: ++ local = build_int_cst (integer_type_node, 2); ++ break; ++ default: ++ case 3: ++ local = integer_three_node; ++ break; ++ } ++ tree_set prefetched_addrs; ++ for (unsigned int i = 0; i < vmrs.length (); i++) ++ { ++ memref_t *mr = vmrs[i]; ++ /* Don't need to copy the last stmt, since we insert prefetch insn ++ instead of it. */ ++ gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 1, processed_stmts); ++ gimple *last_stmt = mr->stmts[0]; ++ gcc_assert (last_stmt); ++ ++ tree old_addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); ++ tree new_addr = old_addr; ++ if (decl_map->count (old_addr)) ++ new_addr = (*decl_map)[old_addr]; ++ if (prefetched_addrs.count (new_addr)) ++ continue; ++ /* Insert prefetch intrinsic call. */ ++ tree write_p = mr->is_store ?
integer_one_node : integer_zero_node; ++ last_pref = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), ++ 3, new_addr, write_p, local); ++ pcalls.safe_push (last_pref); ++ gimple_seq_add_stmt (&stmts, last_pref); ++ prefetched_addrs.insert (new_addr); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Insert %d prefetch stmt:\n", i); ++ print_gimple_stmt (dump_file, last_pref, 0); ++ } ++ } ++} ++ ++/* Split bbs after condition stmts and fix control flow graph. */ ++ ++static void ++correct_cfg (vec<gimple *> &bbends, gimple *last_pref, basic_block &dom_bb) ++{ ++ edge e_last = split_block (dom_bb, last_pref); ++ if (!bbends.length () || last_pref == NULL) ++ return; ++ for (int i = bbends.length () - 1; i >= 0; i--) ++ { ++ gimple *bbend = bbends[i]; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Split dom_bb after condition stmts:\n"); ++ print_gimple_stmt (dump_file, bbend, 0); ++ } ++ basic_block last_bb = e_last->dest; ++ edge e = split_block (dom_bb, bbend); ++ e->flags &= ~EDGE_FALLTHRU; ++ e->flags |= EDGE_TRUE_VALUE; ++ edge e_false = make_edge (dom_bb, last_bb, EDGE_FALSE_VALUE); ++ e_false->probability = profile_probability::never (); ++ } ++} ++ + static void + create_cgraph_edge (cgraph_node *n, gimple *stmt) + { +@@ -1529,6 +1970,17 @@ create_cgraph_edge (cgraph_node *n, gimple *stmt) + ipa_call_summaries->get_create (e); + } + ++/* Modify cgraph inserting calls to prefetch intrinsics. */ ++ ++static void ++modify_ipa_info (cgraph_node *n, vec<gimple *> &pcalls) ++{ ++ for (unsigned i = 0; i < pcalls.length (); i++) ++ create_cgraph_edge (n, pcalls[i]); ++ ipa_update_overall_fn_summary (n); ++ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); ++} ++ + /* Insert prefetch intrinsics in this function, return nonzero on success. */ + + static int +@@ -1607,6 +2059,18 @@ optimize_function (cgraph_node *n, function *fn) + return 0; + } + ++ vec<memref_t *> used_mr_vec = vNULL; ++ for (memref_set::const_iterator it = used_mrs.begin (); ++ it != used_mrs.end (); it++) ++ used_mr_vec.safe_push (*it); ++ used_mr_vec.qsort (memref_id_cmp); ++ if (!check_prefetch_safety (used_mr_vec, comp_mr)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Prefetching may be unsafe. Skip the case.\n"); ++ return 0; ++ } ++ + /* Filter out memrefs with the same memory references. + TODO: maybe do the same with used mrs. */ + vec<memref_t *> vmrs = vNULL; +@@ -1616,18 +2080,18 @@ optimize_function (cgraph_node *n, function *fn) + /* TODO: maybe it is useful to process also used_mrs. */ + basic_block dom_bb = NULL; + for (unsigned int i = 0; i < vmrs.length (); i++) +- find_nearest_common_dominator (vmrs[i], dom_bb); ++ find_nearest_common_post_dominator (vmrs[i], dom_bb); + + if (!dom_bb) + { + if (dump_file) +- fprintf (dump_file, "Dominator bb for MRs is not found. " ++ fprintf (dump_file, "Post dominator bb for MRs is not found.
" + "Skip the case.\n"); + return 0; + } + else if (dump_file) + { +- fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index); ++ fprintf (dump_file, "Post dominator bb %d for MRs:\n", dom_bb->index); + gimple_dump_bb (dump_file, dom_bb, 0, dump_flags); + fprintf (dump_file, "\n"); + } +@@ -1636,19 +2100,33 @@ optimize_function (cgraph_node *n, function *fn) + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +- if (comp_mr->stmts0 == gsi_stmt (si)) +- { +- last_used = gsi_stmt (si); +- if (dump_file) ++ { ++ bool found = false; ++ for (unsigned int i = 0; i < vmrs.length (); i++) ++ /* TODO: take into account only those MRs that should be ++ checked memory. */ ++ if (vmrsi->stmts0 == gsi_stmt (si)) + { +- fprintf (dump_file, "Last used stmt in dominator bb:\n"); +- print_gimple_stmt (dump_file, last_used, 0); ++ found = true; ++ break; + } +- break; +- } ++ if (found || comp_mr->stmts0 == gsi_stmt (si)) ++ { ++ last_used = gsi_stmt (si); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Last used stmt in post dominator bb:\n"); ++ print_gimple_stmt (dump_file, last_used, 0); ++ } ++ break; ++ } ++ } + +- split_block (dom_bb, last_used); +- gimple_stmt_iterator gsi = gsi_last_bb (dom_bb); ++ gimple_stmt_iterator gsi; ++ if (last_used) ++ gsi = gsi_for_stmt (last_used); ++ else ++ gsi = gsi_last_bb (dom_bb); + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; +@@ -1660,7 +2138,7 @@ optimize_function (cgraph_node *n, function *fn) + stmt_set processed_stmts; + if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts0))) + { +- gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, ++ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0, + processed_stmts); + inc_var = gimple_assign_lhs (tmp); + } +@@ -1683,86 +2161,26 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + +- /* Create other new vars. Insert new stmts. */ +- vec<memref_t *> used_mr_vec = vNULL; +- for (memref_set::const_iterator it = used_mrs.begin (); +- it != used_mrs.end (); it++) +- used_mr_vec.safe_push (*it); +- used_mr_vec.qsort (memref_id_cmp); +- +- for (unsigned int j = 0; j < used_mr_vec.length (); j++) +- { +- memref_t *mr = used_mr_vecj; +- if (mr == comp_mr) +- continue; +- gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, +- processed_stmts); +- gcc_assert (last_stmt); +- if (dump_file) +- { +- fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); +- print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); +- fprintf (dump_file, "\n"); +- } +- } +- /* On new load check page fault. */ +- /* Insert prefetch instructions. */ +- if (dump_file) +- fprintf (dump_file, "Evaluate addresses and insert prefetch insn.\n"); ++ vec<gimple *> bbends = vNULL; ++ create_stmts_for_used_mrs (used_mr_vec, bbends, stmts, processed_stmts, ++ dist_val, comp_mr); + + vec<gimple *> pcalls = vNULL; +- tree local; +- switch (param_ipa_prefetch_locality) +- { +- case 0: +- local = integer_zero_node; +- break; +- case 1: +- local = integer_one_node; +- break; +- case 2: +- local = build_int_cst (integer_type_node, 2); +- break; +- default: +- case 3: +- local = integer_three_node; +- break; +- } +- tree_set prefetched_addrs; +- for (unsigned int j = 0; j < vmrs.length (); j++) +- { +- memref_t *mr = vmrsj; +- /* Don't need to copy the last stmt, since we insert prefetch insn +- instead of it. 
*/ +- gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts); +- gimple *last_stmt = mr->stmts[0]; +- gcc_assert (last_stmt); +- tree write_p = mr->is_store ? integer_one_node : integer_zero_node; +- tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); +- if (decl_map->count (addr)) +- addr = (*decl_map)[addr]; +- if (prefetched_addrs.count (addr)) +- continue; +- last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), +- 3, addr, write_p, local); +- pcalls.safe_push (last_stmt); +- gimple_seq_add_stmt (&stmts, last_stmt); +- prefetched_addrs.insert (addr); +- if (dump_file) +- { +- fprintf (dump_file, "Insert %d prefetch stmt:\n", j); +- print_gimple_stmt (dump_file, last_stmt, 0); +- } +- } +- ++ gimple *last_pref = NULL; ++ insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts); + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ correct_cfg (bbends, last_pref, dom_bb); ++ + delete decl_map; + +- /* Modify cgraph inserting calls to prefetch intrinsics. */ +- for (unsigned i = 0; i < pcalls.length (); i++) +- create_cgraph_edge (n, pcalls[i]); +- ipa_update_overall_fn_summary (n); +- renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); ++ modify_ipa_info (n, pcalls); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "After optimization:\n"); ++ dump_function_to_file (cfun->decl, dump_file, (dump_flags_t)0); ++ } + + return 1; + } +@@ -1781,8 +2199,10 @@ insert_prefetch () + fprintf (dump_file, "Optimize function %s\n", n->dump_name ()); + push_cfun (DECL_STRUCT_FUNCTION (n->decl)); + calculate_dominance_info (CDI_DOMINATORS); ++ calculate_dominance_info (CDI_POST_DOMINATORS); + res |= optimize_function (n, fn); + free_dominance_info (CDI_DOMINATORS); ++ free_dominance_info (CDI_POST_DOMINATORS); + pop_cfun (); + } + return res; +diff --git a/gcc/params.opt b/gcc/params.opt +index 747d0f829..fc700ab79 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -317,6 +317,10 @@ The factor represents the number of inductive variable incrementations to evalua + Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization + The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data. + ++-param=ipa-prefetch-pagesize= ++Common Joined UInteger Var(param_ipa_prefetch_pagesize) Init(4096) Param Optimization ++The flag represents current pagesize for runtime checks of memory access addresses. ++ + -param=ira-loop-reserved-regs= + Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization + The number of registers in each class kept unused by loop invariant motion. +-- +2.33.0 +
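The page check that insert_page_check emits before each prefetch reduces to a simple mask comparison. A minimal C sketch of the generated condition (function and parameter names invented here), with the page size taken from the new --param=ipa-prefetch-pagesize (default 4096):

static int
same_page (unsigned long addr, long off, unsigned long new_addr)
{
  unsigned long mask = ~(4096UL - 1);
  /* The prefetch is reached only when the offset-adjusted original
     address and the newly evaluated address land on the same page.  */
  return ((addr + off) & mask) == (new_addr & mask);
}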
View file
_service:tar_scm:0288-Enable-macro-use-commandline.patch
Added
@@ -0,0 +1,207 @@ +From 7a578a8725f8fd7d92fcbbac14841ea7e8d0870f Mon Sep 17 00:00:00 2001 +From: zhangxiaohua <xiaohua20100827@163.com> +Date: Sun, 25 Aug 2024 23:08:53 +0800 +Subject: [PATCH 157/157] Enable macro-use-commandline + +Signed-off-by: zhangxiaohua <xiaohua20100827@163.com> +--- + gcc/c-family/c-opts.cc | 4 +++ + gcc/c-family/c.opt | 4 +++ + gcc/doc/cppopts.texi | 4 +++ + gcc/doc/invoke.texi | 1 + + .../gcc.dg/cpp/macro-use-cmdline-1.c | 26 ++++++++++++++ + .../gcc.dg/cpp/macro-use-cmdline-2.c | 34 +++++++++++++++++++ + libcpp/include/cpplib.h | 3 ++ + libcpp/init.cc | 1 + + libcpp/macro.cc | 16 ++++++++- + 9 files changed, 92 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c + create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c + +diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc +index 5134f6128..744b54dc3 100644 +--- a/gcc/c-family/c-opts.cc ++++ b/gcc/c-family/c-opts.cc +@@ -527,6 +527,10 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value, + cpp_opts->track_macro_expansion = 2; + break; + ++ case OPT_fmacro_use_commandline: ++ cpp_opts->macro_use_commandline = 1; ++ break; ++ + case OPT_fexec_charset_: + cpp_opts->narrow_charset = arg; + break; +diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt +index 07da40ef4..a36c27f07 100644 +--- a/gcc/c-family/c.opt ++++ b/gcc/c-family/c.opt +@@ -2012,6 +2012,10 @@ ftrack-macro-expansion= + C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger + -ftrack-macro-expansion=<0|1|2> Track locations of tokens coming from macro expansion and display them in error messages. + ++fmacro-use-commandline ++C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger ++Preferentially use options from the commandline. ++ + fpretty-templates + C++ ObjC++ Var(flag_pretty_templates) Init(1) + Do not pretty-print template specializations as the template signature followed by the arguments. +diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi +index c0a92b370..8c8a81eac 100644 +--- a/gcc/doc/cppopts.texi ++++ b/gcc/doc/cppopts.texi +@@ -277,6 +277,10 @@ correct column numbers in warnings or errors, even if tabs appear on the + line. If the value is less than 1 or greater than 100, the option is + ignored. The default is 8. + ++@item -fmacro-use-commandline ++@opindex fmacro-use-commandline ++Preferentially use options from the command line. ++ + @item -ftrack-macro-expansion@r{[}=@var{level}@r{]} + @opindex ftrack-macro-expansion + Track locations of tokens across macro expansions. This allows the +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index bdd8b9429..2ff7d860d 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -630,6 +630,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fexec-charset=@var{charset} -fextended-identifiers @gol + -finput-charset=@var{charset} -flarge-source-files @gol + -fmacro-prefix-map=@var{old}=@var{new} -fmax-include-depth=@var{depth} @gol ++-fmacro-use-commandline @gol + -fno-canonical-system-headers -fpch-deps -fpch-preprocess @gol + -fpreprocessed -ftabstop=@var{width} -ftrack-macro-expansion @gol + -fwide-exec-charset=@var{charset} -fworking-directory @gol +diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c +new file mode 100644 +index 000000000..f85d9c268 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c +@@ -0,0 +1,26 @@ ++/* ++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1 -DTEST_MACRO=20" } ++ { dg-do compile } ++ { dg-do run } ++*/ ++ ++/* { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 0 } ++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 } ++*/ ++ ++#if DEBUG ++extern int puts (const char *); ++#else ++#define puts(X) ++#endif ++extern void abort (void); ++ ++#define err(str) do { puts(str); abort(); } while (0) ++ ++int main (int argc, char *argv[]) ++{ ++ int macroValue = TEST_MACRO; ++ if (macroValue != 20) ++ err("macroValue"); ++ return 0; ++} +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c +new file mode 100644 +index 000000000..99d92d1e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c +@@ -0,0 +1,34 @@ ++/* ++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1" } ++ { dg-do compile } ++ { dg-do run } ++*/ ++ ++#define TEST_MACRO 300 ++#define TEST_MACRO_1 400 ++/* ++ { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 7 } ++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 } ++*/ ++ ++#if DEBUG ++extern int puts (const char *); ++#else ++#define puts(X) ++#endif ++ ++extern void abort (void); ++ ++#define err(str) do { puts(str); abort(); } while (0) ++ ++int main (int argc, char *argv[]) ++{ ++ int macroValue = TEST_MACRO; ++ if (macroValue != 1) ++ err("macroValue"); ++ ++ int macroValue1 = TEST_MACRO_1; ++ if (macroValue1 != 400) ++ err("macroValue1"); ++ return 0; ++} +\ No newline at end of file +diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h +index 3eba6f74b..c6101ca01 100644 +--- a/libcpp/include/cpplib.h ++++ b/libcpp/include/cpplib.h +@@ -471,6 +471,9 @@ struct cpp_options + consumes the highest amount of memory. */ + unsigned char track_macro_expansion; + ++ /* Use the options on the command line first. */ ++ unsigned char macro_use_commandline; ++ + /* Nonzero means handle C++ alternate operator names. */ + unsigned char operator_names; + +diff --git a/libcpp/init.cc b/libcpp/init.cc +index f4ab83d21..47be60a36 100644 +--- a/libcpp/init.cc ++++ b/libcpp/init.cc +@@ -215,6 +215,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, + cpp_options::track_macro_expansion to learn about the other + values.
*/ + CPP_OPTION (pfile, track_macro_expansion) = 2; ++ CPP_OPTION (pfile, macro_use_commandline) = 0; + CPP_OPTION (pfile, warn_normalize) = normalized_C; + CPP_OPTION (pfile, warn_literal_suffix) = 1; + CPP_OPTION (pfile, canonical_system_headers) +diff --git a/libcpp/macro.cc b/libcpp/macro.cc +index 8ebf360c0..aa9e4ffa6 100644 +--- a/libcpp/macro.cc ++++ b/libcpp/macro.cc +@@ -3852,7 +3852,21 @@ _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node) + node->value.macro->line, 0, + "this is the location of the previous definition"); + } +- _cpp_free_definition (node); ++#define LOCATION_FROM_LINEMAP 0 ++#define MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN 96 ++#define MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN 128 ++ if (CPP_OPTION (pfile, macro_use_commandline) ++ && node->value.macro->line >= MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && node->value.macro->line <= MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && pfile->forced_token_location == LOCATION_FROM_LINEMAP) ++ { ++ cpp_pedwarning_with_line (pfile, CPP_W_NONE, ++ node->value.macro->line, 0, ++ "use the previous definition from commandline"); ++ return false; ++ } ++ else ++ _cpp_free_definition (node); + } + + /* Enter definition in hash table. */ +-- +2.33.0 +
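To make the option's behavior concrete (this mirrors the macro-use-cmdline-2.c testcase above; it is an editorial illustration, not part of the patch): with -fmacro-use-commandline, a macro defined via -D keeps its command-line value even when the file redefines it later, and a redefinition warning is still emitted.

/* demo.c -- compile with: gcc -fmacro-use-commandline -DTEST_MACRO=1 -c demo.c */
#define TEST_MACRO 300   /* warned about; ignored when the flag is on */

int
test_macro_value (void)
{
  return TEST_MACRO;   /* 1 with the flag, 300 without it */
}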
_service:tar_scm:0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch
Added
@@ -0,0 +1,35 @@ +From 63f99f46e851aecc070496a0e688a0d118c820a4 Mon Sep 17 00:00:00 2001 +From: YunQiang Su <yunqiang@isrc.iscas.ac.cn> +Date: Mon, 2 Sep 2024 17:57:52 +0800 +Subject: [PATCH] tree-ssa-loop-crc.cc: TARGET_CRC32 may be not defined + +TARGET_CRC32 may be not defined on some architectures, RISC-V is one example. +--- + gcc/tree-ssa-loop-crc.cc | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/tree-ssa-loop-crc.cc b/gcc/tree-ssa-loop-crc.cc +index b9c2f71ca..7eee9446d 100644 +--- a/gcc/tree-ssa-loop-crc.cc ++++ b/gcc/tree-ssa-loop-crc.cc +@@ -1227,6 +1227,9 @@ convert_to_new_loop (class loop *loop) + static unsigned int + tree_ssa_loop_crc () + { ++#ifndef TARGET_CRC32 ++ return 0; ++#else + if (TARGET_CRC32 == false) + { + warning (OPT____,"The loop-crc optimization is not working." \ +@@ -1269,6 +1272,7 @@ tree_ssa_loop_crc () + } + } + return todo; ++#endif + } + + /* Loop crc. */ +-- +2.33.0 +
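The fix above is the usual guard idiom for target macros that only some backends provide. A portable C illustration (FAST_PATH stands in for a target macro such as TARGET_CRC32; nothing here is GCC API):

/* Guarding an optional macro: provide a no-op fallback so the file
   still compiles on targets that never define the macro at all.  */
#ifdef FAST_PATH
static int run_pass (void) { return 1; /* optimized path */ }
#else
static int run_pass (void) { return 0; /* pass disabled */ }
#endif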
_service:tar_scm:0290-Add-ipa-prefetch-test-for-gcc-s-case.patch
Added
@@ -0,0 +1,209 @@ +From 0534ae05fc313c0d449b48ffe3e01642b644e6d2 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Fri, 6 Sep 2024 10:40:50 +0800 +Subject: [PATCH 1/2] Add ipa-prefetch test for gcc's case + +--- + gcc/ipa-prefetch.cc | 4 +- + gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c | 167 ++++++++++++++++++++ + 2 files changed, 170 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index b000d4d75..8e628390b 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -1668,6 +1668,8 @@ static gimple * + insert_page_check (tree addr, tree_poly_offset_map &offset_map, + gimple_seq &stmts) + { ++ if (dump_file) ++ fprintf (dump_file, "Insert page check.\n"); + poly_offset_int offset = 0; + if (offset_map.count (addr)) + offset = offset_map[addr]; +@@ -1783,7 +1785,7 @@ static gimple * + insert_index_check (tree mem, gimple_seq &stmts) + { + if (dump_file) +- fprintf (dump_file, "Insert array index check\n"); ++ fprintf (dump_file, "Insert array index check.\n"); + tree atype = TREE_TYPE (TREE_OPERAND (mem, 0)); + tree ind = TREE_OPERAND (mem, 1); + if (decl_map->count (ind)) +diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c +new file mode 100644 +index 000000000..f1001c350 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c +@@ -0,0 +1,167 @@ ++/* { dg-do link } */ ++/* { dg-options "-O3 -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch" } */ ++/* { dg-require-effective-target lto } */ ++ ++/* Based on opensource gcc code. */ ++ ++#include <stdbool.h> ++#include <stdlib.h> ++#include <stddef.h> ++ ++#define SPARSESET_ELT_TYPE unsigned int ++#define ALLOCNO_NUM(A) ((A)->num) ++ ++typedef struct sparseset_def ++{ ++ SPARSESET_ELT_TYPE *dense; /* Dense array. */ ++ SPARSESET_ELT_TYPE *sparse; /* Sparse array. */ ++ SPARSESET_ELT_TYPE members; /* Number of elements. */ ++ SPARSESET_ELT_TYPE size; /* Maximum number of elements. */ ++ SPARSESET_ELT_TYPE iter; /* Iterator index. */ ++ unsigned char iter_inc; /* Iteration increment amount. */ ++ bool iterating; ++ SPARSESET_ELT_TYPE elms[2]; /* Combined dense and sparse arrays. */ ++} *sparseset; ++ ++struct ira_allocno ++{ ++ /* The allocno order number starting with 0. Each allocno has an ++ unique number and the number is never changed for the ++ allocno. */ ++ int num; ++ /* Regno for allocno or cap. */ ++ int regno; ++ /*...*/ ++}; ++ ++typedef struct ira_allocno_live_range *allocno_live_range_t; ++typedef struct ira_allocno *ira_allocno_t; ++ ++struct ira_allocno_live_range ++{ ++ /* Allocno whose live range is described by given structure. */ ++ ira_allocno_t allocno; ++ /* Program point range. */ ++ int start, finish; ++ /* Next structure describing program points where the allocno ++ lives. */ ++ allocno_live_range_t next; ++ /* Pointer to structures with the same start/finish.
*/ ++ allocno_live_range_t start_next, finish_next; ++}; ++ ++bool ++sparseset_bit_p (sparseset s, SPARSESET_ELT_TYPE e) ++{ ++ SPARSESET_ELT_TYPE idx; ++ ++ idx = s->sparse[e]; ++ ++ return idx < s->members && s->dense[idx] == e; ++} ++ ++bool new_pseudos_p; ++int ira_max_point, ira_allocnos_num; ++allocno_live_range_t *ira_finish_point_ranges; ++ ++static inline void ++sparseset_clear (sparseset s) ++{ ++ s->members = 0; ++ s->iterating = false; ++} ++ ++sparseset ++sparseset_alloc (SPARSESET_ELT_TYPE n_elms) ++{ ++ unsigned int n_bytes = sizeof (struct sparseset_def) ++ + ((n_elms - 1) * 2 * sizeof (SPARSESET_ELT_TYPE)); ++ ++ /* We use xcalloc rather than xmalloc to silence some valgrind uninitialized ++ read errors when accessing set->sparse[n] when "n" is not, and never has ++ been, in the set. These uninitialized reads are expected, by design and ++ harmless. If this turns into a performance problem due to some future ++ additional users of sparseset, we can revisit this decision. */ ++ sparseset set = (sparseset) calloc (1, n_bytes); ++ set->dense = &(set->elms[0]); ++ set->sparse = &(set->elms[n_elms]); ++ set->size = n_elms; ++ sparseset_clear (set); ++ return set; ++} ++ ++void ++sparseset_insert_bit (sparseset s, SPARSESET_ELT_TYPE e, SPARSESET_ELT_TYPE idx) ++{ ++ s->sparse[e] = idx; ++ s->dense[idx] = e; ++} ++ ++void ++sparseset_swap (sparseset s, SPARSESET_ELT_TYPE idx1, SPARSESET_ELT_TYPE idx2) ++{ ++ SPARSESET_ELT_TYPE tmp = s->dense[idx2]; ++ sparseset_insert_bit (s, s->dense[idx1], idx2); ++ sparseset_insert_bit (s, tmp, idx1); ++} ++ ++void __attribute__ ((noinline)) ++sparseset_clear_bit (sparseset s, SPARSESET_ELT_TYPE e) ++{ ++ if (sparseset_bit_p (s, e)) ++ { ++ SPARSESET_ELT_TYPE idx = s->sparse[e]; ++ SPARSESET_ELT_TYPE iter = s->iter; ++ SPARSESET_ELT_TYPE mem = s->members - 1; ++ ++ /* If we are iterating over this set and we want to delete a ++ member we've already visited, then we swap the element we ++ want to delete with the element at the current iteration ++ index so that it plays well together with the code below ++ that actually removes the element. */ ++ if (s->iterating && idx <= iter) ++ { ++ if (idx < iter) ++ { ++ sparseset_swap (s, idx, iter); ++ idx = iter; ++ } ++ s->iter_inc = 0; ++ } ++ ++ /* Replace the element we want to delete with the last element ++ in the dense array and then decrement s->members, effectively ++ removing the element we want to delete. */ ++ sparseset_insert_bit (s, s->dense[mem], idx); ++ s->members = mem; ++ } ++} ++ ++allocno_live_range_t r; ++sparseset allocnos_live; ++ ++void ++ira_flattening () ++{ ++ int i; ++ ++ if (new_pseudos_p) ++ { ++ allocnos_live = sparseset_alloc (ira_allocnos_num); ++ for (i = 0; i < ira_max_point; i++) ++ { ++ for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next) ++ sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (r->allocno)); ++ } ++ } ++} ++ ++int main() ++{ ++ ira_flattening (); ++ return 0; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert page check" 1 "ipa_prefetch"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert 0 prefetch stmt:" 1 "ipa_prefetch"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Split dom_bb after condition stmts:" 1 "ipa_prefetch"} } */ +-- +2.33.0 +
_service:tar_scm:0291-Fix-settings-for-wide-operations-tests.patch
Added
@@ -0,0 +1,73 @@ +From 411792b0bbb63715d8e90d46eb4f0d9c810ce8ba Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 <pronin.alexander@huawei.com> +Date: Tue, 3 Sep 2024 21:26:03 +0800 +Subject: [PATCH 2/2] Fix settings for wide operations tests + +Signed-off-by: lin-houzhong <hz_lin8@163.com> +--- + gcc/testsuite/gcc.dg/double_sized_mul-1.c | 8 +++++--- + gcc/testsuite/gcc.dg/double_sized_mul-2.c | 9 +++++---- + gcc/testsuite/gcc.dg/uaddsub.c | 6 ++++-- + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c +index d32a25223..b848e02de 100644 +--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c ++++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c +@@ -1,7 +1,8 @@ +-/* { dg-do compile } */ ++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */ + /* fif-conversion-gimple and fuaddsub-overflow-match-all are required for + proper overflow detection in some cases. */ +-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */ + #include <stdint.h> + + typedef unsigned __int128 uint128_t; +@@ -138,4 +139,5 @@ uint128_t mul128_perm (uint64_t a, uint64_t b) + return res; + } + +-/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */ ++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" { target aarch64*-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 4 "widening_mul" { target x86_64*-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c +index ff35902b7..cf8f0aedd 100644 +--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c ++++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c +@@ -1,7 +1,8 @@ +-/* { dg-do compile } */ +-/* fif-conversion-gimple is required for proper overflow detection +- in some cases. */ +-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */ ++/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for ++ proper overflow detection in some cases.
*/ ++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */ + #include <stdint.h> + + typedef unsigned __int128 uint128_t; +diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c +index 96c26d308..dcb587fc8 100644 +--- a/gcc/testsuite/gcc.dg/uaddsub.c ++++ b/gcc/testsuite/gcc.dg/uaddsub.c +@@ -1,5 +1,6 @@ +-/* { dg-do compile } */ ++/* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */ + /* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */ ++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */ + #include <stdint.h> + + typedef unsigned __int128 uint128_t; +@@ -140,4 +141,5 @@ uint256_t sub256 (uint128_t a, uint128_t b) + } + + /* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]\+\\(D\\), b_\[0-9\]\+\\(D\\)\\)" 5 "optimized" } } */ +-/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]\+\\(D\\), b_\[0-9\]\+\\(D\\)\\)" 5 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]\+\\(D\\), b_\[0-9\]\+\\(D\\)\\)" 5 "optimized" { target aarch64*-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]\+\\(D\\), b_\[0-9\]\+\\(D\\)\\)" 4 "optimized" { target x86_64*-*-* } } } */ +-- +2.33.0 +
_service:tar_scm:0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch
Added
@@ -0,0 +1,42 @@ +From 808294bf0f32aaff1cc7e56a756b246d328b3402 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Fri, 6 Sep 2024 11:10:03 +0800 +Subject: [PATCH 2/3] Fix errors in ipa-prefetch (IAORPF and IAOSJ0) + +Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +--- + gcc/ipa-prefetch.cc | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index b000d4d75..74af55af0 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -1681,7 +1681,8 @@ insert_page_check (tree addr, tree_poly_offset_map &offset_map, + unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1); + tree pmask_cst = build_int_cst (utype, pmask); + tree off_tree = wide_int_to_tree (sizetype, offset); +- gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE); ++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE ++ || TREE_CODE (addr_type) == REFERENCE_TYPE); + tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR, + addr_type, addr, off_tree); + tree conv_addr = make_ssa_name (utype); +@@ -2082,11 +2083,11 @@ optimize_function (cgraph_node *n, function *fn) + for (unsigned int i = 0; i < vmrs.length (); i++) + find_nearest_common_post_dominator (vmrs[i], dom_bb); + +- if (!dom_bb) ++ if (!dom_bb || dom_bb->index == ENTRY_BLOCK || dom_bb->index == EXIT_BLOCK) + { + if (dump_file) +- fprintf (dump_file, "Post dominator bb for MRs is not found. " +- "Skip the case.\n"); ++ fprintf (dump_file, "Post dominator bb for MRs is not found or " ++ "it's an entry/exit block. Skip the case.\n"); + return 0; + } + else if (dump_file) +-- +2.33.0 +
_service:tar_scm:0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch
Added
@@ -0,0 +1,51 @@ +From bfb77997f423ffe3bdcbd8bb8d7f739fe51ce4f5 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Fri, 6 Sep 2024 11:36:11 +0800 +Subject: [PATCH 3/3] Fix error with stmts insertion in ipa-prefetch (for + IAO6R3) + +Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +--- + gcc/ipa-prefetch.cc | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index b000d4d75..6190c2ebb 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -2096,7 +2096,7 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + +- /* Try to find comp_mr's stmt in the dominator bb. */ ++ /* Try to find comp_mr's stmt in the post dominator bb. */ + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +@@ -2168,7 +2168,22 @@ optimize_function (cgraph_node *n, function *fn) + vec<gimple *> pcalls = vNULL; + gimple *last_pref = NULL; + insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts); +- gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ gimple *gstmt = gsi_stmt (gsi); ++ bool insert_after = last_used || gstmt == NULL || !is_ctrl_stmt (gstmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Insert prefetch sequence %s stmt:\n", ++ insert_after ? "after": "before"); ++ if (gstmt) ++ print_gimple_stmt (dump_file, gstmt, 0); ++ else ++ fprintf (dump_file, "(no stmts)\n"); ++ } ++ if (insert_after) ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ else ++ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); + + correct_cfg (bbends, last_pref, dom_bb); + +-- +2.33.0 +
_service:tar_scm:0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch
Added
@@ -0,0 +1,80 @@ +From cd79fc29d2cdb73836f8699355113e94b833e0e0 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +Date: Wed, 11 Sep 2024 17:18:58 +0800 +Subject: [PATCH 2/2] Fix errors in ipa-prefetch(IAO50J and IAO5H7) + +Signed-off-by: Diachkov Ilia <diachkov.ilia1@huawei-partners.com> +--- + gcc/ipa-prefetch.cc | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index 5184687aa..685f9c267 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -2099,6 +2099,18 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + ++ /* Check that all used mrs dominate found post dominator bb. This case ++ may be supported later by copying MR evaluation to the bb. */ ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, ++ gimple_bb (used_mr_vec[i]->stmts[0]))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "MR's (%d) bb is not dominate the found bb %d. " ++ "Skip the case.\n", used_mr_vec[i]->mr_id, dom_bb->index); ++ return 0; ++ } ++ + /* Try to find comp_mr's stmt in the post dominator bb. */ + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +@@ -2133,17 +2145,29 @@ optimize_function (cgraph_node *n, function *fn) + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; +- gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0])); +- tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]); ++ gimple *old_inc_stmt = comp_mr->stmts[0]; ++ gcc_assert (old_inc_stmt && gimple_assign_single_p (old_inc_stmt)); ++ tree inc_var = gimple_assign_lhs (old_inc_stmt); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Old inc stmt: "); ++ print_gimple_stmt (dump_file, old_inc_stmt, 0); ++ } + /* If old_var definition dominates the current use, just use it, otherwise + evaluate it just before new inc var evaluation. */ + gimple_seq stmts = NULL; + stmt_set processed_stmts; +- if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0]))) ++ tree local_inc_var = inc_var; ++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (old_inc_stmt))) + { + gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0, + processed_stmts); +- inc_var = gimple_assign_lhs (tmp); ++ local_inc_var = gimple_assign_lhs (tmp); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Localized old inc stmt: "); ++ print_gimple_stmt (dump_file, tmp, 0); ++ } + } + tree var_type = TREE_TYPE (inc_var); + enum tree_code inc_code; +@@ -2155,7 +2179,8 @@ optimize_function (cgraph_node *n, function *fn) + HOST_WIDE_INT dist_val = tree_to_shwi (step) + * param_ipa_prefetch_distance_factor; + tree dist = build_int_cst (TREE_TYPE (step), dist_val); +- tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist); ++ tree new_inc_var = gimple_build (&stmts, inc_code, var_type, local_inc_var, ++ dist); + (*decl_map)[inc_var] = new_inc_var; + if (dump_file) + { +-- +2.33.0 +
_service:tar_scm:0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch
Added
@@ -0,0 +1,30 @@ +From 7b4cce4896cefefedba9545a9633585e086b7621 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Wed, 11 Sep 2024 18:26:22 +0800 +Subject: [PATCH 1/2] Fix error with grouped_load merge in + slp-transpose-vectorize (for IALR8B) + +--- + gcc/tree-vect-slp.cc | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index e3e246977..d4870de43 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -3807,7 +3807,11 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited, + these two grouped loads need to be merged. */ + tree opb = get_op_base_address (first_element); + unsigned int grp_size_b = DR_GROUP_SIZE (first_element); +- if (opa == opb && grp_size_a == grp_size_b) ++ /* Ensure that the elements merge to load group meet the alignment condition (dr_misalignment) */ ++ HOST_WIDE_INT diff = 0; ++ diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr)) ++ - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr))); ++ if (opa == opb && grp_size_a == grp_size_b && diff >= 0) + { + res.safe_push (first_element); + visited[i] = true; +-- +2.33.0 +
_service:tar_scm:0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch
Added
@@ -0,0 +1,28 @@ +From b3a6a170bf1dc0e460e98a7fd02c92e6b036784a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Fri, 13 Sep 2024 14:13:07 +0800 +Subject: [PATCH 2/2] Fix error in slp-transpose-vectorize (for IAQFM3) + +--- + gcc/tree-vect-slp.cc | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index d4870de43..d7e198dff 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -3811,7 +3811,10 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited, + HOST_WIDE_INT diff = 0; + diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr)) + - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr))); +- if (opa == opb && grp_size_a == grp_size_b && diff >= 0) ++ if (opa == opb ++ && grp_size_a == grp_size_b ++ && diff >= 0 ++ && check_same_bb (first_element, merge_first_element)) + { + res.safe_push (first_element); + visited[i] = true; +-- +2.33.0 +
_service:tar_scm:0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch
Added
@@ -0,0 +1,26 @@ +From 8b30d71f881e15bfbc514f9b65fee178610e1536 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com> +Date: Wed, 18 Sep 2024 10:48:55 +0800 +Subject: [PATCH] Fix error in slp-transpose-vectorize (for IARHFM) + +--- + gcc/tree-vect-slp.cc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index d7e198dff..fbd638333 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -3814,7 +3814,8 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited, + if (opa == opb + && grp_size_a == grp_size_b + && diff >= 0 +- && check_same_bb (first_element, merge_first_element)) ++ && check_same_bb (first_element, merge_first_element) ++ && DR_PTR_INFO (first_element->dr_aux.dr) != DR_PTR_INFO (merge_first_element->dr_aux.dr)) + { + res.safe_push (first_element); + visited[i] = true; +-- +2.33.0 +
_service:tar_scm:0298-Mark-prefetch-builtin-as-willreturn.patch
Added
@@ -0,0 +1,99 @@ +From a252bbd11d22481a1e719ed36d800e2192abb369 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander <pronin.alexander@huawei.com> +Date: Thu, 31 Oct 2024 15:49:27 +0800 +Subject: [PATCH 1/6] Mark prefetch builtin as willreturn + +Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com> +--- + gcc/common.opt | 4 ++++ + gcc/gimple.cc | 30 ++++++++++++++++++++++++++++++ + gcc/gimple.h | 1 + + gcc/tree-ssa-pre.cc | 4 +--- + 4 files changed, 36 insertions(+), 3 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 688d65e4d..be5fcc681 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1313,6 +1313,10 @@ fdelete-null-pointer-checks + Common Var(flag_delete_null_pointer_checks) Init(-1) Optimization + Delete useless null pointer checks. + ++fbuiltin-will-return ++Common Var(flag_builtin_will_return) Optimization ++Consider some of the builtins as definitely returning. ++ + fdevirtualize-at-ltrans + Common Var(flag_ltrans_devirtualize) + Stream extra data to support more aggressive devirtualization in LTO local transformation mode. +diff --git a/gcc/gimple.cc b/gcc/gimple.cc +index 9e62da426..04ca9f161 100644 +--- a/gcc/gimple.cc ++++ b/gcc/gimple.cc +@@ -2998,6 +2998,36 @@ nonbarrier_call_p (gimple *call) + return false; + } + ++static inline bool ++will_return_builtin_p (gimple *call) ++{ ++ if (!flag_builtin_will_return) ++ return false; ++ ++ if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL)) ++ return false; ++ ++ switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call))) ++ { ++ case BUILT_IN_PREFETCH: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool ++will_return_call_p (gimple *call, function *fun) ++{ ++ int flags = gimple_call_flags (call); ++ if (!(flags & (ECF_CONST|ECF_PURE)) ++ || (flags & ECF_LOOPING_CONST_OR_PURE) ++ || stmt_can_throw_external (fun, call)) ++ return will_return_builtin_p (call); ++ ++ return true; ++} ++ + /* Callback for walk_stmt_load_store_ops. + + Return TRUE if OP will dereference the tree stored in DATA, FALSE +diff --git a/gcc/gimple.h b/gcc/gimple.h +index 77a5a07e9..bb05a7664 100644 +--- a/gcc/gimple.h ++++ b/gcc/gimple.h +@@ -1628,6 +1628,7 @@ extern bool gimple_asm_clobbers_memory_p (const gasm *); + extern void dump_decl_set (FILE *, bitmap); + extern bool nonfreeing_call_p (gimple *); + extern bool nonbarrier_call_p (gimple *); ++extern bool will_return_call_p (gimple *, function *); + extern bool infer_nonnull_range (gimple *, tree); + extern bool infer_nonnull_range_by_dereference (gimple *, tree); + extern bool infer_nonnull_range_by_attribute (gimple *, tree); +diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc +index 98134b5d3..b5264133a 100644 +--- a/gcc/tree-ssa-pre.cc ++++ b/gcc/tree-ssa-pre.cc +@@ -3988,9 +3988,7 @@ compute_avail (function *fun) + that forbids hoisting possibly trapping expressions + before it. */ + int flags = gimple_call_flags (stmt); +- if (!(flags & (ECF_CONST|ECF_PURE)) +- || (flags & ECF_LOOPING_CONST_OR_PURE) +- || stmt_can_throw_external (fun, stmt)) ++ if (!will_return_call_p (stmt, fun)) + /* Defer setting of BB_MAY_NOTRETURN to avoid it + influencing the processing of the call itself. */ + set_bb_may_notreturn = true; +-- +2.33.0 +
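For intuition (a user-level illustration, not from the patch): __builtin_prefetch only hints the memory system and always returns, so with -fbuiltin-will-return PRE no longer has to conservatively assume the block containing the call might not return, which keeps expression-hoisting opportunities alive across prefetches such as the one below.

/* A prefetch in a hot loop no longer blocks PRE's availability analysis. */
void
touch (int *p, int n)
{
  for (int i = 0; i < n; i++)
    {
      __builtin_prefetch (&p[i + 16]);   /* hint only: no control-flow effect */
      p[i] += 1;
    }
}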
_service:tar_scm:0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch
Added
@@ -0,0 +1,156 @@ +From 3b109376d057342a31267ea4c9bd422d940874cb Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek <jakub@redhat.com> +Date: Thu, 31 Oct 2024 16:09:43 +0800 +Subject: [PATCH 2/6] [Backport]Disallow pointer operands for |,^ and partly + &[PR106878] + +Signed-off-by: Jakub Jelinek <jakub@redhat.com> +--- + gcc/match.pd | 6 ++++- + .../gcc.c-torture/compile/pr106878.c | 15 +++++++++++++ + gcc/tree-cfg.cc | 22 ++++++++++++++++--- + gcc/tree-ssa-reassoc.cc | 16 +++++++++++++- + 4 files changed, 54 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106878.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 8f41c292f..822e065e8 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -1655,6 +1655,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + && (int_fits_type_p (@1, TREE_TYPE (@0)) + || tree_nop_conversion_p (TREE_TYPE (@0), type))) + || types_match (@0, @1)) ++ && !POINTER_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE + /* ??? This transform conflicts with fold-const.cc doing + Convert (T)(x & c) into (T)x & (T)c, if c is an integer + constants (if x has signed type, the sign bit cannot be set +@@ -1691,7 +1693,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (if (GIMPLE + && TREE_CODE (@1) != INTEGER_CST + && tree_nop_conversion_p (type, TREE_TYPE (@2)) +- && types_match (type, @0)) ++ && types_match (type, @0) ++ && !POINTER_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE) + (bitop @0 (convert @1))))) + + (for bitop (bit_and bit_ior) +diff --git a/gcc/testsuite/gcc.c-torture/compile/pr106878.c b/gcc/testsuite/gcc.c-torture/compile/pr106878.c +new file mode 100644 +index 000000000..c84571894 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/compile/pr106878.c +@@ -0,0 +1,15 @@ ++/* PR tree-optimization/106878 */ ++ ++typedef __INTPTR_TYPE__ intptr_t; ++typedef __UINTPTR_TYPE__ uintptr_t; ++int a; ++ ++int ++foo (const int *c) ++{ ++ uintptr_t d = ((intptr_t) c | (intptr_t) &a) & 65535 << 16; ++ intptr_t e = (intptr_t) c; ++ if (d != (e & 65535 << 16)) ++ return 1; ++ return 0; ++} +diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc +index 48b52f785..d33aaec8c 100644 +--- a/gcc/tree-cfg.cc ++++ b/gcc/tree-cfg.cc +@@ -4163,7 +4163,9 @@ verify_gimple_assign_binary (gassign *stmt) + case ROUND_MOD_EXPR: + case RDIV_EXPR: + case EXACT_DIV_EXPR: +- /* Disallow pointer and offset types for many of the binary gimple. */ ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ /* Disallow pointer and offset types for many of the binary gimple. */ + if (POINTER_TYPE_P (lhs_type) + || TREE_CODE (lhs_type) == OFFSET_TYPE) + { +@@ -4178,9 +4180,23 @@ verify_gimple_assign_binary (gassign *stmt) + + case MIN_EXPR: + case MAX_EXPR: +- case BIT_IOR_EXPR: +- case BIT_XOR_EXPR: ++ /* Continue with generic binary expression handling. */ ++ break; ++ + case BIT_AND_EXPR: ++ if (POINTER_TYPE_P (lhs_type) ++ && TREE_CODE (rhs2) == INTEGER_CST) ++ break; ++ /* Disallow pointer and offset types for many of the binary gimple. */ ++ if (POINTER_TYPE_P (lhs_type) ++ || TREE_CODE (lhs_type) == OFFSET_TYPE) ++ { ++ error ("invalid types for %qs", code_name); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } + /* Continue with generic binary expression handling.
*/ + break; + +diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc +index e3d521e32..6baef4764 100644 +--- a/gcc/tree-ssa-reassoc.cc ++++ b/gcc/tree-ssa-reassoc.cc +@@ -3617,10 +3617,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + tree type2 = NULL_TREE; + bool strict_overflow_p = false; + candidates.truncate (0); ++ if (POINTER_TYPE_P (type1)) ++ type1 = pointer_sized_int_node; + for (j = i; j; j = chains[j - 1]) + { + tree type = TREE_TYPE (ranges[j - 1].exp); + strict_overflow_p |= ranges[j - 1].strict_overflow_p; ++ if (POINTER_TYPE_P (type)) ++ type = pointer_sized_int_node; + if ((b % 4) == 3) + { + /* For the signed < 0 cases, the types should be +@@ -3651,6 +3655,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + tree type = TREE_TYPE (ranges[j - 1].exp); + if (j == k) + continue; ++ if (POINTER_TYPE_P (type)) ++ type = pointer_sized_int_node; + if ((b % 4) == 3) + { + if (!useless_type_conversion_p (type1, type)) +@@ -3680,7 +3686,7 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + op = r->exp; + continue; + } +- if (id == l) ++ if (id == l || POINTER_TYPE_P (TREE_TYPE (op))) + { + code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; + g = gimple_build_assign (make_ssa_name (type1), code, op); +@@ -3704,6 +3710,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + gimple_seq_add_stmt_without_update (&seq, g); + op = gimple_assign_lhs (g); + } ++ type1 = TREE_TYPE (ranges[k - 1].exp); ++ if (POINTER_TYPE_P (type1)) ++ { ++ gimple *g ++ = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); ++ gimple_seq_add_stmt_without_update (&seq, g); ++ op = gimple_assign_lhs (g); ++ } + candidates.pop (); + if (update_range_test (&ranges[k - 1], NULL, candidates.address (), + candidates.length (), opcode, ops, op, +-- +2.33.0 +
_service:tar_scm:0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch
Added
@@ -0,0 +1,55 @@ +From 91ef8899a80e493042fd2687ad89064c9f90cf17 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander <pronin.alexander@huawei.com> +Date: Thu, 31 Oct 2024 16:14:34 +0800 +Subject: [PATCH 3/6] Remove erroneous pattern from gimple ifcvt + +Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com> +--- + gcc/match.pd | 2 +- + gcc/testsuite/gcc.dg/ifcvt-gimple-1.c | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple-1.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 8f41c292f..2dd6581d1 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4276,7 +4276,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + + (if (flag_if_conversion_gimple) +- (for simple_op (plus minus bit_and bit_ior bit_xor) ++ (for simple_op (plus minus bit_ior bit_xor) + (simplify + (cond @0 (simple_op @1 INTEGER_CST@2) @1) + (switch +diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c +new file mode 100644 +index 000000000..381a4ad51 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fno-inline -fif-conversion-gimple" } */ ++ ++#include <stdlib.h> ++ ++void foo(int a, int *p) { ++ *p = a; ++} ++ ++void verify (int a) { ++ if (a != 3) ++ abort (); ++} ++ ++int main() { ++ int a = 0; ++ foo (3, &a); ++ int tmp = (a > 7) ? a & 1 : a; ++ verify (tmp); ++ return 0; ++} +-- +2.33.0 +
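Editorial note: removing bit_and from this pattern family makes sense if the rewrite folds the conditional into the constant operand, i.e. turns "c ? (x OP cst) : x" into "x OP (c ? cst : 0)" (the replacement side is not shown in this hunk, so this is an inference). That rewrite is only sound when 0 is OP's neutral element, which holds for plus, minus, bit_ior and bit_xor but not for bit_and. A small C sketch of the divergence the new test catches, under that assumption:

/* With a = 3: 'good' is 3 (the & 1 applies only on the taken arm),
   while the and-folded form computes 3 & 0 == 0 -- the wrong-code case
   guarded by ifcvt-gimple-1.c above.  */
int
fold_cond_and (int a)
{
  int good = (a > 7) ? (a & 1) : a;
  int bad = a & ((a > 7) ? 1 : 0);
  return good - bad;   /* nonzero means the two forms disagree */
}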
_service:tar_scm:0301-Add-required-check-for-iteration-through-uses.patch
Added
@@ -0,0 +1,33 @@ +From ca24d352e98e357f4f7b8f0d262201765705a08a Mon Sep 17 00:00:00 2001 +From: Pronin Alexander <pronin.alexander@huawei.com> +Date: Thu, 31 Oct 2024 16:31:33 +0800 +Subject: [PATCH 4/6] Add required check for iteration through uses + +Signed-off-by: Pronin Alexander <pronin.alexander@huawei.com> +--- + gcc/tree-ssa-math-opts.cc | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc +index 2c06b8a60..80c06fa01 100644 +--- a/gcc/tree-ssa-math-opts.cc ++++ b/gcc/tree-ssa-math-opts.cc +@@ -4938,8 +4938,13 @@ convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt) + + /* Find the mult low part getter. */ + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3]) +- if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR) +- break; ++ { ++ if (!is_gimple_assign (use_stmt)) ++ continue; ++ ++ if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR) ++ break; ++ } + + /* Create high and low (if needed) parts extractors. */ + /* Low part. */ +-- +2.33.0 +
_service:tar_scm:0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch
Added
@@ -0,0 +1,158 @@ +From 210147e28d542a03588ba3c3fa473301a03bb687 Mon Sep 17 00:00:00 2001 +From: Gmyrikov Konstantin <gmyrikov.konstantin@huawei-partners.com> +Date: Thu, 31 Oct 2024 16:45:15 +0800 +Subject: [PATCH 6/6] Added param for optimization for merging bb's with cheap + insns.Zero param means turned off optimization(default implementation),One + means turned on + +Signed-off-by: Gmyrikov Konstantin <gmyrikov.konstantin@huawei-partners.com> +--- + gcc/params.opt | 4 +++ + gcc/testsuite/gcc.dg/if_comb1.c | 13 +++++++++ + gcc/testsuite/gcc.dg/if_comb2.c | 13 +++++++++ + gcc/testsuite/gcc.dg/if_comb3.c | 12 +++++++++ + gcc/tree-ssa-ifcombine.cc | 47 ++++++++++++++++++++++++++++++--- + 5 files changed, 86 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/if_comb1.c + create mode 100644 gcc/testsuite/gcc.dg/if_comb2.c + create mode 100644 gcc/testsuite/gcc.dg/if_comb3.c + +diff --git a/gcc/params.opt b/gcc/params.opt +index fc700ab79..3ddfaf5b2 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -789,6 +789,10 @@ Maximum number of VALUEs handled during a single find_base_term call. + Common Joined UInteger Var(param_max_vrp_switch_assertions) Init(10) Param Optimization + Maximum number of assertions to add along the default edge of a switch statement during VRP. + ++-param=merge-assign-stmts-ifcombine= ++Common Joined UInteger Var(param_merge_assign_stmts_ifcombine) Init(0) IntegerRange(0, 1) Param Optimization ++Whether bb's with cheap gimple_assign stmts should be merged in the ifcombine pass. ++ + -param=min-crossjump-insns= + Common Joined UInteger Var(param_min_crossjump_insns) Init(5) IntegerRange(1, 65536) Param Optimization + The minimum number of matching instructions to consider for crossjumping. +diff --git a/gcc/testsuite/gcc.dg/if_comb1.c b/gcc/testsuite/gcc.dg/if_comb1.c +new file mode 100644 +index 000000000..e00adc37d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/if_comb1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */ ++ ++int foo (double a, double b, int c) ++{ ++ if (c < 10 || a - b > 1.0) ++ return 0; ++ else ++ return 1; ++} ++ ++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */ ++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */ +diff --git a/gcc/testsuite/gcc.dg/if_comb2.c b/gcc/testsuite/gcc.dg/if_comb2.c +new file mode 100644 +index 000000000..176e7e726 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/if_comb2.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */ ++ ++int foo (int a, int b, int c) ++{ ++ if (a > 1 || b * c < 10) ++ return 0; ++ else ++ return 1; ++} ++ ++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */ ++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */ +diff --git a/gcc/testsuite/gcc.dg/if_comb3.c b/gcc/testsuite/gcc.dg/if_comb3.c +new file mode 100644 +index 000000000..aa2e4510c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/if_comb3.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */ ++ ++int foo (int a, int b, int c) ++{ ++ if (a > 1 && b + c < 10) ++ a++; ++ return a; ++} ++ ++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */ ++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */ +diff --git a/gcc/tree-ssa-ifcombine.cc
b/gcc/tree-ssa-ifcombine.cc +index ce9bbebf9..264a8bcae 100644 +--- a/gcc/tree-ssa-ifcombine.cc ++++ b/gcc/tree-ssa-ifcombine.cc +@@ -110,6 +110,18 @@ recognize_if_then_else (basic_block cond_bb, + return true; + } + ++/* Verify if gimple insn cheap for param=merge-assign-stmts-ifcombine ++ optimization. */ ++ ++bool is_insn_cheap (enum tree_code t) ++{ ++ static enum tree_code cheap_insns[] = {MULT_EXPR, PLUS_EXPR, MINUS_EXPR}; ++ for (int i = 0; i < sizeof (cheap_insns)/sizeof (enum tree_code); i++) ++ if (t == cheap_insns[i]) ++ return 1; ++ return 0; ++} ++ + /* Verify if the basic block BB does not have side-effects. Return + true in this case, else false. */ + +@@ -572,9 +584,38 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, + = param_logical_op_non_short_circuit; + if (!logical_op_non_short_circuit || sanitize_coverage_p ()) + return false; +- /* Only do this optimization if the inner bb contains only the conditional. */ +- if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb))) +- return false; ++ if (param_merge_assign_stmts_ifcombine) ++ { ++ int number_cheap_insns = 0; ++ int number_conds = 0; ++ for (auto i = gsi_start_nondebug_after_labels_bb ++ (outer_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i)) ++ if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN ++ && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i)))) ++ number_cheap_insns++; ++ else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND) ++ number_conds++; ++ for (auto i = gsi_start_nondebug_after_labels_bb ++ (inner_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i)) ++ if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN ++ && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i)))) ++ number_cheap_insns++; ++ else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND) ++ number_conds++; ++ if (!(number_cheap_insns == 1 && number_conds == 2) ++ && !gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb ++ (inner_cond_bb))) ++ return false; ++ } ++ else ++ { ++ /* Only do this optimization if the inner bb contains ++ only the conditional. */ ++ if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb ++ (inner_cond_bb))) ++ return false; ++ } ++ + t1 = fold_build2_loc (gimple_location (inner_cond), + inner_cond_code, + boolean_type_node, +-- +2.33.0 +
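Usage matches the new tests above: the parameter is off by default, and setting it to 1 lets ifcombine merge a block that, besides the condition, contains exactly one cheap assignment (a plus, minus or mult feeding the comparison). For example:

/* Compile with:
   gcc -Ofast --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine demo.c
   The b * c below is the single cheap stmt next to the condition; per the
   tests, the dump should report "optimizing two comparisons" and
   "Merging blocks".  */
int
demo (int a, int b, int c)
{
  if (a > 1 || b * c < 10)
    return 0;
  return 1;
}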
_service:tar_scm:0303-Add-generation-of-stream-in-functions-for-pre-versio.patch
Added
@@ -0,0 +1,6263 @@ +From 4789a6eae616df0b7d07901114c91a2099e4d56d Mon Sep 17 00:00:00 2001 +From: wangchunyang <wangchunyang15@huawei.com> +Date: Wed, 13 Nov 2024 11:26:16 +0800 +Subject: [PATCH 1/2] Add generation of stream in functions for pre-version lto + objects + +--- + gcc/lto-streamer.h | 6 + + gcc/opt-read.awk | 1 + + gcc/optc-save-gen.awk | 6044 ++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 6050 insertions(+), 1 deletion(-) + +diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h +index 597e9e405..9db1a20b3 100644 +--- a/gcc/lto-streamer.h ++++ b/gcc/lto-streamer.h +@@ -943,12 +943,18 @@ void cl_target_option_stream_in (class data_in *, + struct bitpack_d *, + struct cl_target_option *); + ++void cl_target_option_stream_in_prev (class data_in *, ++ struct bitpack_d *, ++ struct cl_target_option *); ++ + void cl_optimization_stream_out (struct output_block *, + struct bitpack_d *, struct cl_optimization *); + + void cl_optimization_stream_in (class data_in *, + struct bitpack_d *, struct cl_optimization *); + ++void cl_optimization_stream_in_prev (class data_in *, ++ struct bitpack_d *, struct cl_optimization *); + + + /* In lto-opts.cc. */ +diff --git a/gcc/opt-read.awk b/gcc/opt-read.awk +index ce3617c8d..624cf6e3d 100644 +--- a/gcc/opt-read.awk ++++ b/gcc/opt-read.awk +@@ -71,6 +71,7 @@ BEGIN { + n_target_save++ + + extra_target_vars[n_extra_target_vars] = name ++ extra_target_vars_set[name] = 1 + extra_target_var_types[n_extra_target_vars] = type + n_extra_target_vars++ + } +diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk +index 76e9b3cb9..7c012dd4e 100644 +--- a/gcc/optc-save-gen.awk ++++ b/gcc/optc-save-gen.awk +@@ -174,6 +174,8 @@ print " unsigned HOST_WIDE_INT mask = 0;"; + j = 0; + k = 0; + for (i = 0; i < n_opt_other; i++) { ++ var_opt_other_j[var_opt_other[i]] = j; ++ var_opt_other_k[var_opt_other[i]] = k; + print " if (opts_set->x_" var_opt_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -185,6 +187,8 @@ for (i = 0; i < n_opt_other; i++) { + } + + for (i = 0; i < n_opt_int; i++) { ++ var_opt_int_j[var_opt_int[i]] = j; ++ var_opt_int_k[var_opt_int[i]] = k; + print " if (opts_set->x_" var_opt_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -196,6 +200,8 @@ for (i = 0; i < n_opt_int; i++) { + } + + for (i = 0; i < n_opt_enum; i++) { ++ var_opt_enum_j[var_opt_enum[i]] = j; ++ var_opt_enum_k[var_opt_enum[i]] = k; + print " if (opts_set->x_" var_opt_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -207,6 +213,8 @@ for (i = 0; i < n_opt_enum; i++) { + } + + for (i = 0; i < n_opt_short; i++) { ++ var_opt_short_j[var_opt_short[i]] = j; ++ var_opt_short_k[var_opt_short[i]] = k; + print " if (opts_set->x_" var_opt_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -218,6 +226,8 @@ for (i = 0; i < n_opt_short; i++) { + } + + for (i = 0; i < n_opt_char; i++) { ++ var_opt_char_j[var_opt_char[i]] = j; ++ var_opt_char_k[var_opt_char[i]] = k; + print " if (opts_set->x_" var_opt_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -229,6 +239,8 @@ for (i = 0; i < n_opt_char; i++) { + } + + for (i = 0; i < n_opt_string; i++) { ++ var_opt_string_j[var_opt_string[i]] = j; ++ var_opt_string_k[var_opt_string[i]] = k; + print " if (opts_set->x_" var_opt_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -604,6 +616,8 @@ for (i = 0; i < n_extra_target_vars; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ extra_target_vars_j[extra_target_vars[i]] = 
j; ++ extra_target_vars_k[extra_target_vars[i]] = k; + print " if (opts_set->x_" extra_target_vars[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -622,6 +636,8 @@ for (i = 0; i < n_target_other; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ var_target_other_j[var_target_other[i]] = j; ++ var_target_other_k[var_target_other[i]] = k; + print " if (opts_set->x_" var_target_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -636,6 +652,8 @@ for (i = 0; i < n_target_enum; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ var_target_enum_j[var_target_enum[i]] = j; ++ var_target_enum_k[var_target_enum[i]] = k; + print " if (opts_set->x_" var_target_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -654,6 +672,8 @@ for (i = 0; i < n_target_int; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ var_target_int_j[var_target_int[i]] = j; ++ var_target_int_k[var_target_int[i]] = k; + print " if (opts_set->x_" var_target_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -668,6 +688,8 @@ for (i = 0; i < n_target_short; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ var_target_short_j[var_target_short[i]] = j; ++ var_target_short_k[var_target_short[i]] = k; + print " if (opts_set->x_" var_target_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -682,6 +704,8 @@ for (i = 0; i < n_target_char; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ var_target_char_j[var_target_char[i]] = j; ++ var_target_char_k[var_target_char[i]] = k; + print " if (opts_set->x_" var_target_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -696,6 +720,8 @@ for (i = 0; i < n_target_string; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } ++ var_target_string_j[var_target_string[i]] = j; ++ var_target_string_k[var_target_string[i]] = k; + print " if (opts_set->x_" var_target_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; + j++; + if (j == 64) { +@@ -1038,6 +1064,7 @@ for (i = 0; i < n_target_save; i++) { + sub(" *" name "$", "", type) + if (target_save_decl[i] ~ "^const char \\*+[_" alnum "]+$") { + var_target_str[n_target_str++] = name; ++ var_target_str_set[name] = 1; + string_options_names[name]++ + } + else { +@@ -1048,12 +1075,14 @@ for (i = 0; i < n_target_save; i++) { + sub("\\[.+", "", name) + sub(" [^ ]+$", "", type) + var_target_array[n_target_array] = name ++ var_target_array_set[name] = 1 + var_target_array_type[n_target_array] = type + var_target_array_size[n_target_array++] = size + } + else { + var_target_val_type[n_target_val] = type; + var_target_val[n_target_val++] = name; ++ var_target_val_set[name] = 1; + } + } + } +@@ -1069,17 +1098,21 @@ if (have_save) { + + var_list_seen[name]++; + otype = var_type_struct(flags[i]) +- if (otype ~ "^const char \\**$") ++ if (otype ~ "^const char \\**$") { + var_target_str[n_target_str++] = "x_" name; ++ var_target_str_set["x_" name] = 1; ++ } + else { + var_target_val_type[n_target_val] = otype; + var_target_val[n_target_val++] = "x_" name; ++ var_target_val_set["x_" name]; + } + } + } + } else { + var_target_val_type[n_target_val] = "int"; + var_target_val[n_target_val++] = "x_target_flags"; ++ var_target_val_set["x_target_flags"] = 1; + } + + for (i = 0; i < n_target_str; i++) { +@@ -1253,6 +1286,224 @@ for (i = 0; i < n_target_int; i++) { + + print "}"; + ++print ""; ++print "/* Stream in target options */"; ++print 
"void"; ++print "cl_target_option_stream_in_prev (struct data_in *data_in ATTRIBUTE_UNUSED,"; ++print " struct bitpack_d *bp ATTRIBUTE_UNUSED,"; ++print " struct cl_target_option *ptr ATTRIBUTE_UNUSED)"; ++print "{"; ++if ("x_aarch64_branch_protection_string" in var_target_str_set) { ++ print " ptr->x_aarch64_branch_protection_string = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_aarch64_branch_protection_string)" ++ print " ptr->x_aarch64_branch_protection_string = xstrdup (ptr->x_aarch64_branch_protection_string);" ++} ++else { ++ print " bp_unpack_string (data_in, bp);" ++} ++if ("x_aarch64_override_tune_string" in var_target_str_set) { ++ print " ptr->x_aarch64_override_tune_string = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_aarch64_override_tune_string)" ++ print " ptr->x_aarch64_override_tune_string = xstrdup (ptr->x_aarch64_override_tune_string);" ++} ++else { ++ print " bp_unpack_string (data_in, bp);" ++} ++if ("x_aarch64_asm_isa_flags" in var_target_val_set) { ++ print " ptr->x_aarch64_asm_isa_flags = (aarch64_feature_flags) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_isa_flags" in var_target_val_set) { ++ print " ptr->x_aarch64_isa_flags = (aarch64_feature_flags) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_selected_arch" in var_target_val_set) { ++ print " ptr->x_selected_arch = (enum aarch64_arch) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_ra_sign_key" in var_target_val_set) { ++ print " ptr->x_aarch64_ra_sign_key = (enum aarch64_key_type) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_selected_tune" in var_target_val_set) { ++ print " ptr->x_selected_tune = (enum aarch64_processor) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_stack_protector_guard_offset" in var_target_val_set) { ++ print " ptr->x_aarch64_stack_protector_guard_offset = (long) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_enable_bti" in var_target_val_set) { ++ print " ptr->x_aarch64_enable_bti = (unsigned) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_cmodel_var" in var_target_val_set) { ++ print " ptr->x_aarch64_cmodel_var = (enum aarch64_code_model ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_fix_a53_err835769" in var_target_val_set) { ++ print " ptr->x_aarch64_fix_a53_err835769 = (signed char ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_fix_a53_err843419" in var_target_val_set) { ++ print " ptr->x_aarch64_fix_a53_err843419 = (signed char ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_target_flags" in var_target_val_set) { ++ print " ptr->x_target_flags = (/* - */ int ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_flag_omit_leaf_frame_pointer" in var_target_val_set) { ++ print " ptr->x_flag_omit_leaf_frame_pointer = (signed char ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_flag_outline_atomics" in var_target_val_set) { ++ print " ptr->x_aarch64_flag_outline_atomics = (signed char ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if 
("x_pcrelative_literal_loads" in var_target_val_set) { ++ print " ptr->x_pcrelative_literal_loads = (signed char ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_ra_sign_scope" in var_target_val_set) { ++ print " ptr->x_aarch64_ra_sign_scope = (enum aarch64_function_type ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++if ("x_aarch64_tls_dialect" in var_target_val_set) { ++ print " ptr->x_aarch64_tls_dialect = (enum aarch64_tls_type ) bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++print " unsigned HOST_WIDE_INT explicit_mask_prev1;" ++print " for (size_t i = 0; i < 1; i++)" ++print " explicit_mask_previ = bp_unpack_value (bp, 64);" ++print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask0); i++)" ++print " ptr->explicit_maski = 0;" ++if ("aarch64_asm_isa_flags" in extra_target_vars_k) { ++ k = extra_target_vars_k"aarch64_asm_isa_flags" ++ j = extra_target_vars_j"aarch64_asm_isa_flags" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_isa_flags" in extra_target_vars_k) { ++ k = extra_target_vars_k"aarch64_isa_flags" ++ j = extra_target_vars_j"aarch64_isa_flags" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("selected_arch" in extra_target_vars_k) { ++ k = extra_target_vars_k"selected_arch" ++ j = extra_target_vars_j"selected_arch" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_ra_sign_key" in extra_target_vars_k) { ++ k = extra_target_vars_k"aarch64_ra_sign_key" ++ j = extra_target_vars_j"aarch64_ra_sign_key" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("selected_tune" in extra_target_vars_k) { ++ k = extra_target_vars_k"selected_tune" ++ j = extra_target_vars_j"selected_tune" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_stack_protector_guard_offset" in extra_target_vars_k) { ++ k = extra_target_vars_k"aarch64_stack_protector_guard_offset" ++ j = extra_target_vars_j"aarch64_stack_protector_guard_offset" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_enable_bti" in extra_target_vars_k) { ++ k = extra_target_vars_k"aarch64_enable_bti" ++ j = extra_target_vars_j"aarch64_enable_bti" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_cmodel_var" in var_target_enum_k) { ++ k = var_target_enum_k"aarch64_cmodel_var" ++ j = var_target_enum_j"aarch64_cmodel_var" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_ra_sign_scope" in var_target_enum_k) { ++ k = var_target_enum_k"aarch64_ra_sign_scope" ++ j = var_target_enum_j"aarch64_ra_sign_scope" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_tls_dialect" in var_target_enum_k) { ++ k = var_target_enum_k"aarch64_tls_dialect" ++ j = var_target_enum_j"aarch64_tls_dialect" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_fix_a53_err835769" in var_target_char_k) { ++ k = var_target_char_k"aarch64_fix_a53_err835769" ++ j = var_target_char_j"aarch64_fix_a53_err835769" ++ 
print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_fix_a53_err843419" in var_target_char_k) { ++ k = var_target_char_k"aarch64_fix_a53_err843419" ++ j = var_target_char_j"aarch64_fix_a53_err843419" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_omit_leaf_frame_pointer" in var_target_char_k) { ++ k = var_target_char_k"flag_omit_leaf_frame_pointer" ++ j = var_target_char_j"flag_omit_leaf_frame_pointer" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_flag_outline_atomics" in var_target_char_k) { ++ k = var_target_char_k"aarch64_flag_outline_atomics" ++ j = var_target_char_j"aarch64_flag_outline_atomics" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("pcrelative_literal_loads" in var_target_char_k) { ++ k = var_target_char_k"pcrelative_literal_loads" ++ j = var_target_char_j"pcrelative_literal_loads" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_branch_protection_string" in var_target_string_k) { ++ k = var_target_string_k"aarch64_branch_protection_string" ++ j = var_target_string_j"aarch64_branch_protection_string" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("aarch64_override_tune_string" in var_target_string_k) { ++ k = var_target_string_k"aarch64_override_tune_string" ++ j = var_target_string_j"aarch64_override_tune_string" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("target_flags" in var_target_explicit_mask) { ++ print " ptr->explicit_mask_target_flags = bp_unpack_value (bp, 64);" ++} ++else { ++ print " bp_unpack_value (bp, 64);" ++} ++print "}"; ++print ""; ++ + print "/* free heap memory used by target options */"; + print "void"; + print "cl_target_option_free (struct cl_target_option *ptr ATTRIBUTE_UNUSED)"; +@@ -1266,15 +1517,19 @@ print "}"; + + n_opt_val = 4; + var_opt_val0 = "x_optimize" ++var_opt_val_set"x_optimize" = 1 + var_opt_val_type0 = "char " + var_opt_hash0 = 1; + var_opt_val1 = "x_optimize_size" ++var_opt_val_set"x_optimize_size" = 1 + var_opt_val_type1 = "char " + var_opt_hash1 = 1; + var_opt_val2 = "x_optimize_debug" ++var_opt_val_set"x_optimize_debug" = 1 + var_opt_val_type2 = "char " + var_opt_hash2 = 1; + var_opt_val3 = "x_optimize_fast" ++var_opt_val_set"x_optimize_fast" = 1 + var_opt_val_type3 = "char " + var_opt_hash3 = 1; + for (i = 0; i < n_opts; i++) { +@@ -1291,6 +1546,7 @@ for (i = 0; i < n_opts; i++) { + otype = var_type_struct(flagsi) + var_opt_val_typen_opt_val = otype; + var_opt_valn_opt_val = "x_" name; ++ var_opt_val_set"x_" name = 1; + var_opt_hashn_opt_val = flag_set_p("Optimization", flagsi); + var_opt_initn_opt_val = opt_args("Init", flagsi); + n_opt_val++; +@@ -1415,6 +1671,5792 @@ for (i = 0; i < n_opt_val; i++) { + print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask0); i++)"; + print " ptr->explicit_maski = bp_unpack_value (bp, 64);"; + print "}"; ++ ++print ""; ++print "/* Stream in optimization options */"; ++print "void"; ++print "cl_optimization_stream_in_prev (struct data_in *data_in ATTRIBUTE_UNUSED,"; ++print " struct bitpack_d *bp ATTRIBUTE_UNUSED,"; ++print " struct cl_optimization *ptr ATTRIBUTE_UNUSED)"; ++print "{"; ++if ("x_optimize" in var_opt_val_set) { ++ 
print " ptr->x_optimize = (char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_optimize_size" in var_opt_val_set) { ++ print " ptr->x_optimize_size = (char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_optimize_debug" in var_opt_val_set) { ++ print " ptr->x_optimize_debug = (char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_optimize_fast" in var_opt_val_set) { ++ print " ptr->x_optimize_fast = (char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_align_loop_iterations" in var_opt_val_set) { ++ print " ptr->x_param_align_loop_iterations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4 > (int ) 10)" ++ print " ptr->x_param_align_loop_iterations ^= 4;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_align_threshold" in var_opt_val_set) { ++ print " ptr->x_param_align_threshold = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_align_threshold ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_protect_allocas" in var_opt_val_set) { ++ print " ptr->x_param_asan_protect_allocas = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_asan_protect_allocas ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_instrument_reads" in var_opt_val_set) { ++ print " ptr->x_param_asan_instrument_reads = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_asan_instrument_reads ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_instrument_writes" in var_opt_val_set) { ++ print " ptr->x_param_asan_instrument_writes = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_asan_instrument_writes ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_instrumentation_with_call_threshold" in var_opt_val_set) { ++ print " ptr->x_param_asan_instrumentation_with_call_threshold = (int ) bp_unpack_var_len_int (bp);" ++ print " if (7000 > (int ) 10)" ++ print " ptr->x_param_asan_instrumentation_with_call_threshold ^= 7000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_memintrin" in var_opt_val_set) { ++ print " ptr->x_param_asan_memintrin = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_asan_memintrin ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_stack" in var_opt_val_set) { ++ print " ptr->x_param_asan_stack = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_asan_stack ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_asan_use_after_return" in var_opt_val_set) { ++ print " ptr->x_param_asan_use_after_return = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_asan_use_after_return ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_avg_loop_niter" in var_opt_val_set) { ++ print " ptr->x_param_avg_loop_niter = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_avg_loop_niter ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_avoid_fma_max_bits" in var_opt_val_set) { ++ print " ptr->x_param_avoid_fma_max_bits = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " 
bp_unpack_var_len_int (bp);" ++if ("x_param_builtin_expect_probability" in var_opt_val_set) { ++ print " ptr->x_param_builtin_expect_probability = (int ) bp_unpack_var_len_int (bp);" ++ print " if (90 > (int ) 10)" ++ print " ptr->x_param_builtin_expect_probability ^= 90;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_builtin_string_cmp_inline_length" in var_opt_val_set) { ++ print " ptr->x_param_builtin_string_cmp_inline_length = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_builtin_string_cmp_inline_length ^= 3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_case_values_threshold" in var_opt_val_set) { ++ print " ptr->x_param_case_values_threshold = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_comdat_sharing_probability" in var_opt_val_set) { ++ print " ptr->x_param_comdat_sharing_probability = (int ) bp_unpack_var_len_int (bp);" ++ print " if (20 > (int ) 10)" ++ print " ptr->x_param_comdat_sharing_probability ^= 20;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_pointer_compression_size" in var_opt_val_set) { ++ print " ptr->x_param_pointer_compression_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (32 > (int ) 10)" ++ print " ptr->x_param_pointer_compression_size ^= 32;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_construct_interfere_size" in var_opt_val_set) { ++ print " ptr->x_param_construct_interfere_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (0 > (int ) 10)" ++ print " ptr->x_param_construct_interfere_size ^= 0;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_destruct_interfere_size" in var_opt_val_set) { ++ print " ptr->x_param_destruct_interfere_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (0 > (int ) 10)" ++ print " ptr->x_param_destruct_interfere_size ^= 0;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_dse_max_alias_queries_per_store" in var_opt_val_set) { ++ print " ptr->x_param_dse_max_alias_queries_per_store = (int ) bp_unpack_var_len_int (bp);" ++ print " if (256 > (int ) 10)" ++ print " ptr->x_param_dse_max_alias_queries_per_store ^= 256;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_dse_max_object_size" in var_opt_val_set) { ++ print " ptr->x_param_dse_max_object_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (256 > (int ) 10)" ++ print " ptr->x_param_dse_max_object_size ^= 256;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_early_inlining_insns" in var_opt_val_set) { ++ print " ptr->x_param_early_inlining_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (6 > (int ) 10)" ++ print " ptr->x_param_early_inlining_insns ^= 6;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_evrp_mode" in var_opt_val_set) { ++ print " ptr->x_param_evrp_mode = (enum evrp_mode ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_evrp_sparse_threshold" in var_opt_val_set) { ++ print " ptr->x_param_evrp_sparse_threshold = (int ) bp_unpack_var_len_int (bp);" ++ print " if (800 > (int ) 10)" ++ print " ptr->x_param_evrp_sparse_threshold ^= 800;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_evrp_switch_limit" in var_opt_val_set) { ++ print " ptr->x_param_evrp_switch_limit = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_evrp_switch_limit ^= 50;" ++} ++else ++ 
print " bp_unpack_var_len_int (bp);" ++if ("x_param_fsm_scale_path_blocks" in var_opt_val_set) { ++ print " ptr->x_param_fsm_scale_path_blocks = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_fsm_scale_path_blocks ^= 3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_fsm_scale_path_stmts" in var_opt_val_set) { ++ print " ptr->x_param_fsm_scale_path_stmts = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_fsm_scale_path_stmts ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_gcse_after_reload_critical_fraction" in var_opt_val_set) { ++ print " ptr->x_param_gcse_after_reload_critical_fraction = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_gcse_after_reload_critical_fraction ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_gcse_after_reload_partial_fraction" in var_opt_val_set) { ++ print " ptr->x_param_gcse_after_reload_partial_fraction = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_gcse_after_reload_partial_fraction ^= 3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_gcse_cost_distance_ratio" in var_opt_val_set) { ++ print " ptr->x_param_gcse_cost_distance_ratio = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_gcse_cost_distance_ratio ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_gcse_unrestricted_cost" in var_opt_val_set) { ++ print " ptr->x_param_gcse_unrestricted_cost = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_gcse_unrestricted_cost ^= 3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_graphite_max_arrays_per_scop" in var_opt_val_set) { ++ print " ptr->x_param_graphite_max_arrays_per_scop = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_graphite_max_arrays_per_scop ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_graphite_max_nb_scop_params" in var_opt_val_set) { ++ print " ptr->x_param_graphite_max_nb_scop_params = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_graphite_max_nb_scop_params ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_hwasan_instrument_allocas" in var_opt_val_set) { ++ print " ptr->x_param_hwasan_instrument_allocas = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_hwasan_instrument_allocas ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_hwasan_instrument_mem_intrinsics" in var_opt_val_set) { ++ print " ptr->x_param_hwasan_instrument_mem_intrinsics = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_hwasan_instrument_mem_intrinsics ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_hwasan_instrument_reads" in var_opt_val_set) { ++ print " ptr->x_param_hwasan_instrument_reads = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_hwasan_instrument_reads ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_hwasan_instrument_stack" in var_opt_val_set) { ++ print " ptr->x_param_hwasan_instrument_stack = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_hwasan_instrument_stack ^= 1;" ++} ++else ++ print " 
bp_unpack_var_len_int (bp);" ++if ("x_param_hwasan_instrument_writes" in var_opt_val_set) { ++ print " ptr->x_param_hwasan_instrument_writes = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_hwasan_instrument_writes ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_hwasan_random_frame_tag" in var_opt_val_set) { ++ print " ptr->x_param_hwasan_random_frame_tag = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_hwasan_random_frame_tag ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ifcvt_allow_register_renaming" in var_opt_val_set) { ++ print " ptr->x_param_ifcvt_allow_register_renaming = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_inline_heuristics_hint_percent" in var_opt_val_set) { ++ print " ptr->x_param_inline_heuristics_hint_percent = (int ) bp_unpack_var_len_int (bp);" ++ print " if (200 > (int ) 10)" ++ print " ptr->x_param_inline_heuristics_hint_percent ^= 200;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_inline_min_speedup" in var_opt_val_set) { ++ print " ptr->x_param_inline_min_speedup = (int ) bp_unpack_var_len_int (bp);" ++ print " if (30 > (int ) 10)" ++ print " ptr->x_param_inline_min_speedup ^= 30;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_inline_unit_growth" in var_opt_val_set) { ++ print " ptr->x_param_inline_unit_growth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (40 > (int ) 10)" ++ print " ptr->x_param_inline_unit_growth ^= 40;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_eval_threshold" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_eval_threshold = (int ) bp_unpack_var_len_int (bp);" ++ print " if (500 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_eval_threshold ^= 500;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_large_unit_insns" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_large_unit_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (16000 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_large_unit_insns ^= 16000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_loop_hint_bonus" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_loop_hint_bonus = (int ) bp_unpack_var_len_int (bp);" ++ print " if (64 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_loop_hint_bonus ^= 64;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_max_recursive_depth" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_max_recursive_depth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_max_recursive_depth ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_min_recursive_probability" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_min_recursive_probability = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_min_recursive_probability ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_profile_count_base" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_profile_count_base = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_profile_count_base ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_recursion_penalty" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_recursion_penalty = (int ) 
bp_unpack_var_len_int (bp);" ++ print " if (40 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_recursion_penalty ^= 40;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_recursive_freq_factor" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_recursive_freq_factor = (int ) bp_unpack_var_len_int (bp);" ++ print " if (6 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_recursive_freq_factor ^= 6;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_single_call_penalty" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_single_call_penalty = (int ) bp_unpack_var_len_int (bp);" ++ print " if (15 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_single_call_penalty ^= 15;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_unit_growth" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_unit_growth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_unit_growth ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_cp_value_list_size" in var_opt_val_set) { ++ print " ptr->x_param_ipa_cp_value_list_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_ipa_cp_value_list_size ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_jump_function_lookups" in var_opt_val_set) { ++ print " ptr->x_param_ipa_jump_function_lookups = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_ipa_jump_function_lookups ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_max_aa_steps" in var_opt_val_set) { ++ print " ptr->x_param_ipa_max_aa_steps = (int ) bp_unpack_var_len_int (bp);" ++ print " if (25000 > (int ) 10)" ++ print " ptr->x_param_ipa_max_aa_steps ^= 25000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_max_agg_items" in var_opt_val_set) { ++ print " ptr->x_param_ipa_max_agg_items = (int ) bp_unpack_var_len_int (bp);" ++ print " if (16 > (int ) 10)" ++ print " ptr->x_param_ipa_max_agg_items ^= 16;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_max_loop_predicates" in var_opt_val_set) { ++ print " ptr->x_param_ipa_max_loop_predicates = (int ) bp_unpack_var_len_int (bp);" ++ print " if (16 > (int ) 10)" ++ print " ptr->x_param_ipa_max_loop_predicates ^= 16;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_max_param_expr_ops" in var_opt_val_set) { ++ print " ptr->x_param_ipa_max_param_expr_ops = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_ipa_max_param_expr_ops ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_max_switch_predicate_bounds" in var_opt_val_set) { ++ print " ptr->x_param_ipa_max_switch_predicate_bounds = (int ) bp_unpack_var_len_int (bp);" ++ print " if (5 > (int ) 10)" ++ print " ptr->x_param_ipa_max_switch_predicate_bounds ^= 5;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_prefetch_distance_factor" in var_opt_val_set) { ++ print " ptr->x_param_ipa_prefetch_distance_factor = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4 > (int ) 10)" ++ print " ptr->x_param_ipa_prefetch_distance_factor ^= 4;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_prefetch_locality" in var_opt_val_set) { ++ print " ptr->x_param_ipa_prefetch_locality = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_ipa_prefetch_locality ^= 
3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_prefetch_pagesize" in var_opt_val_set) { ++ print " ptr->x_param_ipa_prefetch_pagesize = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4096 > (int ) 10)" ++ print " ptr->x_param_ipa_prefetch_pagesize ^= 4096;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_sra_max_replacements" in var_opt_val_set) { ++ print " ptr->x_param_ipa_sra_max_replacements = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_ipa_sra_max_replacements ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ipa_sra_ptr_growth_factor" in var_opt_val_set) { ++ print " ptr->x_param_ipa_sra_ptr_growth_factor = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_ipa_sra_ptr_growth_factor ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ira_consider_dup_in_all_alts" in var_opt_val_set) { ++ print " ptr->x_param_ira_consider_dup_in_all_alts = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_ira_consider_dup_in_all_alts ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ira_loop_reserved_regs" in var_opt_val_set) { ++ print " ptr->x_param_ira_loop_reserved_regs = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_ira_loop_reserved_regs ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ira_max_conflict_table_size" in var_opt_val_set) { ++ print " ptr->x_param_ira_max_conflict_table_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1000 > (int ) 10)" ++ print " ptr->x_param_ira_max_conflict_table_size ^= 1000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_ira_max_loops_num" in var_opt_val_set) { ++ print " ptr->x_param_ira_max_loops_num = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_ira_max_loops_num ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_iv_always_prune_cand_set_bound" in var_opt_val_set) { ++ print " ptr->x_param_iv_always_prune_cand_set_bound = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_iv_always_prune_cand_set_bound ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_iv_consider_all_candidates_bound" in var_opt_val_set) { ++ print " ptr->x_param_iv_consider_all_candidates_bound = (int ) bp_unpack_var_len_int (bp);" ++ print " if (40 > (int ) 10)" ++ print " ptr->x_param_iv_consider_all_candidates_bound ^= 40;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_iv_max_considered_uses" in var_opt_val_set) { ++ print " ptr->x_param_iv_max_considered_uses = (int ) bp_unpack_var_len_int (bp);" ++ print " if (250 > (int ) 10)" ++ print " ptr->x_param_iv_max_considered_uses ^= 250;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_jump_table_max_growth_ratio_for_size" in var_opt_val_set) { ++ print " ptr->x_param_jump_table_max_growth_ratio_for_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (300 > (int ) 10)" ++ print " ptr->x_param_jump_table_max_growth_ratio_for_size ^= 300;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_jump_table_max_growth_ratio_for_speed" in var_opt_val_set) { ++ print " ptr->x_param_jump_table_max_growth_ratio_for_speed = (int ) bp_unpack_var_len_int (bp);" ++ print " if (800 > (int ) 10)" ++ print " 
ptr->x_param_jump_table_max_growth_ratio_for_speed ^= 800;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_l1_cache_line_size" in var_opt_val_set) { ++ print " ptr->x_param_l1_cache_line_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (32 > (int ) 10)" ++ print " ptr->x_param_l1_cache_line_size ^= 32;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_l1_cache_size" in var_opt_val_set) { ++ print " ptr->x_param_l1_cache_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (64 > (int ) 10)" ++ print " ptr->x_param_l1_cache_size ^= 64;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_l2_cache_size" in var_opt_val_set) { ++ print " ptr->x_param_l2_cache_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (512 > (int ) 10)" ++ print " ptr->x_param_l2_cache_size ^= 512;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_large_function_growth" in var_opt_val_set) { ++ print " ptr->x_param_large_function_growth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_large_function_growth ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_large_function_insns" in var_opt_val_set) { ++ print " ptr->x_param_large_function_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2700 > (int ) 10)" ++ print " ptr->x_param_large_function_insns ^= 2700;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_stack_frame_growth" in var_opt_val_set) { ++ print " ptr->x_param_stack_frame_growth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1000 > (int ) 10)" ++ print " ptr->x_param_stack_frame_growth ^= 1000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_large_stack_frame" in var_opt_val_set) { ++ print " ptr->x_param_large_stack_frame = (int ) bp_unpack_var_len_int (bp);" ++ print " if (256 > (int ) 10)" ++ print " ptr->x_param_large_stack_frame ^= 256;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_large_unit_insns" in var_opt_val_set) { ++ print " ptr->x_param_large_unit_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10000 > (int ) 10)" ++ print " ptr->x_param_large_unit_insns ^= 10000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_lim_expensive" in var_opt_val_set) { ++ print " ptr->x_param_lim_expensive = (int ) bp_unpack_var_len_int (bp);" ++ print " if (20 > (int ) 10)" ++ print " ptr->x_param_lim_expensive ^= 20;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_block_tile_size" in var_opt_val_set) { ++ print " ptr->x_param_loop_block_tile_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (51 > (int ) 10)" ++ print " ptr->x_param_loop_block_tile_size ^= 51;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_interchange_max_num_stmts" in var_opt_val_set) { ++ print " ptr->x_param_loop_interchange_max_num_stmts = (int ) bp_unpack_var_len_int (bp);" ++ print " if (64 > (int ) 10)" ++ print " ptr->x_param_loop_interchange_max_num_stmts ^= 64;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_interchange_stride_ratio" in var_opt_val_set) { ++ print " ptr->x_param_loop_interchange_stride_ratio = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_loop_interchange_stride_ratio ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_invariant_max_bbs_in_loop" in var_opt_val_set) { ++ print " ptr->x_param_loop_invariant_max_bbs_in_loop = 
(int ) bp_unpack_var_len_int (bp);" ++ print " if (10000 > (int ) 10)" ++ print " ptr->x_param_loop_invariant_max_bbs_in_loop ^= 10000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_max_datarefs_for_datadeps" in var_opt_val_set) { ++ print " ptr->x_param_loop_max_datarefs_for_datadeps = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1000 > (int ) 10)" ++ print " ptr->x_param_loop_max_datarefs_for_datadeps ^= 1000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_versioning_max_inner_insns" in var_opt_val_set) { ++ print " ptr->x_param_loop_versioning_max_inner_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (200 > (int ) 10)" ++ print " ptr->x_param_loop_versioning_max_inner_insns ^= 200;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_loop_versioning_max_outer_insns" in var_opt_val_set) { ++ print " ptr->x_param_loop_versioning_max_outer_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_loop_versioning_max_outer_insns ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_lra_inheritance_ebb_probability_cutoff" in var_opt_val_set) { ++ print " ptr->x_param_lra_inheritance_ebb_probability_cutoff = (int ) bp_unpack_var_len_int (bp);" ++ print " if (40 > (int ) 10)" ++ print " ptr->x_param_lra_inheritance_ebb_probability_cutoff ^= 40;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_lra_max_considered_reload_pseudos" in var_opt_val_set) { ++ print " ptr->x_param_lra_max_considered_reload_pseudos = (int ) bp_unpack_var_len_int (bp);" ++ print " if (500 > (int ) 10)" ++ print " ptr->x_param_lra_max_considered_reload_pseudos ^= 500;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_average_unrolled_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_average_unrolled_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (80 > (int ) 10)" ++ print " ptr->x_param_max_average_unrolled_insns ^= 80;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_combine_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_combine_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4 > (int ) 10)" ++ print " ptr->x_param_max_combine_insns ^= 4;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_unroll_iterations" in var_opt_val_set) { ++ print " ptr->x_param_max_unroll_iterations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_max_unroll_iterations ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_completely_peel_times" in var_opt_val_set) { ++ print " ptr->x_param_max_completely_peel_times = (int ) bp_unpack_var_len_int (bp);" ++ print " if (16 > (int ) 10)" ++ print " ptr->x_param_max_completely_peel_times ^= 16;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_completely_peeled_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_completely_peeled_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (200 > (int ) 10)" ++ print " ptr->x_param_max_completely_peeled_insns ^= 200;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_crossjump_edges" in var_opt_val_set) { ++ print " ptr->x_param_max_crossjump_edges = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_crossjump_edges ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_cse_insns" in var_opt_val_set) { ++ print " 
ptr->x_param_max_cse_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1000 > (int ) 10)" ++ print " ptr->x_param_max_cse_insns ^= 1000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_cse_path_length" in var_opt_val_set) { ++ print " ptr->x_param_max_cse_path_length = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_max_cse_path_length ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_cselib_memory_locations" in var_opt_val_set) { ++ print " ptr->x_param_max_cselib_memory_locations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (500 > (int ) 10)" ++ print " ptr->x_param_max_cselib_memory_locations ^= 500;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_debug_marker_count" in var_opt_val_set) { ++ print " ptr->x_param_max_debug_marker_count = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100000 > (int ) 10)" ++ print " ptr->x_param_max_debug_marker_count ^= 100000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_delay_slot_insn_search" in var_opt_val_set) { ++ print " ptr->x_param_max_delay_slot_insn_search = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_delay_slot_insn_search ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_delay_slot_live_search" in var_opt_val_set) { ++ print " ptr->x_param_max_delay_slot_live_search = (int ) bp_unpack_var_len_int (bp);" ++ print " if (333 > (int ) 10)" ++ print " ptr->x_param_max_delay_slot_live_search ^= 333;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_dse_active_local_stores" in var_opt_val_set) { ++ print " ptr->x_param_max_dse_active_local_stores = (int ) bp_unpack_var_len_int (bp);" ++ print " if (5000 > (int ) 10)" ++ print " ptr->x_param_max_dse_active_local_stores ^= 5000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_early_inliner_max_iterations" in var_opt_val_set) { ++ print " ptr->x_param_early_inliner_max_iterations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_early_inliner_max_iterations ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_find_base_term_values" in var_opt_val_set) { ++ print " ptr->x_param_max_find_base_term_values = (int ) bp_unpack_var_len_int (bp);" ++ print " if (200 > (int ) 10)" ++ print " ptr->x_param_max_find_base_term_values ^= 200;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_fsm_thread_length" in var_opt_val_set) { ++ print " ptr->x_param_max_fsm_thread_length = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_max_fsm_thread_length ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_fsm_thread_path_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_fsm_thread_path_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_fsm_thread_path_insns ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_gcse_insertion_ratio" in var_opt_val_set) { ++ print " ptr->x_param_max_gcse_insertion_ratio = (int ) bp_unpack_var_len_int (bp);" ++ print " if (20 > (int ) 10)" ++ print " ptr->x_param_max_gcse_insertion_ratio ^= 20;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_gcse_memory" in var_opt_val_set) { ++ print " ptr->x_param_max_gcse_memory = (int ) bp_unpack_var_len_int 
(bp);" ++ print " if (131072 > (int ) 10)" ++ print " ptr->x_param_max_gcse_memory ^= 131072;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_goto_duplication_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_goto_duplication_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_max_goto_duplication_insns ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_grow_copy_bb_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_grow_copy_bb_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_max_grow_copy_bb_insns ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_hoist_depth" in var_opt_val_set) { ++ print " ptr->x_param_max_hoist_depth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (30 > (int ) 10)" ++ print " ptr->x_param_max_hoist_depth ^= 30;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_inline_functions_called_once_insns" in var_opt_val_set) { ++ print " ptr->x_param_inline_functions_called_once_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4000 > (int ) 10)" ++ print " ptr->x_param_inline_functions_called_once_insns ^= 4000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_inline_functions_called_once_loop_depth" in var_opt_val_set) { ++ print " ptr->x_param_inline_functions_called_once_loop_depth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (6 > (int ) 10)" ++ print " ptr->x_param_inline_functions_called_once_loop_depth ^= 6;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_insns_auto" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_insns_auto = (int ) bp_unpack_var_len_int (bp);" ++ print " if (15 > (int ) 10)" ++ print " ptr->x_param_max_inline_insns_auto ^= 15;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_insns_recursive_auto" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_insns_recursive_auto = (int ) bp_unpack_var_len_int (bp);" ++ print " if (450 > (int ) 10)" ++ print " ptr->x_param_max_inline_insns_recursive_auto ^= 450;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_insns_recursive" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_insns_recursive = (int ) bp_unpack_var_len_int (bp);" ++ print " if (450 > (int ) 10)" ++ print " ptr->x_param_max_inline_insns_recursive ^= 450;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_insns_single" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_insns_single = (int ) bp_unpack_var_len_int (bp);" ++ print " if (70 > (int ) 10)" ++ print " ptr->x_param_max_inline_insns_single ^= 70;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_insns_size" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_insns_size = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_insns_small" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_insns_small = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_inline_recursive_depth_auto" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_recursive_depth_auto = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_max_inline_recursive_depth_auto ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if 
("x_param_max_inline_recursive_depth" in var_opt_val_set) { ++ print " ptr->x_param_max_inline_recursive_depth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_max_inline_recursive_depth ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_isl_operations" in var_opt_val_set) { ++ print " ptr->x_param_max_isl_operations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (350000 > (int ) 10)" ++ print " ptr->x_param_max_isl_operations ^= 350000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_iterations_computation_cost" in var_opt_val_set) { ++ print " ptr->x_param_max_iterations_computation_cost = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_max_iterations_computation_cost ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_iterations_to_track" in var_opt_val_set) { ++ print " ptr->x_param_max_iterations_to_track = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1000 > (int ) 10)" ++ print " ptr->x_param_max_iterations_to_track ^= 1000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_jump_thread_duplication_stmts" in var_opt_val_set) { ++ print " ptr->x_param_max_jump_thread_duplication_stmts = (int ) bp_unpack_var_len_int (bp);" ++ print " if (15 > (int ) 10)" ++ print " ptr->x_param_max_jump_thread_duplication_stmts ^= 15;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_last_value_rtl" in var_opt_val_set) { ++ print " ptr->x_param_max_last_value_rtl = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10000 > (int ) 10)" ++ print " ptr->x_param_max_last_value_rtl ^= 10000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_loop_header_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_loop_header_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (20 > (int ) 10)" ++ print " ptr->x_param_max_loop_header_insns ^= 20;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_modulo_backtrack_attempts" in var_opt_val_set) { ++ print " ptr->x_param_max_modulo_backtrack_attempts = (int ) bp_unpack_var_len_int (bp);" ++ print " if (40 > (int ) 10)" ++ print " ptr->x_param_max_modulo_backtrack_attempts ^= 40;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_partial_antic_length" in var_opt_val_set) { ++ print " ptr->x_param_max_partial_antic_length = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_partial_antic_length ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_peel_branches" in var_opt_val_set) { ++ print " ptr->x_param_max_peel_branches = (int ) bp_unpack_var_len_int (bp);" ++ print " if (32 > (int ) 10)" ++ print " ptr->x_param_max_peel_branches ^= 32;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_peel_times" in var_opt_val_set) { ++ print " ptr->x_param_max_peel_times = (int ) bp_unpack_var_len_int (bp);" ++ print " if (16 > (int ) 10)" ++ print " ptr->x_param_max_peel_times ^= 16;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_peeled_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_peeled_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_peeled_insns ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_pending_list_length" in var_opt_val_set) { ++ print " ptr->x_param_max_pending_list_length = 
(int ) bp_unpack_var_len_int (bp);" ++ print " if (32 > (int ) 10)" ++ print " ptr->x_param_max_pending_list_length ^= 32;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_pipeline_region_blocks" in var_opt_val_set) { ++ print " ptr->x_param_max_pipeline_region_blocks = (int ) bp_unpack_var_len_int (bp);" ++ print " if (15 > (int ) 10)" ++ print " ptr->x_param_max_pipeline_region_blocks ^= 15;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_pipeline_region_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_pipeline_region_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (200 > (int ) 10)" ++ print " ptr->x_param_max_pipeline_region_insns ^= 200;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_pow_sqrt_depth" in var_opt_val_set) { ++ print " ptr->x_param_max_pow_sqrt_depth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (5 > (int ) 10)" ++ print " ptr->x_param_max_pow_sqrt_depth ^= 5;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_predicted_iterations" in var_opt_val_set) { ++ print " ptr->x_param_max_predicted_iterations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_predicted_iterations ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_reload_search_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_reload_search_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_reload_search_insns ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_rtl_if_conversion_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_rtl_if_conversion_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_max_rtl_if_conversion_insns ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_rtl_if_conversion_predictable_cost" in var_opt_val_set) { ++ print " ptr->x_param_max_rtl_if_conversion_predictable_cost = (int ) bp_unpack_var_len_int (bp);" ++ print " if (20 > (int ) 10)" ++ print " ptr->x_param_max_rtl_if_conversion_predictable_cost ^= 20;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_rtl_if_conversion_unpredictable_cost" in var_opt_val_set) { ++ print " ptr->x_param_max_rtl_if_conversion_unpredictable_cost = (int ) bp_unpack_var_len_int (bp);" ++ print " if (40 > (int ) 10)" ++ print " ptr->x_param_max_rtl_if_conversion_unpredictable_cost ^= 40;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_sched_extend_regions_iters" in var_opt_val_set) { ++ print " ptr->x_param_max_sched_extend_regions_iters = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_sched_insn_conflict_delay" in var_opt_val_set) { ++ print " ptr->x_param_max_sched_insn_conflict_delay = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_max_sched_insn_conflict_delay ^= 3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_sched_ready_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_sched_ready_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_sched_ready_insns ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_sched_region_blocks" in var_opt_val_set) { ++ print " ptr->x_param_max_sched_region_blocks = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > 
(int ) 10)" ++ print " ptr->x_param_max_sched_region_blocks ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_sched_region_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_sched_region_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_max_sched_region_insns ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_slsr_candidate_scan" in var_opt_val_set) { ++ print " ptr->x_param_max_slsr_candidate_scan = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_max_slsr_candidate_scan ^= 50;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_speculative_devirt_maydefs" in var_opt_val_set) { ++ print " ptr->x_param_max_speculative_devirt_maydefs = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_max_speculative_devirt_maydefs ^= 50;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_stores_to_merge" in var_opt_val_set) { ++ print " ptr->x_param_max_stores_to_merge = (int ) bp_unpack_var_len_int (bp);" ++ print " if (64 > (int ) 10)" ++ print " ptr->x_param_max_stores_to_merge ^= 64;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_stores_to_sink" in var_opt_val_set) { ++ print " ptr->x_param_max_stores_to_sink = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_max_stores_to_sink ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_tail_merge_comparisons" in var_opt_val_set) { ++ print " ptr->x_param_max_tail_merge_comparisons = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_max_tail_merge_comparisons ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_tail_merge_iterations" in var_opt_val_set) { ++ print " ptr->x_param_max_tail_merge_iterations = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_max_tail_merge_iterations ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_tracked_strlens" in var_opt_val_set) { ++ print " ptr->x_param_max_tracked_strlens = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10000 > (int ) 10)" ++ print " ptr->x_param_max_tracked_strlens ^= 10000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_tree_if_conversion_phi_args" in var_opt_val_set) { ++ print " ptr->x_param_max_tree_if_conversion_phi_args = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4 > (int ) 10)" ++ print " ptr->x_param_max_tree_if_conversion_phi_args ^= 4;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_unroll_times" in var_opt_val_set) { ++ print " ptr->x_param_max_unroll_times = (int ) bp_unpack_var_len_int (bp);" ++ print " if (8 > (int ) 10)" ++ print " ptr->x_param_max_unroll_times ^= 8;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_unrolled_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_unrolled_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (200 > (int ) 10)" ++ print " ptr->x_param_max_unrolled_insns ^= 200;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_unswitch_insns" in var_opt_val_set) { ++ print " ptr->x_param_max_unswitch_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_max_unswitch_insns ^= 50;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if 
("x_param_max_unswitch_level" in var_opt_val_set) { ++ print " ptr->x_param_max_unswitch_level = (int ) bp_unpack_var_len_int (bp);" ++ print " if (3 > (int ) 10)" ++ print " ptr->x_param_max_unswitch_level ^= 3;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_variable_expansions" in var_opt_val_set) { ++ print " ptr->x_param_max_variable_expansions = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_max_variable_expansions ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_vartrack_expr_depth" in var_opt_val_set) { ++ print " ptr->x_param_max_vartrack_expr_depth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (12 > (int ) 10)" ++ print " ptr->x_param_max_vartrack_expr_depth ^= 12;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_vartrack_reverse_op_size" in var_opt_val_set) { ++ print " ptr->x_param_max_vartrack_reverse_op_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_max_vartrack_reverse_op_size ^= 50;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_vartrack_size" in var_opt_val_set) { ++ print " ptr->x_param_max_vartrack_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50000000 > (int ) 10)" ++ print " ptr->x_param_max_vartrack_size ^= 50000000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_max_vrp_switch_assertions" in var_opt_val_set) { ++ print " ptr->x_param_max_vrp_switch_assertions = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_max_vrp_switch_assertions ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_crossjump_insns" in var_opt_val_set) { ++ print " ptr->x_param_min_crossjump_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (5 > (int ) 10)" ++ print " ptr->x_param_min_crossjump_insns ^= 5;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_inline_recursive_probability" in var_opt_val_set) { ++ print " ptr->x_param_min_inline_recursive_probability = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_min_inline_recursive_probability ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_insn_to_prefetch_ratio" in var_opt_val_set) { ++ print " ptr->x_param_min_insn_to_prefetch_ratio = (int ) bp_unpack_var_len_int (bp);" ++ print " if (9 > (int ) 10)" ++ print " ptr->x_param_min_insn_to_prefetch_ratio ^= 9;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_loop_cond_split_prob" in var_opt_val_set) { ++ print " ptr->x_param_min_loop_cond_split_prob = (int ) bp_unpack_var_len_int (bp);" ++ print " if (30 > (int ) 10)" ++ print " ptr->x_param_min_loop_cond_split_prob ^= 30;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_pagesize" in var_opt_val_set) { ++ print " ptr->x_param_min_pagesize = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4096 > (int ) 10)" ++ print " ptr->x_param_min_pagesize ^= 4096;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_size_for_stack_sharing" in var_opt_val_set) { ++ print " ptr->x_param_min_size_for_stack_sharing = (int ) bp_unpack_var_len_int (bp);" ++ print " if (32 > (int ) 10)" ++ print " ptr->x_param_min_size_for_stack_sharing ^= 32;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_min_spec_prob" in var_opt_val_set) { ++ print " ptr->x_param_min_spec_prob = (int ) 
bp_unpack_var_len_int (bp);"
++ print " if (40 > (int ) 10)"
++ print " ptr->x_param_min_spec_prob ^= 40;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_min_vect_loop_bound" in var_opt_val_set) {
++ print " ptr->x_param_min_vect_loop_bound = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_accesses" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_accesses = (int ) bp_unpack_var_len_int (bp);"
++ print " if (16 > (int ) 10)"
++ print " ptr->x_param_modref_max_accesses ^= 16;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_adjustments" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_adjustments = (int ) bp_unpack_var_len_int (bp);"
++ print " if (8 > (int ) 10)"
++ print " ptr->x_param_modref_max_adjustments ^= 8;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_bases" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_bases = (int ) bp_unpack_var_len_int (bp);"
++ print " if (32 > (int ) 10)"
++ print " ptr->x_param_modref_max_bases ^= 32;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_depth" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_depth = (int ) bp_unpack_var_len_int (bp);"
++ print " if (256 > (int ) 10)"
++ print " ptr->x_param_modref_max_depth ^= 256;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_escape_points" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_escape_points = (int ) bp_unpack_var_len_int (bp);"
++ print " if (256 > (int ) 10)"
++ print " ptr->x_param_modref_max_escape_points ^= 256;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_refs" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_refs = (int ) bp_unpack_var_len_int (bp);"
++ print " if (16 > (int ) 10)"
++ print " ptr->x_param_modref_max_refs ^= 16;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_modref_max_tests" in var_opt_val_set) {
++ print " ptr->x_param_modref_max_tests = (int ) bp_unpack_var_len_int (bp);"
++ print " if (64 > (int ) 10)"
++ print " ptr->x_param_modref_max_tests ^= 64;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_ldp_dependency_search_range" in var_opt_val_set) {
++ print " ptr->x_param_ldp_dependency_search_range = (int ) bp_unpack_var_len_int (bp);"
++ print " if (16 > (int ) 10)"
++ print " ptr->x_param_ldp_dependency_search_range ^= 16;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_parloops_chunk_size" in var_opt_val_set) {
++ print " ptr->x_param_parloops_chunk_size = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_parloops_min_per_thread" in var_opt_val_set) {
++ print " ptr->x_param_parloops_min_per_thread = (int ) bp_unpack_var_len_int (bp);"
++ print " if (100 > (int ) 10)"
++ print " ptr->x_param_parloops_min_per_thread ^= 100;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_parloops_schedule" in var_opt_val_set) {
++ print " ptr->x_param_parloops_schedule = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_partial_inlining_entry_probability" in var_opt_val_set) {
++ print " ptr->x_param_partial_inlining_entry_probability = (int ) bp_unpack_var_len_int (bp);"
++ print " if (70 > (int ) 10)"
++ print " ptr->x_param_partial_inlining_entry_probability ^= 70;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_predictable_branch_outcome" in var_opt_val_set) {
++ print " ptr->x_param_predictable_branch_outcome = (int ) bp_unpack_var_len_int (bp);"
++ print " if (2 > (int ) 10)"
++ print " ptr->x_param_predictable_branch_outcome ^= 2;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_prefetch_dynamic_strides" in var_opt_val_set) {
++ print " ptr->x_param_prefetch_dynamic_strides = (int ) bp_unpack_var_len_int (bp);"
++ print " if (1 > (int ) 10)"
++ print " ptr->x_param_prefetch_dynamic_strides ^= 1;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_prefetch_latency" in var_opt_val_set) {
++ print " ptr->x_param_prefetch_latency = (int ) bp_unpack_var_len_int (bp);"
++ print " if (200 > (int ) 10)"
++ print " ptr->x_param_prefetch_latency ^= 200;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_prefetch_min_insn_to_mem_ratio" in var_opt_val_set) {
++ print " ptr->x_param_prefetch_min_insn_to_mem_ratio = (int ) bp_unpack_var_len_int (bp);"
++ print " if (3 > (int ) 10)"
++ print " ptr->x_param_prefetch_min_insn_to_mem_ratio ^= 3;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_prefetch_minimum_stride" in var_opt_val_set) {
++ print " ptr->x_param_prefetch_minimum_stride = (int ) bp_unpack_var_len_int (bp);"
++ print " if (-1 > (int ) 10)"
++ print " ptr->x_param_prefetch_minimum_stride ^= -1;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_ranger_debug" in var_opt_val_set) {
++ print " ptr->x_param_ranger_debug = (enum ranger_debug ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_ranger_logical_depth" in var_opt_val_set) {
++ print " ptr->x_param_ranger_logical_depth = (int ) bp_unpack_var_len_int (bp);"
++ print " if (6 > (int ) 10)"
++ print " ptr->x_param_ranger_logical_depth ^= 6;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_relation_block_limit" in var_opt_val_set) {
++ print " ptr->x_param_relation_block_limit = (int ) bp_unpack_var_len_int (bp);"
++ print " if (200 > (int ) 10)"
++ print " ptr->x_param_relation_block_limit ^= 200;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_rpo_vn_max_loop_depth" in var_opt_val_set) {
++ print " ptr->x_param_rpo_vn_max_loop_depth = (int ) bp_unpack_var_len_int (bp);"
++ print " if (7 > (int ) 10)"
++ print " ptr->x_param_rpo_vn_max_loop_depth ^= 7;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sccvn_max_alias_queries_per_access" in var_opt_val_set) {
++ print " ptr->x_param_sccvn_max_alias_queries_per_access = (int ) bp_unpack_var_len_int (bp);"
++ print " if (1000 > (int ) 10)"
++ print " ptr->x_param_sccvn_max_alias_queries_per_access ^= 1000;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_scev_max_expr_complexity" in var_opt_val_set) {
++ print " ptr->x_param_scev_max_expr_complexity = (int ) bp_unpack_var_len_int (bp);"
++ print " if (10 > (int ) 10)"
++ print " ptr->x_param_scev_max_expr_complexity ^= 10;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_scev_max_expr_size" in var_opt_val_set) {
++ print " ptr->x_param_scev_max_expr_size = (int ) bp_unpack_var_len_int (bp);"
++ print " if (100 > (int ) 10)"
++ print " ptr->x_param_scev_max_expr_size ^= 100;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sched_autopref_queue_depth" in var_opt_val_set) {
++ print " ptr->x_param_sched_autopref_queue_depth = (int ) bp_unpack_var_len_int (bp);"
++ print " if (-1 > (int ) 10)"
++ print " ptr->x_param_sched_autopref_queue_depth ^= -1;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sched_mem_true_dep_cost" in var_opt_val_set) {
++ print " ptr->x_param_sched_mem_true_dep_cost = (int ) bp_unpack_var_len_int (bp);"
++ print " if (1 > (int ) 10)"
++ print " ptr->x_param_sched_mem_true_dep_cost ^= 1;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sched_pressure_algorithm" in var_opt_val_set) {
++ print " ptr->x_param_sched_pressure_algorithm = (int ) bp_unpack_var_len_int (bp);"
++ print " if (1 > (int ) 10)"
++ print " ptr->x_param_sched_pressure_algorithm ^= 1;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sched_spec_prob_cutoff" in var_opt_val_set) {
++ print " ptr->x_param_sched_spec_prob_cutoff = (int ) bp_unpack_var_len_int (bp);"
++ print " if (40 > (int ) 10)"
++ print " ptr->x_param_sched_spec_prob_cutoff ^= 40;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sched_state_edge_prob_cutoff" in var_opt_val_set) {
++ print " ptr->x_param_sched_state_edge_prob_cutoff = (int ) bp_unpack_var_len_int (bp);"
++ print " if (10 > (int ) 10)"
++ print " ptr->x_param_sched_state_edge_prob_cutoff ^= 10;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_selsched_insns_to_rename" in var_opt_val_set) {
++ print " ptr->x_param_selsched_insns_to_rename = (int ) bp_unpack_var_len_int (bp);"
++ print " if (2 > (int ) 10)"
++ print " ptr->x_param_selsched_insns_to_rename ^= 2;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_selsched_max_lookahead" in var_opt_val_set) {
++ print " ptr->x_param_selsched_max_lookahead = (int ) bp_unpack_var_len_int (bp);"
++ print " if (50 > (int ) 10)"
++ print " ptr->x_param_selsched_max_lookahead ^= 50;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_selsched_max_sched_times" in var_opt_val_set) {
++ print " ptr->x_param_selsched_max_sched_times = (int ) bp_unpack_var_len_int (bp);"
++ print " if (2 > (int ) 10)"
++ print " ptr->x_param_selsched_max_sched_times ^= 2;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_semi_relayout_level" in var_opt_val_set) {
++ print " ptr->x_semi_relayout_level = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_simultaneous_prefetches" in var_opt_val_set) {
++ print " ptr->x_param_simultaneous_prefetches = (int ) bp_unpack_var_len_int (bp);"
++ print " if (3 > (int ) 10)"
++ print " ptr->x_param_simultaneous_prefetches ^= 3;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sink_frequency_threshold" in var_opt_val_set) {
++ print " ptr->x_param_sink_frequency_threshold = (int ) bp_unpack_var_len_int (bp);"
++ print " if (75 > (int ) 10)"
++ print " ptr->x_param_sink_frequency_threshold ^= 75;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sms_dfa_history" in var_opt_val_set) {
++ print " ptr->x_param_sms_dfa_history = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sms_loop_average_count_threshold" in var_opt_val_set) {
++ print " ptr->x_param_sms_loop_average_count_threshold = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sms_max_ii_factor" in var_opt_val_set) {
++ print " ptr->x_param_sms_max_ii_factor = (int ) bp_unpack_var_len_int (bp);"
++ print " if (2 > (int ) 10)"
++ print " ptr->x_param_sms_max_ii_factor ^= 2;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sms_min_sc" in var_opt_val_set) {
++ print " ptr->x_param_sms_min_sc = (int ) bp_unpack_var_len_int (bp);"
++ print " if (2 > (int ) 10)"
++ print " ptr->x_param_sms_min_sc ^= 2;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sra_max_propagations" in var_opt_val_set) {
++ print " ptr->x_param_sra_max_propagations = (int ) bp_unpack_var_len_int (bp);"
++ print " if (32 > (int ) 10)"
++ print " ptr->x_param_sra_max_propagations ^= 32;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sra_max_scalarization_size_size" in var_opt_val_set) {
++ print " ptr->x_param_sra_max_scalarization_size_size = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_sra_max_scalarization_size_speed" in var_opt_val_set) {
++ print " ptr->x_param_sra_max_scalarization_size_speed = (int ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_ssa_name_def_chain_limit" in var_opt_val_set) {
++ print " ptr->x_param_ssa_name_def_chain_limit = (int ) bp_unpack_var_len_int (bp);"
++ print " if (512 > (int ) 10)"
++ print " ptr->x_param_ssa_name_def_chain_limit ^= 512;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_ssp_buffer_size" in var_opt_val_set) {
++ print " ptr->x_param_ssp_buffer_size = (int ) bp_unpack_var_len_int (bp);"
++ print " if (8 > (int ) 10)"
++ print " ptr->x_param_ssp_buffer_size ^= 8;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_stack_clash_protection_guard_size" in var_opt_val_set) {
++ print " ptr->x_param_stack_clash_protection_guard_size = (int ) bp_unpack_var_len_int (bp);"
++ print " if (12 > (int ) 10)"
++ print " ptr->x_param_stack_clash_protection_guard_size ^= 12;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_stack_clash_protection_probe_interval" in var_opt_val_set) {
++ print " ptr->x_param_stack_clash_protection_probe_interval = (int ) bp_unpack_var_len_int (bp);"
++ print " if (12 > (int ) 10)"
++ print " ptr->x_param_stack_clash_protection_probe_interval ^= 12;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_store_merging_allow_unaligned" in var_opt_val_set) {
++ print " ptr->x_param_store_merging_allow_unaligned = (int ) bp_unpack_var_len_int (bp);"
++ print " if (1 > (int ) 10)"
++ print " ptr->x_param_store_merging_allow_unaligned ^= 1;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_store_merging_max_size" in var_opt_val_set) {
++ print " ptr->x_param_store_merging_max_size = (int ) bp_unpack_var_len_int (bp);"
++ print " if (65536 > (int ) 10)"
++ print " ptr->x_param_store_merging_max_size ^= 65536;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_struct_reorg_cold_struct_ratio" in var_opt_val_set) {
++ print " ptr->x_param_struct_reorg_cold_struct_ratio = (int ) bp_unpack_var_len_int (bp);"
++ print " if (10 > (int ) 10)"
++ print " ptr->x_param_struct_reorg_cold_struct_ratio ^= 10;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_switch_conversion_branch_ratio" in var_opt_val_set) {
++ print " ptr->x_param_switch_conversion_branch_ratio = (int ) bp_unpack_var_len_int (bp);"
++ print " if (8 > (int ) 10)"
++ print " ptr->x_param_switch_conversion_branch_ratio ^= 8;"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_threader_debug" in var_opt_val_set) {
++ print " ptr->x_param_threader_debug = (enum threader_debug ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_param_tm_max_aggregate_size" in var_opt_val_set) { ++ print " ptr->x_param_tm_max_aggregate_size = (int ) bp_unpack_var_len_int (bp);" ++ print " if (9 > (int ) 10)" ++ print " ptr->x_param_tm_max_aggregate_size ^= 9;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tracer_dynamic_coverage_feedback" in var_opt_val_set) { ++ print " ptr->x_param_tracer_dynamic_coverage_feedback = (int ) bp_unpack_var_len_int (bp);" ++ print " if (95 > (int ) 10)" ++ print " ptr->x_param_tracer_dynamic_coverage_feedback ^= 95;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tracer_dynamic_coverage" in var_opt_val_set) { ++ print " ptr->x_param_tracer_dynamic_coverage = (int ) bp_unpack_var_len_int (bp);" ++ print " if (75 > (int ) 10)" ++ print " ptr->x_param_tracer_dynamic_coverage ^= 75;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tracer_max_code_growth" in var_opt_val_set) { ++ print " ptr->x_param_tracer_max_code_growth = (int ) bp_unpack_var_len_int (bp);" ++ print " if (100 > (int ) 10)" ++ print " ptr->x_param_tracer_max_code_growth ^= 100;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tracer_min_branch_probability_feedback" in var_opt_val_set) { ++ print " ptr->x_param_tracer_min_branch_probability_feedback = (int ) bp_unpack_var_len_int (bp);" ++ print " if (80 > (int ) 10)" ++ print " ptr->x_param_tracer_min_branch_probability_feedback ^= 80;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tracer_min_branch_probability" in var_opt_val_set) { ++ print " ptr->x_param_tracer_min_branch_probability = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_tracer_min_branch_probability ^= 50;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tracer_min_branch_ratio" in var_opt_val_set) { ++ print " ptr->x_param_tracer_min_branch_ratio = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_tracer_min_branch_ratio ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_tree_reassoc_width" in var_opt_val_set) { ++ print " ptr->x_param_tree_reassoc_width = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_uninit_control_dep_attempts" in var_opt_val_set) { ++ print " ptr->x_param_uninit_control_dep_attempts = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1000 > (int ) 10)" ++ print " ptr->x_param_uninit_control_dep_attempts ^= 1000;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_uninlined_function_insns" in var_opt_val_set) { ++ print " ptr->x_param_uninlined_function_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_uninlined_function_insns ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_uninlined_function_time" in var_opt_val_set) { ++ print " ptr->x_param_uninlined_function_time = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_uninlined_function_thunk_insns" in var_opt_val_set) { ++ print " ptr->x_param_uninlined_function_thunk_insns = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > (int ) 10)" ++ print " ptr->x_param_uninlined_function_thunk_insns ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_uninlined_function_thunk_time" in var_opt_val_set) { ++ print " ptr->x_param_uninlined_function_thunk_time = (int ) bp_unpack_var_len_int (bp);" ++ print " if (2 > 
(int ) 10)" ++ print " ptr->x_param_uninlined_function_thunk_time ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_unlikely_bb_count_fraction" in var_opt_val_set) { ++ print " ptr->x_param_unlikely_bb_count_fraction = (int ) bp_unpack_var_len_int (bp);" ++ print " if (20 > (int ) 10)" ++ print " ptr->x_param_unlikely_bb_count_fraction ^= 20;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_unroll_jam_max_unroll" in var_opt_val_set) { ++ print " ptr->x_param_unroll_jam_max_unroll = (int ) bp_unpack_var_len_int (bp);" ++ print " if (4 > (int ) 10)" ++ print " ptr->x_param_unroll_jam_max_unroll ^= 4;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_unroll_jam_min_percent" in var_opt_val_set) { ++ print " ptr->x_param_unroll_jam_min_percent = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_unroll_jam_min_percent ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_use_after_scope_direct_emission_threshold" in var_opt_val_set) { ++ print " ptr->x_param_use_after_scope_direct_emission_threshold = (int ) bp_unpack_var_len_int (bp);" ++ print " if (256 > (int ) 10)" ++ print " ptr->x_param_use_after_scope_direct_emission_threshold ^= 256;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_flexible_seg_len" in var_opt_val_set) { ++ print " ptr->x_param_flexible_seg_len = (int ) bp_unpack_var_len_int (bp);" ++ print " if (0 > (int ) 10)" ++ print " ptr->x_param_flexible_seg_len ^= 0;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_epilogues_nomask" in var_opt_val_set) { ++ print " ptr->x_param_vect_epilogues_nomask = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_vect_epilogues_nomask ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_induction_float" in var_opt_val_set) { ++ print " ptr->x_param_vect_induction_float = (int ) bp_unpack_var_len_int (bp);" ++ print " if (1 > (int ) 10)" ++ print " ptr->x_param_vect_induction_float ^= 1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_inner_loop_cost_factor" in var_opt_val_set) { ++ print " ptr->x_param_vect_inner_loop_cost_factor = (int ) bp_unpack_var_len_int (bp);" ++ print " if (50 > (int ) 10)" ++ print " ptr->x_param_vect_inner_loop_cost_factor ^= 50;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_max_peeling_for_alignment" in var_opt_val_set) { ++ print " ptr->x_param_vect_max_peeling_for_alignment = (int ) bp_unpack_var_len_int (bp);" ++ print " if (-1 > (int ) 10)" ++ print " ptr->x_param_vect_max_peeling_for_alignment ^= -1;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_max_version_for_alias_checks" in var_opt_val_set) { ++ print " ptr->x_param_vect_max_version_for_alias_checks = (int ) bp_unpack_var_len_int (bp);" ++ print " if (10 > (int ) 10)" ++ print " ptr->x_param_vect_max_version_for_alias_checks ^= 10;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_max_version_for_alignment_checks" in var_opt_val_set) { ++ print " ptr->x_param_vect_max_version_for_alignment_checks = (int ) bp_unpack_var_len_int (bp);" ++ print " if (6 > (int ) 10)" ++ print " ptr->x_param_vect_max_version_for_alignment_checks ^= 6;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vect_partial_vector_usage" in var_opt_val_set) { ++ print " ptr->x_param_vect_partial_vector_usage = (int ) bp_unpack_var_len_int (bp);" ++ print 
" if (2 > (int ) 10)" ++ print " ptr->x_param_vect_partial_vector_usage ^= 2;" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vrp1_mode" in var_opt_val_set) { ++ print " ptr->x_param_vrp1_mode = (enum vrp_mode ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_param_vrp2_mode" in var_opt_val_set) { ++ print " ptr->x_param_vrp2_mode = (enum vrp_mode ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_warn_inline" in var_opt_val_set) { ++ print " ptr->x_warn_inline = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_aggressive_loop_optimizations" in var_opt_val_set) { ++ print " ptr->x_flag_aggressive_loop_optimizations = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_align_functions" in var_opt_val_set) { ++ print " ptr->x_flag_align_functions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_str_align_functions" in var_opt_val_set) { ++ print " ptr->x_str_align_functions = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_str_align_functions)" ++ print " ptr->x_str_align_functions = xstrdup (ptr->x_str_align_functions);" ++} ++else ++ print " bp_unpack_string (data_in, bp);" ++if ("x_flag_align_jumps" in var_opt_val_set) { ++ print " ptr->x_flag_align_jumps = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_str_align_jumps" in var_opt_val_set) { ++ print " ptr->x_str_align_jumps = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_str_align_jumps)" ++ print " ptr->x_str_align_jumps = xstrdup (ptr->x_str_align_jumps);" ++} ++else ++ print " bp_unpack_string (data_in, bp);" ++if ("x_flag_align_labels" in var_opt_val_set) { ++ print " ptr->x_flag_align_labels = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_str_align_labels" in var_opt_val_set) { ++ print " ptr->x_str_align_labels = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_str_align_labels)" ++ print " ptr->x_str_align_labels = xstrdup (ptr->x_str_align_labels);" ++} ++else ++ print " bp_unpack_string (data_in, bp);" ++if ("x_flag_align_loops" in var_opt_val_set) { ++ print " ptr->x_flag_align_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_str_align_loops" in var_opt_val_set) { ++ print " ptr->x_str_align_loops = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_str_align_loops)" ++ print " ptr->x_str_align_loops = xstrdup (ptr->x_str_align_loops);" ++} ++else ++ print " bp_unpack_string (data_in, bp);" ++if ("x_flag_allocation_dce" in var_opt_val_set) { ++ print " ptr->x_flag_allocation_dce = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_store_data_races" in var_opt_val_set) { ++ print " ptr->x_flag_store_data_races = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_array_widen_compare" in var_opt_val_set) { ++ print " ptr->x_flag_array_widen_compare = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_associative_math" in var_opt_val_set) { ++ print " ptr->x_flag_associative_math = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if 
("x_flag_asynchronous_unwind_tables" in var_opt_val_set) { ++ print " ptr->x_flag_asynchronous_unwind_tables = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_auto_inc_dec" in var_opt_val_set) { ++ print " ptr->x_flag_auto_inc_dec = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_bit_tests" in var_opt_val_set) { ++ print " ptr->x_flag_bit_tests = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_branch_on_count_reg" in var_opt_val_set) { ++ print " ptr->x_flag_branch_on_count_reg = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_branch_probabilities" in var_opt_val_set) { ++ print " ptr->x_flag_branch_probabilities = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_caller_saves" in var_opt_val_set) { ++ print " ptr->x_flag_caller_saves = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ccmp2" in var_opt_val_set) { ++ print " ptr->x_flag_ccmp2 = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_code_hoisting" in var_opt_val_set) { ++ print " ptr->x_flag_code_hoisting = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_combine_stack_adjustments" in var_opt_val_set) { ++ print " ptr->x_flag_combine_stack_adjustments = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_compare_elim_after_reload" in var_opt_val_set) { ++ print " ptr->x_flag_compare_elim_after_reload = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_conserve_stack" in var_opt_val_set) { ++ print " ptr->x_flag_conserve_stack = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_convert_minmax" in var_opt_val_set) { ++ print " ptr->x_flag_convert_minmax = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_cprop_registers" in var_opt_val_set) { ++ print " ptr->x_flag_cprop_registers = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_crossjumping" in var_opt_val_set) { ++ print " ptr->x_flag_crossjumping = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_crypto_accel_aes" in var_opt_val_set) { ++ print " ptr->x_flag_crypto_accel_aes = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_cse_follow_jumps" in var_opt_val_set) { ++ print " ptr->x_flag_cse_follow_jumps = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_cx_fortran_rules" in var_opt_val_set) { ++ print " ptr->x_flag_cx_fortran_rules = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_cx_limited_range" in var_opt_val_set) { ++ print " ptr->x_flag_cx_limited_range = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_dce" in var_opt_val_set) { ++ print " ptr->x_flag_dce = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ 
print " bp_unpack_var_len_int (bp);" ++if ("x_flag_defer_pop" in var_opt_val_set) { ++ print " ptr->x_flag_defer_pop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_delayed_branch" in var_opt_val_set) { ++ print " ptr->x_flag_delayed_branch = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_delete_dead_exceptions" in var_opt_val_set) { ++ print " ptr->x_flag_delete_dead_exceptions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_delete_null_pointer_checks" in var_opt_val_set) { ++ print " ptr->x_flag_delete_null_pointer_checks = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_devirtualize" in var_opt_val_set) { ++ print " ptr->x_flag_devirtualize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_devirtualize_speculatively" in var_opt_val_set) { ++ print " ptr->x_flag_devirtualize_speculatively = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_dse" in var_opt_val_set) { ++ print " ptr->x_flag_dse = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_early_inlining" in var_opt_val_set) { ++ print " ptr->x_flag_early_inlining = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_exceptions" in var_opt_val_set) { ++ print " ptr->x_flag_exceptions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_excess_precision" in var_opt_val_set) { ++ print " ptr->x_flag_excess_precision = (enum excess_precision ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_expensive_optimizations" in var_opt_val_set) { ++ print " ptr->x_flag_expensive_optimizations = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_finite_loops" in var_opt_val_set) { ++ print " ptr->x_flag_finite_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_finite_math_only" in var_opt_val_set) { ++ print " ptr->x_flag_finite_math_only = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_float_store" in var_opt_val_set) { ++ print " ptr->x_flag_float_store = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_fold_simple_inlines" in var_opt_val_set) { ++ print " ptr->x_flag_fold_simple_inlines = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_forward_propagate" in var_opt_val_set) { ++ print " ptr->x_flag_forward_propagate = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_fp_contract_mode" in var_opt_val_set) { ++ print " ptr->x_flag_fp_contract_mode = (enum fp_contract_mode ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_fp_int_builtin_inexact" in var_opt_val_set) { ++ print " ptr->x_flag_fp_int_builtin_inexact = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ftz" in var_opt_val_set) { ++ print " 
ptr->x_flag_ftz = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_no_function_cse" in var_opt_val_set) { ++ print " ptr->x_flag_no_function_cse = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_gcse" in var_opt_val_set) { ++ print " ptr->x_flag_gcse = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_gcse_after_reload" in var_opt_val_set) { ++ print " ptr->x_flag_gcse_after_reload = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_gcse_las" in var_opt_val_set) { ++ print " ptr->x_flag_gcse_las = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_gcse_lm" in var_opt_val_set) { ++ print " ptr->x_flag_gcse_lm = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_gcse_sm" in var_opt_val_set) { ++ print " ptr->x_flag_gcse_sm = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_graphite" in var_opt_val_set) { ++ print " ptr->x_flag_graphite = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_graphite_identity" in var_opt_val_set) { ++ print " ptr->x_flag_graphite_identity = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_guess_branch_prob" in var_opt_val_set) { ++ print " ptr->x_flag_guess_branch_prob = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_harden_compares" in var_opt_val_set) { ++ print " ptr->x_flag_harden_compares = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_harden_conditional_branches" in var_opt_val_set) { ++ print " ptr->x_flag_harden_conditional_branches = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_hoist_adjacent_loads" in var_opt_val_set) { ++ print " ptr->x_flag_hoist_adjacent_loads = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_icp" in var_opt_val_set) { ++ print " ptr->x_flag_icp = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_icp_speculatively" in var_opt_val_set) { ++ print " ptr->x_flag_icp_speculatively = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_if_conversion" in var_opt_val_set) { ++ print " ptr->x_flag_if_conversion = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_if_conversion_gimple" in var_opt_val_set) { ++ print " ptr->x_flag_if_conversion_gimple = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_if_conversion2" in var_opt_val_set) { ++ print " ptr->x_flag_if_conversion2 = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ifcvt_allow_complicated_cmps" in var_opt_val_set) { ++ print " ptr->x_flag_ifcvt_allow_complicated_cmps = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_indirect_inlining" in var_opt_val_set) { ++ print " 
ptr->x_flag_indirect_inlining = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_no_inline" in var_opt_val_set) { ++ print " ptr->x_flag_no_inline = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_inline_atomics" in var_opt_val_set) { ++ print " ptr->x_flag_inline_atomics = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_inline_functions" in var_opt_val_set) { ++ print " ptr->x_flag_inline_functions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_inline_functions_called_once" in var_opt_val_set) { ++ print " ptr->x_flag_inline_functions_called_once = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_inline_small_functions" in var_opt_val_set) { ++ print " ptr->x_flag_inline_small_functions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_bit_cp" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_bit_cp = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_cp" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_cp = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_cp_clone" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_cp_clone = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_ic" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_ic = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_icf" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_icf = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_icf_functions" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_icf_functions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_icf_variables" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_icf_variables = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_modref" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_modref = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_prefetch" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_prefetch = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_profile" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_profile = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_pta" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_pta = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_pure_const" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_pure_const = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_ra" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_ra = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_reference" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_reference = (signed char ) 
bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_reference_addressable" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_reference_addressable = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_reorder_fields" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_reorder_fields = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_sra" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_sra = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_stack_alignment" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_stack_alignment = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_strict_aliasing" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_strict_aliasing = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_struct_reorg" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_struct_reorg = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ipa_vrp" in var_opt_val_set) { ++ print " ptr->x_flag_ipa_vrp = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ira_algorithm" in var_opt_val_set) { ++ print " ptr->x_flag_ira_algorithm = (enum ira_algorithm ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ira_hoist_pressure" in var_opt_val_set) { ++ print " ptr->x_flag_ira_hoist_pressure = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ira_loop_pressure" in var_opt_val_set) { ++ print " ptr->x_flag_ira_loop_pressure = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ira_region" in var_opt_val_set) { ++ print " ptr->x_flag_ira_region = (enum ira_region ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ira_share_save_slots" in var_opt_val_set) { ++ print " ptr->x_flag_ira_share_save_slots = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ira_share_spill_slots" in var_opt_val_set) { ++ print " ptr->x_flag_ira_share_spill_slots = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_isolate_erroneous_paths_attribute" in var_opt_val_set) { ++ print " ptr->x_flag_isolate_erroneous_paths_attribute = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_isolate_erroneous_paths_dereference" in var_opt_val_set) { ++ print " ptr->x_flag_isolate_erroneous_paths_dereference = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ivopts" in var_opt_val_set) { ++ print " ptr->x_flag_ivopts = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_jump_tables" in var_opt_val_set) { ++ print " ptr->x_flag_jump_tables = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_keep_gc_roots_live" in var_opt_val_set) { ++ print " ptr->x_flag_keep_gc_roots_live = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " 
bp_unpack_var_len_int (bp);" ++if ("x_flag_kernel_pgo" in var_opt_val_set) { ++ print " ptr->x_flag_kernel_pgo = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_lifetime_dse" in var_opt_val_set) { ++ print " ptr->x_flag_lifetime_dse = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_limit_function_alignment" in var_opt_val_set) { ++ print " ptr->x_flag_limit_function_alignment = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_live_patching" in var_opt_val_set) { ++ print " ptr->x_flag_live_patching = (enum live_patching_level ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_live_range_shrinkage" in var_opt_val_set) { ++ print " ptr->x_flag_live_range_shrinkage = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_loop_crc" in var_opt_val_set) { ++ print " ptr->x_flag_loop_crc = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_loop_elim" in var_opt_val_set) { ++ print " ptr->x_flag_loop_elim = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_loop_interchange" in var_opt_val_set) { ++ print " ptr->x_flag_loop_interchange = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_loop_nest_optimize" in var_opt_val_set) { ++ print " ptr->x_flag_loop_nest_optimize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_loop_parallelize_all" in var_opt_val_set) { ++ print " ptr->x_flag_loop_parallelize_all = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unroll_jam" in var_opt_val_set) { ++ print " ptr->x_flag_unroll_jam = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_lra_remat" in var_opt_val_set) { ++ print " ptr->x_flag_lra_remat = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_errno_math" in var_opt_val_set) { ++ print " ptr->x_flag_errno_math = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_merge_mull" in var_opt_val_set) { ++ print " ptr->x_flag_merge_mull = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_modulo_sched" in var_opt_val_set) { ++ print " ptr->x_flag_modulo_sched = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_modulo_sched_allow_regmoves" in var_opt_val_set) { ++ print " ptr->x_flag_modulo_sched_allow_regmoves = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_move_loop_invariants" in var_opt_val_set) { ++ print " ptr->x_flag_move_loop_invariants = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_move_loop_stores" in var_opt_val_set) { ++ print " ptr->x_flag_move_loop_stores = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_non_call_exceptions" in var_opt_val_set) { ++ print " ptr->x_flag_non_call_exceptions = (signed char ) 
bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_nothrow_opt" in var_opt_val_set) { ++ print " ptr->x_flag_nothrow_opt = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_omit_frame_pointer" in var_opt_val_set) { ++ print " ptr->x_flag_omit_frame_pointer = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_opt_info" in var_opt_val_set) { ++ print " ptr->x_flag_opt_info = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_optimize_sibling_calls" in var_opt_val_set) { ++ print " ptr->x_flag_optimize_sibling_calls = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_optimize_strlen" in var_opt_val_set) { ++ print " ptr->x_flag_optimize_strlen = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_fp_model" in var_opt_val_set) { ++ print " ptr->x_flag_fp_model = (enum fp_model ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_pack_struct" in var_opt_val_set) { ++ print " ptr->x_flag_pack_struct = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_partial_inlining" in var_opt_val_set) { ++ print " ptr->x_flag_partial_inlining = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_patchable_function_entry" in var_opt_val_set) { ++ print " ptr->x_flag_patchable_function_entry = bp_unpack_string (data_in, bp);" ++ print " if (ptr->x_flag_patchable_function_entry)" ++ print " ptr->x_flag_patchable_function_entry = xstrdup (ptr->x_flag_patchable_function_entry);" ++} ++else ++ print " bp_unpack_string (data_in, bp);" ++if ("x_flag_peel_loops" in var_opt_val_set) { ++ print " ptr->x_flag_peel_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_no_peephole" in var_opt_val_set) { ++ print " ptr->x_flag_no_peephole = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_peephole2" in var_opt_val_set) { ++ print " ptr->x_flag_peephole2 = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_plt" in var_opt_val_set) { ++ print " ptr->x_flag_plt = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_predictive_commoning" in var_opt_val_set) { ++ print " ptr->x_flag_predictive_commoning = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_prefetch_loop_arrays" in var_opt_val_set) { ++ print " ptr->x_flag_prefetch_loop_arrays = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_printf_return_value" in var_opt_val_set) { ++ print " ptr->x_flag_printf_return_value = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_profile_partial_training" in var_opt_val_set) { ++ print " ptr->x_flag_profile_partial_training = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_profile_reorder_functions" in var_opt_val_set) { ++ print " ptr->x_flag_profile_reorder_functions = (signed 
char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_reciprocal_math" in var_opt_val_set) { ++ print " ptr->x_flag_reciprocal_math = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ree" in var_opt_val_set) { ++ print " ptr->x_flag_ree = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_pcc_struct_return" in var_opt_val_set) { ++ print " ptr->x_flag_pcc_struct_return = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_rename_registers" in var_opt_val_set) { ++ print " ptr->x_flag_rename_registers = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_reorder_blocks" in var_opt_val_set) { ++ print " ptr->x_flag_reorder_blocks = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_reorder_blocks_algorithm" in var_opt_val_set) { ++ print " ptr->x_flag_reorder_blocks_algorithm = (enum reorder_blocks_algorithm ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_reorder_blocks_and_partition" in var_opt_val_set) { ++ print " ptr->x_flag_reorder_blocks_and_partition = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_reorder_functions" in var_opt_val_set) { ++ print " ptr->x_flag_reorder_functions = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_rerun_cse_after_loop" in var_opt_val_set) { ++ print " ptr->x_flag_rerun_cse_after_loop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_resched_modulo_sched" in var_opt_val_set) { ++ print " ptr->x_flag_resched_modulo_sched = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_rounding_math" in var_opt_val_set) { ++ print " ptr->x_flag_rounding_math = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_rtti" in var_opt_val_set) { ++ print " ptr->x_flag_rtti = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_save_optimization_record" in var_opt_val_set) { ++ print " ptr->x_flag_save_optimization_record = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_critical_path_heuristic" in var_opt_val_set) { ++ print " ptr->x_flag_sched_critical_path_heuristic = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_dep_count_heuristic" in var_opt_val_set) { ++ print " ptr->x_flag_sched_dep_count_heuristic = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_group_heuristic" in var_opt_val_set) { ++ print " ptr->x_flag_sched_group_heuristic = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_interblock" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_interblock = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_last_insn_heuristic" in var_opt_val_set) { ++ print " ptr->x_flag_sched_last_insn_heuristic = (signed char ) 
bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_pressure" in var_opt_val_set) { ++ print " ptr->x_flag_sched_pressure = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_rank_heuristic" in var_opt_val_set) { ++ print " ptr->x_flag_sched_rank_heuristic = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_speculative" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_speculative = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_spec_insn_heuristic" in var_opt_val_set) { ++ print " ptr->x_flag_sched_spec_insn_heuristic = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_speculative_load" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_speculative_load = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_speculative_load_dangerous" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_speculative_load_dangerous = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_stalled_insns" in var_opt_val_set) { ++ print " ptr->x_flag_sched_stalled_insns = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched_stalled_insns_dep" in var_opt_val_set) { ++ print " ptr->x_flag_sched_stalled_insns_dep = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sched2_use_superblocks" in var_opt_val_set) { ++ print " ptr->x_flag_sched2_use_superblocks = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_fusion" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_fusion = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_insns" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_insns = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_schedule_insns_after_reload" in var_opt_val_set) { ++ print " ptr->x_flag_schedule_insns_after_reload = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_section_anchors" in var_opt_val_set) { ++ print " ptr->x_flag_section_anchors = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sel_sched_pipelining" in var_opt_val_set) { ++ print " ptr->x_flag_sel_sched_pipelining = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sel_sched_pipelining_outer_loops" in var_opt_val_set) { ++ print " ptr->x_flag_sel_sched_pipelining_outer_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_sel_sched_reschedule_pipelined" in var_opt_val_set) { ++ print " ptr->x_flag_sel_sched_reschedule_pipelined = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_selective_scheduling" in var_opt_val_set) { ++ print " ptr->x_flag_selective_scheduling = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if 
("x_flag_selective_scheduling2" in var_opt_val_set) { ++ print " ptr->x_flag_selective_scheduling2 = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_semantic_interposition" in var_opt_val_set) { ++ print " ptr->x_flag_semantic_interposition = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_short_enums" in var_opt_val_set) { ++ print " ptr->x_flag_short_enums = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_short_wchar" in var_opt_val_set) { ++ print " ptr->x_flag_short_wchar = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_shrink_wrap" in var_opt_val_set) { ++ print " ptr->x_flag_shrink_wrap = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_shrink_wrap_separate" in var_opt_val_set) { ++ print " ptr->x_flag_shrink_wrap_separate = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_signaling_nans" in var_opt_val_set) { ++ print " ptr->x_flag_signaling_nans = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_signed_zeros" in var_opt_val_set) { ++ print " ptr->x_flag_signed_zeros = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_simd_cost_model" in var_opt_val_set) { ++ print " ptr->x_flag_simd_cost_model = (enum vect_cost_model ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_simdmath" in var_opt_val_set) { ++ print " ptr->x_flag_simdmath = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_single_precision_constant" in var_opt_val_set) { ++ print " ptr->x_flag_single_precision_constant = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_split_ivs_in_unroller" in var_opt_val_set) { ++ print " ptr->x_flag_split_ivs_in_unroller = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_split_ldp_stp" in var_opt_val_set) { ++ print " ptr->x_flag_split_ldp_stp = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_split_loops" in var_opt_val_set) { ++ print " ptr->x_flag_split_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_split_paths" in var_opt_val_set) { ++ print " ptr->x_flag_split_paths = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_split_wide_types" in var_opt_val_set) { ++ print " ptr->x_flag_split_wide_types = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_split_wide_types_early" in var_opt_val_set) { ++ print " ptr->x_flag_split_wide_types_early = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ssa_backprop" in var_opt_val_set) { ++ print " ptr->x_flag_ssa_backprop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_ssa_phiopt" in var_opt_val_set) { ++ print " ptr->x_flag_ssa_phiopt = (signed char ) bp_unpack_var_len_int (bp);" ++} 
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_stack_clash_protection" in var_opt_val_set) {
++ print " ptr->x_flag_stack_clash_protection = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_stack_protect" in var_opt_val_set) {
++ print " ptr->x_flag_stack_protect = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_stack_reuse" in var_opt_val_set) {
++ print " ptr->x_flag_stack_reuse = (enum stack_reuse_level ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_stdarg_opt" in var_opt_val_set) {
++ print " ptr->x_flag_stdarg_opt = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_store_merging" in var_opt_val_set) {
++ print " ptr->x_flag_store_merging = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_strict_aliasing" in var_opt_val_set) {
++ print " ptr->x_flag_strict_aliasing = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_strict_enums" in var_opt_val_set) {
++ print " ptr->x_flag_strict_enums = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_strict_volatile_bitfields" in var_opt_val_set) {
++ print " ptr->x_flag_strict_volatile_bitfields = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_thread_jumps" in var_opt_val_set) {
++ print " ptr->x_flag_thread_jumps = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_threadsafe_statics" in var_opt_val_set) {
++ print " ptr->x_flag_threadsafe_statics = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_toplevel_reorder" in var_opt_val_set) {
++ print " ptr->x_flag_toplevel_reorder = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_tracer" in var_opt_val_set) {
++ print " ptr->x_flag_tracer = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_trapping_math" in var_opt_val_set) {
++ print " ptr->x_flag_trapping_math = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_trapv" in var_opt_val_set) {
++ print " ptr->x_flag_trapv = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_tree_bit_ccp" in var_opt_val_set) {
++ print " ptr->x_flag_tree_bit_ccp = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_tree_builtin_call_dce" in var_opt_val_set) {
++ print " ptr->x_flag_tree_builtin_call_dce = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_tree_ccp" in var_opt_val_set) {
++ print " ptr->x_flag_tree_ccp = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_tree_ch" in var_opt_val_set) {
++ print " ptr->x_flag_tree_ch = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);"
++if ("x_flag_tree_coalesce_vars" in var_opt_val_set) {
++ print " ptr->x_flag_tree_coalesce_vars = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_copy_prop" in var_opt_val_set) { ++ print " ptr->x_flag_tree_copy_prop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_cselim" in var_opt_val_set) { ++ print " ptr->x_flag_tree_cselim = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_dce" in var_opt_val_set) { ++ print " ptr->x_flag_tree_dce = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_dom" in var_opt_val_set) { ++ print " ptr->x_flag_tree_dom = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_dse" in var_opt_val_set) { ++ print " ptr->x_flag_tree_dse = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_forwprop" in var_opt_val_set) { ++ print " ptr->x_flag_tree_forwprop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_fre" in var_opt_val_set) { ++ print " ptr->x_flag_tree_fre = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_distribute_patterns" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_distribute_patterns = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_distribution" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_distribution = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_if_convert" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_if_convert = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_im" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_im = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_ivcanon" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_ivcanon = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_optimize" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_optimize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_loop_vectorize" in var_opt_val_set) { ++ print " ptr->x_flag_tree_loop_vectorize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_live_range_split" in var_opt_val_set) { ++ print " ptr->x_flag_tree_live_range_split = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_parallelize_loops" in var_opt_val_set) { ++ print " ptr->x_flag_tree_parallelize_loops = (int ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_partial_pre" in var_opt_val_set) { ++ print " ptr->x_flag_tree_partial_pre = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_phiprop" in var_opt_val_set) { ++ print " ptr->x_flag_tree_phiprop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_pre" in var_opt_val_set) { ++ print " ptr->x_flag_tree_pre = (signed char ) 
bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_pta" in var_opt_val_set) { ++ print " ptr->x_flag_tree_pta = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_reassoc" in var_opt_val_set) { ++ print " ptr->x_flag_tree_reassoc = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_scev_cprop" in var_opt_val_set) { ++ print " ptr->x_flag_tree_scev_cprop = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_sink" in var_opt_val_set) { ++ print " ptr->x_flag_tree_sink = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_slp_transpose_vectorize" in var_opt_val_set) { ++ print " ptr->x_flag_tree_slp_transpose_vectorize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_slp_vectorize" in var_opt_val_set) { ++ print " ptr->x_flag_tree_slp_vectorize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_slsr" in var_opt_val_set) { ++ print " ptr->x_flag_tree_slsr = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_sra" in var_opt_val_set) { ++ print " ptr->x_flag_tree_sra = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_switch_conversion" in var_opt_val_set) { ++ print " ptr->x_flag_tree_switch_conversion = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_tail_merge" in var_opt_val_set) { ++ print " ptr->x_flag_tree_tail_merge = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_ter" in var_opt_val_set) { ++ print " ptr->x_flag_tree_ter = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_vectorize" in var_opt_val_set) { ++ print " ptr->x_flag_tree_vectorize = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_tree_vrp" in var_opt_val_set) { ++ print " ptr->x_flag_tree_vrp = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_auto_var_init" in var_opt_val_set) { ++ print " ptr->x_flag_auto_var_init = (enum auto_init_type ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unconstrained_commons" in var_opt_val_set) { ++ print " ptr->x_flag_unconstrained_commons = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unroll_all_loops" in var_opt_val_set) { ++ print " ptr->x_flag_unroll_all_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_cunroll_grow_size" in var_opt_val_set) { ++ print " ptr->x_flag_cunroll_grow_size = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unroll_loops" in var_opt_val_set) { ++ print " ptr->x_flag_unroll_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unsafe_math_optimizations" in var_opt_val_set) { ++ print " 
ptr->x_flag_unsafe_math_optimizations = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unswitch_loops" in var_opt_val_set) { ++ print " ptr->x_flag_unswitch_loops = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_unwind_tables" in var_opt_val_set) { ++ print " ptr->x_flag_unwind_tables = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_var_tracking" in var_opt_val_set) { ++ print " ptr->x_flag_var_tracking = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_var_tracking_assignments" in var_opt_val_set) { ++ print " ptr->x_flag_var_tracking_assignments = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_var_tracking_assignments_toggle" in var_opt_val_set) { ++ print " ptr->x_flag_var_tracking_assignments_toggle = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_var_tracking_uninit" in var_opt_val_set) { ++ print " ptr->x_flag_var_tracking_uninit = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_variable_expansion_in_unroller" in var_opt_val_set) { ++ print " ptr->x_flag_variable_expansion_in_unroller = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_vect_cost_model" in var_opt_val_set) { ++ print " ptr->x_flag_vect_cost_model = (enum vect_cost_model ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_version_loops_for_strides" in var_opt_val_set) { ++ print " ptr->x_flag_version_loops_for_strides = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_value_profile_transformations" in var_opt_val_set) { ++ print " ptr->x_flag_value_profile_transformations = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_web" in var_opt_val_set) { ++ print " ptr->x_flag_web = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_wrapv" in var_opt_val_set) { ++ print " ptr->x_flag_wrapv = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_wrapv_pointer" in var_opt_val_set) { ++ print " ptr->x_flag_wrapv_pointer = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_debug_nonbind_markers_p" in var_opt_val_set) { ++ print " ptr->x_debug_nonbind_markers_p = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_cmlt_arith" in var_opt_val_set) { ++ print " ptr->x_flag_cmlt_arith = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_mlow_precision_div" in var_opt_val_set) { ++ print " ptr->x_flag_mlow_precision_div = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_mrecip_low_precision_sqrt" in var_opt_val_set) { ++ print " ptr->x_flag_mrecip_low_precision_sqrt = (signed char ) bp_unpack_var_len_int (bp);" ++} ++else ++ print " bp_unpack_var_len_int (bp);" ++if ("x_flag_mlow_precision_sqrt" in var_opt_val_set) { ++ print " ptr->x_flag_mlow_precision_sqrt 
= (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++  print "  bp_unpack_var_len_int (bp);"
++if ("x_flag_simdmath_64" in var_opt_val_set) {
++  print "  ptr->x_flag_simdmath_64 = (signed char ) bp_unpack_var_len_int (bp);"
++}
++else
++  print "  bp_unpack_var_len_int (bp);"
++print "  unsigned HOST_WIDE_INT explicit_mask_prev[9];"
++print "  for (size_t i = 0; i < 9; i++)"
++print "    explicit_mask_prev[i] = bp_unpack_value (bp, 64);"
++print "  for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"
++print "    ptr->explicit_mask[i] = 0;"
++if ("param_align_loop_iterations" in var_opt_int_k) {
++  k = var_opt_int_k["param_align_loop_iterations"]
++  j = var_opt_int_j["param_align_loop_iterations"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 0) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_align_threshold" in var_opt_int_k) {
++  k = var_opt_int_k["param_align_threshold"]
++  j = var_opt_int_j["param_align_threshold"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 1) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_protect_allocas" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_protect_allocas"]
++  j = var_opt_int_j["param_asan_protect_allocas"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 2) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_instrument_reads" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_instrument_reads"]
++  j = var_opt_int_j["param_asan_instrument_reads"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 3) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_instrument_writes" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_instrument_writes"]
++  j = var_opt_int_j["param_asan_instrument_writes"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 4) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_instrumentation_with_call_threshold" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_instrumentation_with_call_threshold"]
++  j = var_opt_int_j["param_asan_instrumentation_with_call_threshold"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 5) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_memintrin" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_memintrin"]
++  j = var_opt_int_j["param_asan_memintrin"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 6) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_stack" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_stack"]
++  j = var_opt_int_j["param_asan_stack"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 7) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_asan_use_after_return" in var_opt_int_k) {
++  k = var_opt_int_k["param_asan_use_after_return"]
++  j = var_opt_int_j["param_asan_use_after_return"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 8) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_avg_loop_niter" in var_opt_int_k) {
++  k = var_opt_int_k["param_avg_loop_niter"]
++  j = var_opt_int_j["param_avg_loop_niter"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 9) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_avoid_fma_max_bits" in var_opt_int_k) {
++  k = var_opt_int_k["param_avoid_fma_max_bits"]
++  j = var_opt_int_j["param_avoid_fma_max_bits"]
++  print "  ptr->explicit_mask[" k "] |= ((explicit_mask_prev[0] >> 10) & HOST_WIDE_INT_1U) << "j";"
++}
++if ("param_builtin_expect_probability" in var_opt_int_k) {
++  k = var_opt_int_k["param_builtin_expect_probability"]
++  j = var_opt_int_j["param_builtin_expect_probability"]
++  print "  ptr->explicit_mask[" k "] |= 
((explicit_mask_prev0 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_builtin_string_cmp_inline_length" in var_opt_int_k) { ++ k = var_opt_int_k"param_builtin_string_cmp_inline_length" ++ j = var_opt_int_j"param_builtin_string_cmp_inline_length" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_case_values_threshold" in var_opt_int_k) { ++ k = var_opt_int_k"param_case_values_threshold" ++ j = var_opt_int_j"param_case_values_threshold" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_comdat_sharing_probability" in var_opt_int_k) { ++ k = var_opt_int_k"param_comdat_sharing_probability" ++ j = var_opt_int_j"param_comdat_sharing_probability" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_pointer_compression_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_pointer_compression_size" ++ j = var_opt_int_j"param_pointer_compression_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_construct_interfere_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_construct_interfere_size" ++ j = var_opt_int_j"param_construct_interfere_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_destruct_interfere_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_destruct_interfere_size" ++ j = var_opt_int_j"param_destruct_interfere_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_dse_max_alias_queries_per_store" in var_opt_int_k) { ++ k = var_opt_int_k"param_dse_max_alias_queries_per_store" ++ j = var_opt_int_j"param_dse_max_alias_queries_per_store" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_dse_max_object_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_dse_max_object_size" ++ j = var_opt_int_j"param_dse_max_object_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_early_inlining_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_early_inlining_insns" ++ j = var_opt_int_j"param_early_inlining_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_evrp_sparse_threshold" in var_opt_int_k) { ++ k = var_opt_int_k"param_evrp_sparse_threshold" ++ j = var_opt_int_j"param_evrp_sparse_threshold" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_evrp_switch_limit" in var_opt_int_k) { ++ k = var_opt_int_k"param_evrp_switch_limit" ++ j = var_opt_int_j"param_evrp_switch_limit" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_fsm_scale_path_blocks" in var_opt_int_k) { ++ k = var_opt_int_k"param_fsm_scale_path_blocks" ++ j = var_opt_int_j"param_fsm_scale_path_blocks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_fsm_scale_path_stmts" in var_opt_int_k) { ++ k = var_opt_int_k"param_fsm_scale_path_stmts" ++ j = var_opt_int_j"param_fsm_scale_path_stmts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_gcse_after_reload_critical_fraction" in var_opt_int_k) { ++ k = 
var_opt_int_k"param_gcse_after_reload_critical_fraction" ++ j = var_opt_int_j"param_gcse_after_reload_critical_fraction" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_gcse_after_reload_partial_fraction" in var_opt_int_k) { ++ k = var_opt_int_k"param_gcse_after_reload_partial_fraction" ++ j = var_opt_int_j"param_gcse_after_reload_partial_fraction" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_gcse_cost_distance_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_gcse_cost_distance_ratio" ++ j = var_opt_int_j"param_gcse_cost_distance_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_gcse_unrestricted_cost" in var_opt_int_k) { ++ k = var_opt_int_k"param_gcse_unrestricted_cost" ++ j = var_opt_int_j"param_gcse_unrestricted_cost" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_graphite_max_arrays_per_scop" in var_opt_int_k) { ++ k = var_opt_int_k"param_graphite_max_arrays_per_scop" ++ j = var_opt_int_j"param_graphite_max_arrays_per_scop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_graphite_max_nb_scop_params" in var_opt_int_k) { ++ k = var_opt_int_k"param_graphite_max_nb_scop_params" ++ j = var_opt_int_j"param_graphite_max_nb_scop_params" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_hwasan_instrument_allocas" in var_opt_int_k) { ++ k = var_opt_int_k"param_hwasan_instrument_allocas" ++ j = var_opt_int_j"param_hwasan_instrument_allocas" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_hwasan_instrument_mem_intrinsics" in var_opt_int_k) { ++ k = var_opt_int_k"param_hwasan_instrument_mem_intrinsics" ++ j = var_opt_int_j"param_hwasan_instrument_mem_intrinsics" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_hwasan_instrument_reads" in var_opt_int_k) { ++ k = var_opt_int_k"param_hwasan_instrument_reads" ++ j = var_opt_int_j"param_hwasan_instrument_reads" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_hwasan_instrument_stack" in var_opt_int_k) { ++ k = var_opt_int_k"param_hwasan_instrument_stack" ++ j = var_opt_int_j"param_hwasan_instrument_stack" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_hwasan_instrument_writes" in var_opt_int_k) { ++ k = var_opt_int_k"param_hwasan_instrument_writes" ++ j = var_opt_int_j"param_hwasan_instrument_writes" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_hwasan_random_frame_tag" in var_opt_int_k) { ++ k = var_opt_int_k"param_hwasan_random_frame_tag" ++ j = var_opt_int_j"param_hwasan_random_frame_tag" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ifcvt_allow_register_renaming" in var_opt_int_k) { ++ k = var_opt_int_k"param_ifcvt_allow_register_renaming" ++ j = var_opt_int_j"param_ifcvt_allow_register_renaming" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_inline_heuristics_hint_percent" in var_opt_int_k) { ++ k = 
var_opt_int_k"param_inline_heuristics_hint_percent" ++ j = var_opt_int_j"param_inline_heuristics_hint_percent" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_inline_min_speedup" in var_opt_int_k) { ++ k = var_opt_int_k"param_inline_min_speedup" ++ j = var_opt_int_j"param_inline_min_speedup" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_inline_unit_growth" in var_opt_int_k) { ++ k = var_opt_int_k"param_inline_unit_growth" ++ j = var_opt_int_j"param_inline_unit_growth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_eval_threshold" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_eval_threshold" ++ j = var_opt_int_j"param_ipa_cp_eval_threshold" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_large_unit_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_large_unit_insns" ++ j = var_opt_int_j"param_ipa_cp_large_unit_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_loop_hint_bonus" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_loop_hint_bonus" ++ j = var_opt_int_j"param_ipa_cp_loop_hint_bonus" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_max_recursive_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_max_recursive_depth" ++ j = var_opt_int_j"param_ipa_cp_max_recursive_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_min_recursive_probability" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_min_recursive_probability" ++ j = var_opt_int_j"param_ipa_cp_min_recursive_probability" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_profile_count_base" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_profile_count_base" ++ j = var_opt_int_j"param_ipa_cp_profile_count_base" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_recursion_penalty" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_recursion_penalty" ++ j = var_opt_int_j"param_ipa_cp_recursion_penalty" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_recursive_freq_factor" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_recursive_freq_factor" ++ j = var_opt_int_j"param_ipa_cp_recursive_freq_factor" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_single_call_penalty" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_single_call_penalty" ++ j = var_opt_int_j"param_ipa_cp_single_call_penalty" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_unit_growth" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_unit_growth" ++ j = var_opt_int_j"param_ipa_cp_unit_growth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_cp_value_list_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_cp_value_list_size" ++ j = var_opt_int_j"param_ipa_cp_value_list_size" ++ print " 
ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_jump_function_lookups" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_jump_function_lookups" ++ j = var_opt_int_j"param_ipa_jump_function_lookups" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_max_aa_steps" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_max_aa_steps" ++ j = var_opt_int_j"param_ipa_max_aa_steps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_max_agg_items" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_max_agg_items" ++ j = var_opt_int_j"param_ipa_max_agg_items" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_max_loop_predicates" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_max_loop_predicates" ++ j = var_opt_int_j"param_ipa_max_loop_predicates" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_max_param_expr_ops" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_max_param_expr_ops" ++ j = var_opt_int_j"param_ipa_max_param_expr_ops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_max_switch_predicate_bounds" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_max_switch_predicate_bounds" ++ j = var_opt_int_j"param_ipa_max_switch_predicate_bounds" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_prefetch_distance_factor" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_prefetch_distance_factor" ++ j = var_opt_int_j"param_ipa_prefetch_distance_factor" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_prefetch_locality" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_prefetch_locality" ++ j = var_opt_int_j"param_ipa_prefetch_locality" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_prefetch_pagesize" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_prefetch_pagesize" ++ j = var_opt_int_j"param_ipa_prefetch_pagesize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_sra_max_replacements" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_sra_max_replacements" ++ j = var_opt_int_j"param_ipa_sra_max_replacements" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ipa_sra_ptr_growth_factor" in var_opt_int_k) { ++ k = var_opt_int_k"param_ipa_sra_ptr_growth_factor" ++ j = var_opt_int_j"param_ipa_sra_ptr_growth_factor" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ira_consider_dup_in_all_alts" in var_opt_int_k) { ++ k = var_opt_int_k"param_ira_consider_dup_in_all_alts" ++ j = var_opt_int_j"param_ira_consider_dup_in_all_alts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev0 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ira_loop_reserved_regs" in var_opt_int_k) { ++ k = var_opt_int_k"param_ira_loop_reserved_regs" ++ j = var_opt_int_j"param_ira_loop_reserved_regs" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ira_max_conflict_table_size" in 
var_opt_int_k) { ++ k = var_opt_int_k"param_ira_max_conflict_table_size" ++ j = var_opt_int_j"param_ira_max_conflict_table_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ira_max_loops_num" in var_opt_int_k) { ++ k = var_opt_int_k"param_ira_max_loops_num" ++ j = var_opt_int_j"param_ira_max_loops_num" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_iv_always_prune_cand_set_bound" in var_opt_int_k) { ++ k = var_opt_int_k"param_iv_always_prune_cand_set_bound" ++ j = var_opt_int_j"param_iv_always_prune_cand_set_bound" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_iv_consider_all_candidates_bound" in var_opt_int_k) { ++ k = var_opt_int_k"param_iv_consider_all_candidates_bound" ++ j = var_opt_int_j"param_iv_consider_all_candidates_bound" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_iv_max_considered_uses" in var_opt_int_k) { ++ k = var_opt_int_k"param_iv_max_considered_uses" ++ j = var_opt_int_j"param_iv_max_considered_uses" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_jump_table_max_growth_ratio_for_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_jump_table_max_growth_ratio_for_size" ++ j = var_opt_int_j"param_jump_table_max_growth_ratio_for_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_jump_table_max_growth_ratio_for_speed" in var_opt_int_k) { ++ k = var_opt_int_k"param_jump_table_max_growth_ratio_for_speed" ++ j = var_opt_int_j"param_jump_table_max_growth_ratio_for_speed" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_l1_cache_line_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_l1_cache_line_size" ++ j = var_opt_int_j"param_l1_cache_line_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_l1_cache_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_l1_cache_size" ++ j = var_opt_int_j"param_l1_cache_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_l2_cache_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_l2_cache_size" ++ j = var_opt_int_j"param_l2_cache_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_large_function_growth" in var_opt_int_k) { ++ k = var_opt_int_k"param_large_function_growth" ++ j = var_opt_int_j"param_large_function_growth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_large_function_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_large_function_insns" ++ j = var_opt_int_j"param_large_function_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_stack_frame_growth" in var_opt_int_k) { ++ k = var_opt_int_k"param_stack_frame_growth" ++ j = var_opt_int_j"param_stack_frame_growth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_large_stack_frame" in var_opt_int_k) { ++ k = var_opt_int_k"param_large_stack_frame" ++ j = var_opt_int_j"param_large_stack_frame" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev1 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_large_unit_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_large_unit_insns" ++ j = var_opt_int_j"param_large_unit_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_lim_expensive" in var_opt_int_k) { ++ k = var_opt_int_k"param_lim_expensive" ++ j = var_opt_int_j"param_lim_expensive" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_block_tile_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_block_tile_size" ++ j = var_opt_int_j"param_loop_block_tile_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_interchange_max_num_stmts" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_interchange_max_num_stmts" ++ j = var_opt_int_j"param_loop_interchange_max_num_stmts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_interchange_stride_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_interchange_stride_ratio" ++ j = var_opt_int_j"param_loop_interchange_stride_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_invariant_max_bbs_in_loop" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_invariant_max_bbs_in_loop" ++ j = var_opt_int_j"param_loop_invariant_max_bbs_in_loop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_max_datarefs_for_datadeps" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_max_datarefs_for_datadeps" ++ j = var_opt_int_j"param_loop_max_datarefs_for_datadeps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_versioning_max_inner_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_versioning_max_inner_insns" ++ j = var_opt_int_j"param_loop_versioning_max_inner_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_loop_versioning_max_outer_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_loop_versioning_max_outer_insns" ++ j = var_opt_int_j"param_loop_versioning_max_outer_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_lra_inheritance_ebb_probability_cutoff" in var_opt_int_k) { ++ k = var_opt_int_k"param_lra_inheritance_ebb_probability_cutoff" ++ j = var_opt_int_j"param_lra_inheritance_ebb_probability_cutoff" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_lra_max_considered_reload_pseudos" in var_opt_int_k) { ++ k = var_opt_int_k"param_lra_max_considered_reload_pseudos" ++ j = var_opt_int_j"param_lra_max_considered_reload_pseudos" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_average_unrolled_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_average_unrolled_insns" ++ j = var_opt_int_j"param_max_average_unrolled_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_combine_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_combine_insns" ++ j = var_opt_int_j"param_max_combine_insns" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev1 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_unroll_iterations" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_unroll_iterations" ++ j = var_opt_int_j"param_max_unroll_iterations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_completely_peel_times" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_completely_peel_times" ++ j = var_opt_int_j"param_max_completely_peel_times" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_completely_peeled_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_completely_peeled_insns" ++ j = var_opt_int_j"param_max_completely_peeled_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_crossjump_edges" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_crossjump_edges" ++ j = var_opt_int_j"param_max_crossjump_edges" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_cse_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_cse_insns" ++ j = var_opt_int_j"param_max_cse_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_cse_path_length" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_cse_path_length" ++ j = var_opt_int_j"param_max_cse_path_length" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_cselib_memory_locations" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_cselib_memory_locations" ++ j = var_opt_int_j"param_max_cselib_memory_locations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_debug_marker_count" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_debug_marker_count" ++ j = var_opt_int_j"param_max_debug_marker_count" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_delay_slot_insn_search" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_delay_slot_insn_search" ++ j = var_opt_int_j"param_max_delay_slot_insn_search" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_delay_slot_live_search" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_delay_slot_live_search" ++ j = var_opt_int_j"param_max_delay_slot_live_search" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_dse_active_local_stores" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_dse_active_local_stores" ++ j = var_opt_int_j"param_max_dse_active_local_stores" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_early_inliner_max_iterations" in var_opt_int_k) { ++ k = var_opt_int_k"param_early_inliner_max_iterations" ++ j = var_opt_int_j"param_early_inliner_max_iterations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_find_base_term_values" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_find_base_term_values" ++ j = var_opt_int_j"param_max_find_base_term_values" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_fsm_thread_length" in var_opt_int_k) { ++ k = 
var_opt_int_k"param_max_fsm_thread_length" ++ j = var_opt_int_j"param_max_fsm_thread_length" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_fsm_thread_path_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_fsm_thread_path_insns" ++ j = var_opt_int_j"param_max_fsm_thread_path_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_gcse_insertion_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_gcse_insertion_ratio" ++ j = var_opt_int_j"param_max_gcse_insertion_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_gcse_memory" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_gcse_memory" ++ j = var_opt_int_j"param_max_gcse_memory" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_goto_duplication_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_goto_duplication_insns" ++ j = var_opt_int_j"param_max_goto_duplication_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_grow_copy_bb_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_grow_copy_bb_insns" ++ j = var_opt_int_j"param_max_grow_copy_bb_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_hoist_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_hoist_depth" ++ j = var_opt_int_j"param_max_hoist_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_inline_functions_called_once_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_inline_functions_called_once_insns" ++ j = var_opt_int_j"param_inline_functions_called_once_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_inline_functions_called_once_loop_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_inline_functions_called_once_loop_depth" ++ j = var_opt_int_j"param_inline_functions_called_once_loop_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_insns_auto" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_insns_auto" ++ j = var_opt_int_j"param_max_inline_insns_auto" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_insns_recursive_auto" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_insns_recursive_auto" ++ j = var_opt_int_j"param_max_inline_insns_recursive_auto" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_insns_recursive" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_insns_recursive" ++ j = var_opt_int_j"param_max_inline_insns_recursive" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_insns_single" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_insns_single" ++ j = var_opt_int_j"param_max_inline_insns_single" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_insns_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_insns_size" ++ j = 
var_opt_int_j"param_max_inline_insns_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_insns_small" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_insns_small" ++ j = var_opt_int_j"param_max_inline_insns_small" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_recursive_depth_auto" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_recursive_depth_auto" ++ j = var_opt_int_j"param_max_inline_recursive_depth_auto" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_inline_recursive_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_inline_recursive_depth" ++ j = var_opt_int_j"param_max_inline_recursive_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_isl_operations" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_isl_operations" ++ j = var_opt_int_j"param_max_isl_operations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_iterations_computation_cost" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_iterations_computation_cost" ++ j = var_opt_int_j"param_max_iterations_computation_cost" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_iterations_to_track" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_iterations_to_track" ++ j = var_opt_int_j"param_max_iterations_to_track" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_jump_thread_duplication_stmts" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_jump_thread_duplication_stmts" ++ j = var_opt_int_j"param_max_jump_thread_duplication_stmts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_last_value_rtl" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_last_value_rtl" ++ j = var_opt_int_j"param_max_last_value_rtl" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_loop_header_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_loop_header_insns" ++ j = var_opt_int_j"param_max_loop_header_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev1 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_modulo_backtrack_attempts" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_modulo_backtrack_attempts" ++ j = var_opt_int_j"param_max_modulo_backtrack_attempts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_partial_antic_length" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_partial_antic_length" ++ j = var_opt_int_j"param_max_partial_antic_length" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_peel_branches" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_peel_branches" ++ j = var_opt_int_j"param_max_peel_branches" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_peel_times" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_peel_times" ++ j = var_opt_int_j"param_max_peel_times" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 3) & 
HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_peeled_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_peeled_insns" ++ j = var_opt_int_j"param_max_peeled_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_pending_list_length" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_pending_list_length" ++ j = var_opt_int_j"param_max_pending_list_length" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_pipeline_region_blocks" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_pipeline_region_blocks" ++ j = var_opt_int_j"param_max_pipeline_region_blocks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_pipeline_region_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_pipeline_region_insns" ++ j = var_opt_int_j"param_max_pipeline_region_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_pow_sqrt_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_pow_sqrt_depth" ++ j = var_opt_int_j"param_max_pow_sqrt_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_predicted_iterations" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_predicted_iterations" ++ j = var_opt_int_j"param_max_predicted_iterations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_reload_search_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_reload_search_insns" ++ j = var_opt_int_j"param_max_reload_search_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_rtl_if_conversion_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_rtl_if_conversion_insns" ++ j = var_opt_int_j"param_max_rtl_if_conversion_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_rtl_if_conversion_predictable_cost" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_rtl_if_conversion_predictable_cost" ++ j = var_opt_int_j"param_max_rtl_if_conversion_predictable_cost" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_rtl_if_conversion_unpredictable_cost" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_rtl_if_conversion_unpredictable_cost" ++ j = var_opt_int_j"param_max_rtl_if_conversion_unpredictable_cost" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_sched_extend_regions_iters" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_sched_extend_regions_iters" ++ j = var_opt_int_j"param_max_sched_extend_regions_iters" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_sched_insn_conflict_delay" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_sched_insn_conflict_delay" ++ j = var_opt_int_j"param_max_sched_insn_conflict_delay" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_sched_ready_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_sched_ready_insns" ++ j = var_opt_int_j"param_max_sched_ready_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} 
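Every one of these generated blocks applies the same remapping: the awk script emits C that reads back the previously streamed 64-bit words explicit_mask_prev[w] and copies each surviving option's "explicitly set" bit b into bit j of the in-memory word ptr->explicit_mask[k], where k and j are looked up from the var_opt_int_k / var_opt_int_j tables by the option's internal name; options no longer present in those tables are simply skipped, and their bits are dropped. (Earlier in the hunk the x_flag_* fields follow the companion template: if the flag still exists its value is unpacked into ptr->x_flag_*, otherwise bp_unpack_var_len_int is still called and the result discarded so the bitstream stays in sync.) A minimal standalone awk sketch of one such bit move follows; w, b, k, j and the sample word are illustrative values, not taken from the patch:

# Sketch: move bit b of old word prev[w] to bit j of new word mask[k].
# awk has no shift operators, so the shifts are emulated with powers of two.
BEGIN {
  prev[0] = 2048              # hypothetical streamed word: bit 11 set
  w = 0; b = 11               # old (streamed) position -- example only
  k = 1; j = 3                # new (in-memory) position -- example only
  bit = int(prev[w] / 2 ^ b) % 2
  mask[k] += bit * 2 ^ j
  printf "mask[%d] = %d\n", k, mask[k]   # prints mask[1] = 8
}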
++if ("param_max_sched_region_blocks" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_sched_region_blocks" ++ j = var_opt_int_j"param_max_sched_region_blocks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_sched_region_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_sched_region_insns" ++ j = var_opt_int_j"param_max_sched_region_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_slsr_candidate_scan" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_slsr_candidate_scan" ++ j = var_opt_int_j"param_max_slsr_candidate_scan" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_speculative_devirt_maydefs" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_speculative_devirt_maydefs" ++ j = var_opt_int_j"param_max_speculative_devirt_maydefs" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_stores_to_merge" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_stores_to_merge" ++ j = var_opt_int_j"param_max_stores_to_merge" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_stores_to_sink" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_stores_to_sink" ++ j = var_opt_int_j"param_max_stores_to_sink" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_tail_merge_comparisons" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_tail_merge_comparisons" ++ j = var_opt_int_j"param_max_tail_merge_comparisons" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_tail_merge_iterations" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_tail_merge_iterations" ++ j = var_opt_int_j"param_max_tail_merge_iterations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_tracked_strlens" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_tracked_strlens" ++ j = var_opt_int_j"param_max_tracked_strlens" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_tree_if_conversion_phi_args" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_tree_if_conversion_phi_args" ++ j = var_opt_int_j"param_max_tree_if_conversion_phi_args" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_unroll_times" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_unroll_times" ++ j = var_opt_int_j"param_max_unroll_times" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_unrolled_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_unrolled_insns" ++ j = var_opt_int_j"param_max_unrolled_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_unswitch_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_unswitch_insns" ++ j = var_opt_int_j"param_max_unswitch_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_unswitch_level" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_unswitch_level" ++ j = var_opt_int_j"param_max_unswitch_level" ++ print " ptr->explicit_mask" k " 
|= ((explicit_mask_prev2 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_variable_expansions" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_variable_expansions" ++ j = var_opt_int_j"param_max_variable_expansions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_vartrack_expr_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_vartrack_expr_depth" ++ j = var_opt_int_j"param_max_vartrack_expr_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_vartrack_reverse_op_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_vartrack_reverse_op_size" ++ j = var_opt_int_j"param_max_vartrack_reverse_op_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_vartrack_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_vartrack_size" ++ j = var_opt_int_j"param_max_vartrack_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_max_vrp_switch_assertions" in var_opt_int_k) { ++ k = var_opt_int_k"param_max_vrp_switch_assertions" ++ j = var_opt_int_j"param_max_vrp_switch_assertions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_crossjump_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_crossjump_insns" ++ j = var_opt_int_j"param_min_crossjump_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_inline_recursive_probability" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_inline_recursive_probability" ++ j = var_opt_int_j"param_min_inline_recursive_probability" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_insn_to_prefetch_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_insn_to_prefetch_ratio" ++ j = var_opt_int_j"param_min_insn_to_prefetch_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_loop_cond_split_prob" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_loop_cond_split_prob" ++ j = var_opt_int_j"param_min_loop_cond_split_prob" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_pagesize" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_pagesize" ++ j = var_opt_int_j"param_min_pagesize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_size_for_stack_sharing" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_size_for_stack_sharing" ++ j = var_opt_int_j"param_min_size_for_stack_sharing" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_spec_prob" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_spec_prob" ++ j = var_opt_int_j"param_min_spec_prob" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_min_vect_loop_bound" in var_opt_int_k) { ++ k = var_opt_int_k"param_min_vect_loop_bound" ++ j = var_opt_int_j"param_min_vect_loop_bound" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_accesses" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_accesses" ++ j = 
var_opt_int_j"param_modref_max_accesses" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_adjustments" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_adjustments" ++ j = var_opt_int_j"param_modref_max_adjustments" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_bases" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_bases" ++ j = var_opt_int_j"param_modref_max_bases" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_depth" ++ j = var_opt_int_j"param_modref_max_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_escape_points" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_escape_points" ++ j = var_opt_int_j"param_modref_max_escape_points" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_refs" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_refs" ++ j = var_opt_int_j"param_modref_max_refs" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_modref_max_tests" in var_opt_int_k) { ++ k = var_opt_int_k"param_modref_max_tests" ++ j = var_opt_int_j"param_modref_max_tests" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ldp_dependency_search_range" in var_opt_int_k) { ++ k = var_opt_int_k"param_ldp_dependency_search_range" ++ j = var_opt_int_j"param_ldp_dependency_search_range" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_parloops_chunk_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_parloops_chunk_size" ++ j = var_opt_int_j"param_parloops_chunk_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_parloops_min_per_thread" in var_opt_int_k) { ++ k = var_opt_int_k"param_parloops_min_per_thread" ++ j = var_opt_int_j"param_parloops_min_per_thread" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_parloops_schedule" in var_opt_int_k) { ++ k = var_opt_int_k"param_parloops_schedule" ++ j = var_opt_int_j"param_parloops_schedule" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_partial_inlining_entry_probability" in var_opt_int_k) { ++ k = var_opt_int_k"param_partial_inlining_entry_probability" ++ j = var_opt_int_j"param_partial_inlining_entry_probability" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_predictable_branch_outcome" in var_opt_int_k) { ++ k = var_opt_int_k"param_predictable_branch_outcome" ++ j = var_opt_int_j"param_predictable_branch_outcome" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_prefetch_dynamic_strides" in var_opt_int_k) { ++ k = var_opt_int_k"param_prefetch_dynamic_strides" ++ j = var_opt_int_j"param_prefetch_dynamic_strides" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_prefetch_latency" in var_opt_int_k) { ++ k = 
var_opt_int_k"param_prefetch_latency" ++ j = var_opt_int_j"param_prefetch_latency" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_prefetch_min_insn_to_mem_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_prefetch_min_insn_to_mem_ratio" ++ j = var_opt_int_j"param_prefetch_min_insn_to_mem_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_prefetch_minimum_stride" in var_opt_int_k) { ++ k = var_opt_int_k"param_prefetch_minimum_stride" ++ j = var_opt_int_j"param_prefetch_minimum_stride" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ranger_logical_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_ranger_logical_depth" ++ j = var_opt_int_j"param_ranger_logical_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_relation_block_limit" in var_opt_int_k) { ++ k = var_opt_int_k"param_relation_block_limit" ++ j = var_opt_int_j"param_relation_block_limit" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_rpo_vn_max_loop_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_rpo_vn_max_loop_depth" ++ j = var_opt_int_j"param_rpo_vn_max_loop_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev2 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sccvn_max_alias_queries_per_access" in var_opt_int_k) { ++ k = var_opt_int_k"param_sccvn_max_alias_queries_per_access" ++ j = var_opt_int_j"param_sccvn_max_alias_queries_per_access" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_scev_max_expr_complexity" in var_opt_int_k) { ++ k = var_opt_int_k"param_scev_max_expr_complexity" ++ j = var_opt_int_j"param_scev_max_expr_complexity" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_scev_max_expr_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_scev_max_expr_size" ++ j = var_opt_int_j"param_scev_max_expr_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sched_autopref_queue_depth" in var_opt_int_k) { ++ k = var_opt_int_k"param_sched_autopref_queue_depth" ++ j = var_opt_int_j"param_sched_autopref_queue_depth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sched_mem_true_dep_cost" in var_opt_int_k) { ++ k = var_opt_int_k"param_sched_mem_true_dep_cost" ++ j = var_opt_int_j"param_sched_mem_true_dep_cost" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sched_pressure_algorithm" in var_opt_int_k) { ++ k = var_opt_int_k"param_sched_pressure_algorithm" ++ j = var_opt_int_j"param_sched_pressure_algorithm" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sched_spec_prob_cutoff" in var_opt_int_k) { ++ k = var_opt_int_k"param_sched_spec_prob_cutoff" ++ j = var_opt_int_j"param_sched_spec_prob_cutoff" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sched_state_edge_prob_cutoff" in var_opt_int_k) { ++ k = var_opt_int_k"param_sched_state_edge_prob_cutoff" ++ j = var_opt_int_j"param_sched_state_edge_prob_cutoff" ++ print " ptr->explicit_mask" 
k " |= ((explicit_mask_prev3 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_selsched_insns_to_rename" in var_opt_int_k) { ++ k = var_opt_int_k"param_selsched_insns_to_rename" ++ j = var_opt_int_j"param_selsched_insns_to_rename" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_selsched_max_lookahead" in var_opt_int_k) { ++ k = var_opt_int_k"param_selsched_max_lookahead" ++ j = var_opt_int_j"param_selsched_max_lookahead" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_selsched_max_sched_times" in var_opt_int_k) { ++ k = var_opt_int_k"param_selsched_max_sched_times" ++ j = var_opt_int_j"param_selsched_max_sched_times" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("semi_relayout_level" in var_opt_int_k) { ++ k = var_opt_int_k"semi_relayout_level" ++ j = var_opt_int_j"semi_relayout_level" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_simultaneous_prefetches" in var_opt_int_k) { ++ k = var_opt_int_k"param_simultaneous_prefetches" ++ j = var_opt_int_j"param_simultaneous_prefetches" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sink_frequency_threshold" in var_opt_int_k) { ++ k = var_opt_int_k"param_sink_frequency_threshold" ++ j = var_opt_int_j"param_sink_frequency_threshold" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sms_dfa_history" in var_opt_int_k) { ++ k = var_opt_int_k"param_sms_dfa_history" ++ j = var_opt_int_j"param_sms_dfa_history" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sms_loop_average_count_threshold" in var_opt_int_k) { ++ k = var_opt_int_k"param_sms_loop_average_count_threshold" ++ j = var_opt_int_j"param_sms_loop_average_count_threshold" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sms_max_ii_factor" in var_opt_int_k) { ++ k = var_opt_int_k"param_sms_max_ii_factor" ++ j = var_opt_int_j"param_sms_max_ii_factor" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sms_min_sc" in var_opt_int_k) { ++ k = var_opt_int_k"param_sms_min_sc" ++ j = var_opt_int_j"param_sms_min_sc" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sra_max_propagations" in var_opt_int_k) { ++ k = var_opt_int_k"param_sra_max_propagations" ++ j = var_opt_int_j"param_sra_max_propagations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sra_max_scalarization_size_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_sra_max_scalarization_size_size" ++ j = var_opt_int_j"param_sra_max_scalarization_size_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_sra_max_scalarization_size_speed" in var_opt_int_k) { ++ k = var_opt_int_k"param_sra_max_scalarization_size_speed" ++ j = var_opt_int_j"param_sra_max_scalarization_size_speed" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ssa_name_def_chain_limit" in var_opt_int_k) { ++ k = var_opt_int_k"param_ssa_name_def_chain_limit" ++ j = 
var_opt_int_j"param_ssa_name_def_chain_limit" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ssp_buffer_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_ssp_buffer_size" ++ j = var_opt_int_j"param_ssp_buffer_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_stack_clash_protection_guard_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_stack_clash_protection_guard_size" ++ j = var_opt_int_j"param_stack_clash_protection_guard_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_stack_clash_protection_probe_interval" in var_opt_int_k) { ++ k = var_opt_int_k"param_stack_clash_protection_probe_interval" ++ j = var_opt_int_j"param_stack_clash_protection_probe_interval" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_store_merging_allow_unaligned" in var_opt_int_k) { ++ k = var_opt_int_k"param_store_merging_allow_unaligned" ++ j = var_opt_int_j"param_store_merging_allow_unaligned" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_store_merging_max_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_store_merging_max_size" ++ j = var_opt_int_j"param_store_merging_max_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_struct_reorg_cold_struct_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_struct_reorg_cold_struct_ratio" ++ j = var_opt_int_j"param_struct_reorg_cold_struct_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_switch_conversion_branch_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_switch_conversion_branch_ratio" ++ j = var_opt_int_j"param_switch_conversion_branch_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tm_max_aggregate_size" in var_opt_int_k) { ++ k = var_opt_int_k"param_tm_max_aggregate_size" ++ j = var_opt_int_j"param_tm_max_aggregate_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tracer_dynamic_coverage_feedback" in var_opt_int_k) { ++ k = var_opt_int_k"param_tracer_dynamic_coverage_feedback" ++ j = var_opt_int_j"param_tracer_dynamic_coverage_feedback" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tracer_dynamic_coverage" in var_opt_int_k) { ++ k = var_opt_int_k"param_tracer_dynamic_coverage" ++ j = var_opt_int_j"param_tracer_dynamic_coverage" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tracer_max_code_growth" in var_opt_int_k) { ++ k = var_opt_int_k"param_tracer_max_code_growth" ++ j = var_opt_int_j"param_tracer_max_code_growth" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tracer_min_branch_probability_feedback" in var_opt_int_k) { ++ k = var_opt_int_k"param_tracer_min_branch_probability_feedback" ++ j = var_opt_int_j"param_tracer_min_branch_probability_feedback" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tracer_min_branch_probability" in var_opt_int_k) { ++ k = 
var_opt_int_k"param_tracer_min_branch_probability" ++ j = var_opt_int_j"param_tracer_min_branch_probability" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tracer_min_branch_ratio" in var_opt_int_k) { ++ k = var_opt_int_k"param_tracer_min_branch_ratio" ++ j = var_opt_int_j"param_tracer_min_branch_ratio" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_tree_reassoc_width" in var_opt_int_k) { ++ k = var_opt_int_k"param_tree_reassoc_width" ++ j = var_opt_int_j"param_tree_reassoc_width" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_uninit_control_dep_attempts" in var_opt_int_k) { ++ k = var_opt_int_k"param_uninit_control_dep_attempts" ++ j = var_opt_int_j"param_uninit_control_dep_attempts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_uninlined_function_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_uninlined_function_insns" ++ j = var_opt_int_j"param_uninlined_function_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_uninlined_function_time" in var_opt_int_k) { ++ k = var_opt_int_k"param_uninlined_function_time" ++ j = var_opt_int_j"param_uninlined_function_time" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_uninlined_function_thunk_insns" in var_opt_int_k) { ++ k = var_opt_int_k"param_uninlined_function_thunk_insns" ++ j = var_opt_int_j"param_uninlined_function_thunk_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_uninlined_function_thunk_time" in var_opt_int_k) { ++ k = var_opt_int_k"param_uninlined_function_thunk_time" ++ j = var_opt_int_j"param_uninlined_function_thunk_time" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_unlikely_bb_count_fraction" in var_opt_int_k) { ++ k = var_opt_int_k"param_unlikely_bb_count_fraction" ++ j = var_opt_int_j"param_unlikely_bb_count_fraction" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_unroll_jam_max_unroll" in var_opt_int_k) { ++ k = var_opt_int_k"param_unroll_jam_max_unroll" ++ j = var_opt_int_j"param_unroll_jam_max_unroll" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_unroll_jam_min_percent" in var_opt_int_k) { ++ k = var_opt_int_k"param_unroll_jam_min_percent" ++ j = var_opt_int_j"param_unroll_jam_min_percent" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_use_after_scope_direct_emission_threshold" in var_opt_int_k) { ++ k = var_opt_int_k"param_use_after_scope_direct_emission_threshold" ++ j = var_opt_int_j"param_use_after_scope_direct_emission_threshold" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_flexible_seg_len" in var_opt_int_k) { ++ k = var_opt_int_k"param_flexible_seg_len" ++ j = var_opt_int_j"param_flexible_seg_len" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_epilogues_nomask" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_epilogues_nomask" ++ j = 
var_opt_int_j"param_vect_epilogues_nomask" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_induction_float" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_induction_float" ++ j = var_opt_int_j"param_vect_induction_float" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_inner_loop_cost_factor" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_inner_loop_cost_factor" ++ j = var_opt_int_j"param_vect_inner_loop_cost_factor" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_max_peeling_for_alignment" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_max_peeling_for_alignment" ++ j = var_opt_int_j"param_vect_max_peeling_for_alignment" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_max_version_for_alias_checks" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_max_version_for_alias_checks" ++ j = var_opt_int_j"param_vect_max_version_for_alias_checks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_max_version_for_alignment_checks" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_max_version_for_alignment_checks" ++ j = var_opt_int_j"param_vect_max_version_for_alignment_checks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vect_partial_vector_usage" in var_opt_int_k) { ++ k = var_opt_int_k"param_vect_partial_vector_usage" ++ j = var_opt_int_j"param_vect_partial_vector_usage" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_stalled_insns" in var_opt_int_k) { ++ k = var_opt_int_k"flag_sched_stalled_insns" ++ j = var_opt_int_j"flag_sched_stalled_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_stalled_insns_dep" in var_opt_int_k) { ++ k = var_opt_int_k"flag_sched_stalled_insns_dep" ++ j = var_opt_int_j"flag_sched_stalled_insns_dep" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_parallelize_loops" in var_opt_int_k) { ++ k = var_opt_int_k"flag_tree_parallelize_loops" ++ j = var_opt_int_j"flag_tree_parallelize_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_evrp_mode" in var_opt_enum_k) { ++ k = var_opt_enum_k"param_evrp_mode" ++ j = var_opt_enum_j"param_evrp_mode" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_ranger_debug" in var_opt_enum_k) { ++ k = var_opt_enum_k"param_ranger_debug" ++ j = var_opt_enum_j"param_ranger_debug" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_threader_debug" in var_opt_enum_k) { ++ k = var_opt_enum_k"param_threader_debug" ++ j = var_opt_enum_j"param_threader_debug" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vrp1_mode" in var_opt_enum_k) { ++ k = var_opt_enum_k"param_vrp1_mode" ++ j = var_opt_enum_j"param_vrp1_mode" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("param_vrp2_mode" in var_opt_enum_k) { ++ k = 
var_opt_enum_k"param_vrp2_mode" ++ j = var_opt_enum_j"param_vrp2_mode" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_excess_precision" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_excess_precision" ++ j = var_opt_enum_j"flag_excess_precision" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_fp_contract_mode" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_fp_contract_mode" ++ j = var_opt_enum_j"flag_fp_contract_mode" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev3 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ira_algorithm" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_ira_algorithm" ++ j = var_opt_enum_j"flag_ira_algorithm" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ira_region" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_ira_region" ++ j = var_opt_enum_j"flag_ira_region" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_live_patching" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_live_patching" ++ j = var_opt_enum_j"flag_live_patching" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_fp_model" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_fp_model" ++ j = var_opt_enum_j"flag_fp_model" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_reorder_blocks_algorithm" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_reorder_blocks_algorithm" ++ j = var_opt_enum_j"flag_reorder_blocks_algorithm" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_simd_cost_model" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_simd_cost_model" ++ j = var_opt_enum_j"flag_simd_cost_model" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_stack_reuse" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_stack_reuse" ++ j = var_opt_enum_j"flag_stack_reuse" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_auto_var_init" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_auto_var_init" ++ j = var_opt_enum_j"flag_auto_var_init" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_vect_cost_model" in var_opt_enum_k) { ++ k = var_opt_enum_k"flag_vect_cost_model" ++ j = var_opt_enum_j"flag_vect_cost_model" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("optimize" in var_opt_char_k) { ++ k = var_opt_char_k"optimize" ++ j = var_opt_char_j"optimize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("optimize_size" in var_opt_char_k) { ++ k = var_opt_char_k"optimize_size" ++ j = var_opt_char_j"optimize_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("optimize_debug" in var_opt_char_k) { ++ k = var_opt_char_k"optimize_debug" ++ j = var_opt_char_j"optimize_debug" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("optimize_fast" in var_opt_char_k) { ++ k = var_opt_char_k"optimize_fast" ++ j = var_opt_char_j"optimize_fast" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev4 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("warn_inline" in var_opt_char_k) { ++ k = var_opt_char_k"warn_inline" ++ j = var_opt_char_j"warn_inline" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_aggressive_loop_optimizations" in var_opt_char_k) { ++ k = var_opt_char_k"flag_aggressive_loop_optimizations" ++ j = var_opt_char_j"flag_aggressive_loop_optimizations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_align_functions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_align_functions" ++ j = var_opt_char_j"flag_align_functions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_align_jumps" in var_opt_char_k) { ++ k = var_opt_char_k"flag_align_jumps" ++ j = var_opt_char_j"flag_align_jumps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_align_labels" in var_opt_char_k) { ++ k = var_opt_char_k"flag_align_labels" ++ j = var_opt_char_j"flag_align_labels" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_align_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_align_loops" ++ j = var_opt_char_j"flag_align_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_allocation_dce" in var_opt_char_k) { ++ k = var_opt_char_k"flag_allocation_dce" ++ j = var_opt_char_j"flag_allocation_dce" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_store_data_races" in var_opt_char_k) { ++ k = var_opt_char_k"flag_store_data_races" ++ j = var_opt_char_j"flag_store_data_races" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_array_widen_compare" in var_opt_char_k) { ++ k = var_opt_char_k"flag_array_widen_compare" ++ j = var_opt_char_j"flag_array_widen_compare" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_associative_math" in var_opt_char_k) { ++ k = var_opt_char_k"flag_associative_math" ++ j = var_opt_char_j"flag_associative_math" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_asynchronous_unwind_tables" in var_opt_char_k) { ++ k = var_opt_char_k"flag_asynchronous_unwind_tables" ++ j = var_opt_char_j"flag_asynchronous_unwind_tables" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_auto_inc_dec" in var_opt_char_k) { ++ k = var_opt_char_k"flag_auto_inc_dec" ++ j = var_opt_char_j"flag_auto_inc_dec" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_bit_tests" in var_opt_char_k) { ++ k = var_opt_char_k"flag_bit_tests" ++ j = var_opt_char_j"flag_bit_tests" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_branch_on_count_reg" in var_opt_char_k) { ++ k = var_opt_char_k"flag_branch_on_count_reg" ++ j = var_opt_char_j"flag_branch_on_count_reg" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_branch_probabilities" in var_opt_char_k) { ++ k = var_opt_char_k"flag_branch_probabilities" ++ j = 
var_opt_char_j"flag_branch_probabilities" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_caller_saves" in var_opt_char_k) { ++ k = var_opt_char_k"flag_caller_saves" ++ j = var_opt_char_j"flag_caller_saves" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ccmp2" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ccmp2" ++ j = var_opt_char_j"flag_ccmp2" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_code_hoisting" in var_opt_char_k) { ++ k = var_opt_char_k"flag_code_hoisting" ++ j = var_opt_char_j"flag_code_hoisting" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_combine_stack_adjustments" in var_opt_char_k) { ++ k = var_opt_char_k"flag_combine_stack_adjustments" ++ j = var_opt_char_j"flag_combine_stack_adjustments" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_compare_elim_after_reload" in var_opt_char_k) { ++ k = var_opt_char_k"flag_compare_elim_after_reload" ++ j = var_opt_char_j"flag_compare_elim_after_reload" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_conserve_stack" in var_opt_char_k) { ++ k = var_opt_char_k"flag_conserve_stack" ++ j = var_opt_char_j"flag_conserve_stack" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_convert_minmax" in var_opt_char_k) { ++ k = var_opt_char_k"flag_convert_minmax" ++ j = var_opt_char_j"flag_convert_minmax" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_cprop_registers" in var_opt_char_k) { ++ k = var_opt_char_k"flag_cprop_registers" ++ j = var_opt_char_j"flag_cprop_registers" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_crossjumping" in var_opt_char_k) { ++ k = var_opt_char_k"flag_crossjumping" ++ j = var_opt_char_j"flag_crossjumping" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_crypto_accel_aes" in var_opt_char_k) { ++ k = var_opt_char_k"flag_crypto_accel_aes" ++ j = var_opt_char_j"flag_crypto_accel_aes" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_cse_follow_jumps" in var_opt_char_k) { ++ k = var_opt_char_k"flag_cse_follow_jumps" ++ j = var_opt_char_j"flag_cse_follow_jumps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_cx_fortran_rules" in var_opt_char_k) { ++ k = var_opt_char_k"flag_cx_fortran_rules" ++ j = var_opt_char_j"flag_cx_fortran_rules" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_cx_limited_range" in var_opt_char_k) { ++ k = var_opt_char_k"flag_cx_limited_range" ++ j = var_opt_char_j"flag_cx_limited_range" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_dce" in var_opt_char_k) { ++ k = var_opt_char_k"flag_dce" ++ j = var_opt_char_j"flag_dce" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_defer_pop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_defer_pop" ++ j = 
var_opt_char_j"flag_defer_pop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_delayed_branch" in var_opt_char_k) { ++ k = var_opt_char_k"flag_delayed_branch" ++ j = var_opt_char_j"flag_delayed_branch" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_delete_dead_exceptions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_delete_dead_exceptions" ++ j = var_opt_char_j"flag_delete_dead_exceptions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_delete_null_pointer_checks" in var_opt_char_k) { ++ k = var_opt_char_k"flag_delete_null_pointer_checks" ++ j = var_opt_char_j"flag_delete_null_pointer_checks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_devirtualize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_devirtualize" ++ j = var_opt_char_j"flag_devirtualize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_devirtualize_speculatively" in var_opt_char_k) { ++ k = var_opt_char_k"flag_devirtualize_speculatively" ++ j = var_opt_char_j"flag_devirtualize_speculatively" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_dse" in var_opt_char_k) { ++ k = var_opt_char_k"flag_dse" ++ j = var_opt_char_j"flag_dse" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_early_inlining" in var_opt_char_k) { ++ k = var_opt_char_k"flag_early_inlining" ++ j = var_opt_char_j"flag_early_inlining" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_exceptions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_exceptions" ++ j = var_opt_char_j"flag_exceptions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_expensive_optimizations" in var_opt_char_k) { ++ k = var_opt_char_k"flag_expensive_optimizations" ++ j = var_opt_char_j"flag_expensive_optimizations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_finite_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_finite_loops" ++ j = var_opt_char_j"flag_finite_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_finite_math_only" in var_opt_char_k) { ++ k = var_opt_char_k"flag_finite_math_only" ++ j = var_opt_char_j"flag_finite_math_only" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_float_store" in var_opt_char_k) { ++ k = var_opt_char_k"flag_float_store" ++ j = var_opt_char_j"flag_float_store" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_fold_simple_inlines" in var_opt_char_k) { ++ k = var_opt_char_k"flag_fold_simple_inlines" ++ j = var_opt_char_j"flag_fold_simple_inlines" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_forward_propagate" in var_opt_char_k) { ++ k = var_opt_char_k"flag_forward_propagate" ++ j = var_opt_char_j"flag_forward_propagate" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_fp_int_builtin_inexact" in 
var_opt_char_k) { ++ k = var_opt_char_k"flag_fp_int_builtin_inexact" ++ j = var_opt_char_j"flag_fp_int_builtin_inexact" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ftz" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ftz" ++ j = var_opt_char_j"flag_ftz" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_no_function_cse" in var_opt_char_k) { ++ k = var_opt_char_k"flag_no_function_cse" ++ j = var_opt_char_j"flag_no_function_cse" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_gcse" in var_opt_char_k) { ++ k = var_opt_char_k"flag_gcse" ++ j = var_opt_char_j"flag_gcse" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_gcse_after_reload" in var_opt_char_k) { ++ k = var_opt_char_k"flag_gcse_after_reload" ++ j = var_opt_char_j"flag_gcse_after_reload" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_gcse_las" in var_opt_char_k) { ++ k = var_opt_char_k"flag_gcse_las" ++ j = var_opt_char_j"flag_gcse_las" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_gcse_lm" in var_opt_char_k) { ++ k = var_opt_char_k"flag_gcse_lm" ++ j = var_opt_char_j"flag_gcse_lm" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev4 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_gcse_sm" in var_opt_char_k) { ++ k = var_opt_char_k"flag_gcse_sm" ++ j = var_opt_char_j"flag_gcse_sm" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_graphite" in var_opt_char_k) { ++ k = var_opt_char_k"flag_graphite" ++ j = var_opt_char_j"flag_graphite" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_graphite_identity" in var_opt_char_k) { ++ k = var_opt_char_k"flag_graphite_identity" ++ j = var_opt_char_j"flag_graphite_identity" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_guess_branch_prob" in var_opt_char_k) { ++ k = var_opt_char_k"flag_guess_branch_prob" ++ j = var_opt_char_j"flag_guess_branch_prob" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_harden_compares" in var_opt_char_k) { ++ k = var_opt_char_k"flag_harden_compares" ++ j = var_opt_char_j"flag_harden_compares" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_harden_conditional_branches" in var_opt_char_k) { ++ k = var_opt_char_k"flag_harden_conditional_branches" ++ j = var_opt_char_j"flag_harden_conditional_branches" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_hoist_adjacent_loads" in var_opt_char_k) { ++ k = var_opt_char_k"flag_hoist_adjacent_loads" ++ j = var_opt_char_j"flag_hoist_adjacent_loads" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_icp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_icp" ++ j = var_opt_char_j"flag_icp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_icp_speculatively" in var_opt_char_k) { ++ k = var_opt_char_k"flag_icp_speculatively" ++ j = 
var_opt_char_j"flag_icp_speculatively" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_if_conversion" in var_opt_char_k) { ++ k = var_opt_char_k"flag_if_conversion" ++ j = var_opt_char_j"flag_if_conversion" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_if_conversion_gimple" in var_opt_char_k) { ++ k = var_opt_char_k"flag_if_conversion_gimple" ++ j = var_opt_char_j"flag_if_conversion_gimple" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_if_conversion2" in var_opt_char_k) { ++ k = var_opt_char_k"flag_if_conversion2" ++ j = var_opt_char_j"flag_if_conversion2" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ifcvt_allow_complicated_cmps" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ifcvt_allow_complicated_cmps" ++ j = var_opt_char_j"flag_ifcvt_allow_complicated_cmps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_indirect_inlining" in var_opt_char_k) { ++ k = var_opt_char_k"flag_indirect_inlining" ++ j = var_opt_char_j"flag_indirect_inlining" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_no_inline" in var_opt_char_k) { ++ k = var_opt_char_k"flag_no_inline" ++ j = var_opt_char_j"flag_no_inline" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_inline_atomics" in var_opt_char_k) { ++ k = var_opt_char_k"flag_inline_atomics" ++ j = var_opt_char_j"flag_inline_atomics" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_inline_functions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_inline_functions" ++ j = var_opt_char_j"flag_inline_functions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_inline_functions_called_once" in var_opt_char_k) { ++ k = var_opt_char_k"flag_inline_functions_called_once" ++ j = var_opt_char_j"flag_inline_functions_called_once" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_inline_small_functions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_inline_small_functions" ++ j = var_opt_char_j"flag_inline_small_functions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_bit_cp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_bit_cp" ++ j = var_opt_char_j"flag_ipa_bit_cp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_cp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_cp" ++ j = var_opt_char_j"flag_ipa_cp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_cp_clone" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_cp_clone" ++ j = var_opt_char_j"flag_ipa_cp_clone" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_ic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_ic" ++ j = var_opt_char_j"flag_ipa_ic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_icf" in var_opt_char_k) { ++ k = 
var_opt_char_k"flag_ipa_icf" ++ j = var_opt_char_j"flag_ipa_icf" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_icf_functions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_icf_functions" ++ j = var_opt_char_j"flag_ipa_icf_functions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_icf_variables" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_icf_variables" ++ j = var_opt_char_j"flag_ipa_icf_variables" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_modref" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_modref" ++ j = var_opt_char_j"flag_ipa_modref" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_prefetch" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_prefetch" ++ j = var_opt_char_j"flag_ipa_prefetch" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_profile" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_profile" ++ j = var_opt_char_j"flag_ipa_profile" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_pta" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_pta" ++ j = var_opt_char_j"flag_ipa_pta" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_pure_const" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_pure_const" ++ j = var_opt_char_j"flag_ipa_pure_const" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_ra" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_ra" ++ j = var_opt_char_j"flag_ipa_ra" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_reference" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_reference" ++ j = var_opt_char_j"flag_ipa_reference" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_reference_addressable" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_reference_addressable" ++ j = var_opt_char_j"flag_ipa_reference_addressable" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_reorder_fields" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_reorder_fields" ++ j = var_opt_char_j"flag_ipa_reorder_fields" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_sra" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_sra" ++ j = var_opt_char_j"flag_ipa_sra" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_stack_alignment" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_stack_alignment" ++ j = var_opt_char_j"flag_ipa_stack_alignment" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_strict_aliasing" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_strict_aliasing" ++ j = var_opt_char_j"flag_ipa_strict_aliasing" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_struct_reorg" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_struct_reorg" ++ j = 
var_opt_char_j"flag_ipa_struct_reorg" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ipa_vrp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ipa_vrp" ++ j = var_opt_char_j"flag_ipa_vrp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ira_hoist_pressure" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ira_hoist_pressure" ++ j = var_opt_char_j"flag_ira_hoist_pressure" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ira_loop_pressure" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ira_loop_pressure" ++ j = var_opt_char_j"flag_ira_loop_pressure" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ira_share_save_slots" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ira_share_save_slots" ++ j = var_opt_char_j"flag_ira_share_save_slots" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ira_share_spill_slots" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ira_share_spill_slots" ++ j = var_opt_char_j"flag_ira_share_spill_slots" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_isolate_erroneous_paths_attribute" in var_opt_char_k) { ++ k = var_opt_char_k"flag_isolate_erroneous_paths_attribute" ++ j = var_opt_char_j"flag_isolate_erroneous_paths_attribute" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_isolate_erroneous_paths_dereference" in var_opt_char_k) { ++ k = var_opt_char_k"flag_isolate_erroneous_paths_dereference" ++ j = var_opt_char_j"flag_isolate_erroneous_paths_dereference" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ivopts" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ivopts" ++ j = var_opt_char_j"flag_ivopts" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_jump_tables" in var_opt_char_k) { ++ k = var_opt_char_k"flag_jump_tables" ++ j = var_opt_char_j"flag_jump_tables" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_keep_gc_roots_live" in var_opt_char_k) { ++ k = var_opt_char_k"flag_keep_gc_roots_live" ++ j = var_opt_char_j"flag_keep_gc_roots_live" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_kernel_pgo" in var_opt_char_k) { ++ k = var_opt_char_k"flag_kernel_pgo" ++ j = var_opt_char_j"flag_kernel_pgo" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_lifetime_dse" in var_opt_char_k) { ++ k = var_opt_char_k"flag_lifetime_dse" ++ j = var_opt_char_j"flag_lifetime_dse" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_limit_function_alignment" in var_opt_char_k) { ++ k = var_opt_char_k"flag_limit_function_alignment" ++ j = var_opt_char_j"flag_limit_function_alignment" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_live_range_shrinkage" in var_opt_char_k) { ++ k = var_opt_char_k"flag_live_range_shrinkage" ++ j = var_opt_char_j"flag_live_range_shrinkage" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev5 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_loop_crc" in var_opt_char_k) { ++ k = var_opt_char_k"flag_loop_crc" ++ j = var_opt_char_j"flag_loop_crc" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_loop_elim" in var_opt_char_k) { ++ k = var_opt_char_k"flag_loop_elim" ++ j = var_opt_char_j"flag_loop_elim" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_loop_interchange" in var_opt_char_k) { ++ k = var_opt_char_k"flag_loop_interchange" ++ j = var_opt_char_j"flag_loop_interchange" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_loop_nest_optimize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_loop_nest_optimize" ++ j = var_opt_char_j"flag_loop_nest_optimize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_loop_parallelize_all" in var_opt_char_k) { ++ k = var_opt_char_k"flag_loop_parallelize_all" ++ j = var_opt_char_j"flag_loop_parallelize_all" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unroll_jam" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unroll_jam" ++ j = var_opt_char_j"flag_unroll_jam" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_lra_remat" in var_opt_char_k) { ++ k = var_opt_char_k"flag_lra_remat" ++ j = var_opt_char_j"flag_lra_remat" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_errno_math" in var_opt_char_k) { ++ k = var_opt_char_k"flag_errno_math" ++ j = var_opt_char_j"flag_errno_math" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_merge_mull" in var_opt_char_k) { ++ k = var_opt_char_k"flag_merge_mull" ++ j = var_opt_char_j"flag_merge_mull" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_modulo_sched" in var_opt_char_k) { ++ k = var_opt_char_k"flag_modulo_sched" ++ j = var_opt_char_j"flag_modulo_sched" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_modulo_sched_allow_regmoves" in var_opt_char_k) { ++ k = var_opt_char_k"flag_modulo_sched_allow_regmoves" ++ j = var_opt_char_j"flag_modulo_sched_allow_regmoves" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev5 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_move_loop_invariants" in var_opt_char_k) { ++ k = var_opt_char_k"flag_move_loop_invariants" ++ j = var_opt_char_j"flag_move_loop_invariants" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_move_loop_stores" in var_opt_char_k) { ++ k = var_opt_char_k"flag_move_loop_stores" ++ j = var_opt_char_j"flag_move_loop_stores" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_non_call_exceptions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_non_call_exceptions" ++ j = var_opt_char_j"flag_non_call_exceptions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_nothrow_opt" in var_opt_char_k) { ++ k = var_opt_char_k"flag_nothrow_opt" ++ j = var_opt_char_j"flag_nothrow_opt" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev6 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_omit_frame_pointer" in var_opt_char_k) { ++ k = var_opt_char_k"flag_omit_frame_pointer" ++ j = var_opt_char_j"flag_omit_frame_pointer" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_opt_info" in var_opt_char_k) { ++ k = var_opt_char_k"flag_opt_info" ++ j = var_opt_char_j"flag_opt_info" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_optimize_sibling_calls" in var_opt_char_k) { ++ k = var_opt_char_k"flag_optimize_sibling_calls" ++ j = var_opt_char_j"flag_optimize_sibling_calls" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_optimize_strlen" in var_opt_char_k) { ++ k = var_opt_char_k"flag_optimize_strlen" ++ j = var_opt_char_j"flag_optimize_strlen" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_pack_struct" in var_opt_char_k) { ++ k = var_opt_char_k"flag_pack_struct" ++ j = var_opt_char_j"flag_pack_struct" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_partial_inlining" in var_opt_char_k) { ++ k = var_opt_char_k"flag_partial_inlining" ++ j = var_opt_char_j"flag_partial_inlining" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_peel_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_peel_loops" ++ j = var_opt_char_j"flag_peel_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_no_peephole" in var_opt_char_k) { ++ k = var_opt_char_k"flag_no_peephole" ++ j = var_opt_char_j"flag_no_peephole" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_peephole2" in var_opt_char_k) { ++ k = var_opt_char_k"flag_peephole2" ++ j = var_opt_char_j"flag_peephole2" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_plt" in var_opt_char_k) { ++ k = var_opt_char_k"flag_plt" ++ j = var_opt_char_j"flag_plt" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_predictive_commoning" in var_opt_char_k) { ++ k = var_opt_char_k"flag_predictive_commoning" ++ j = var_opt_char_j"flag_predictive_commoning" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_prefetch_loop_arrays" in var_opt_char_k) { ++ k = var_opt_char_k"flag_prefetch_loop_arrays" ++ j = var_opt_char_j"flag_prefetch_loop_arrays" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_printf_return_value" in var_opt_char_k) { ++ k = var_opt_char_k"flag_printf_return_value" ++ j = var_opt_char_j"flag_printf_return_value" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_profile_partial_training" in var_opt_char_k) { ++ k = var_opt_char_k"flag_profile_partial_training" ++ j = var_opt_char_j"flag_profile_partial_training" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_profile_reorder_functions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_profile_reorder_functions" ++ j = var_opt_char_j"flag_profile_reorder_functions" ++ print " 
ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_reciprocal_math" in var_opt_char_k) { ++ k = var_opt_char_k"flag_reciprocal_math" ++ j = var_opt_char_j"flag_reciprocal_math" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ree" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ree" ++ j = var_opt_char_j"flag_ree" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_pcc_struct_return" in var_opt_char_k) { ++ k = var_opt_char_k"flag_pcc_struct_return" ++ j = var_opt_char_j"flag_pcc_struct_return" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_rename_registers" in var_opt_char_k) { ++ k = var_opt_char_k"flag_rename_registers" ++ j = var_opt_char_j"flag_rename_registers" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_reorder_blocks" in var_opt_char_k) { ++ k = var_opt_char_k"flag_reorder_blocks" ++ j = var_opt_char_j"flag_reorder_blocks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_reorder_blocks_and_partition" in var_opt_char_k) { ++ k = var_opt_char_k"flag_reorder_blocks_and_partition" ++ j = var_opt_char_j"flag_reorder_blocks_and_partition" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_reorder_functions" in var_opt_char_k) { ++ k = var_opt_char_k"flag_reorder_functions" ++ j = var_opt_char_j"flag_reorder_functions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_rerun_cse_after_loop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_rerun_cse_after_loop" ++ j = var_opt_char_j"flag_rerun_cse_after_loop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_resched_modulo_sched" in var_opt_char_k) { ++ k = var_opt_char_k"flag_resched_modulo_sched" ++ j = var_opt_char_j"flag_resched_modulo_sched" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_rounding_math" in var_opt_char_k) { ++ k = var_opt_char_k"flag_rounding_math" ++ j = var_opt_char_j"flag_rounding_math" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_rtti" in var_opt_char_k) { ++ k = var_opt_char_k"flag_rtti" ++ j = var_opt_char_j"flag_rtti" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_save_optimization_record" in var_opt_char_k) { ++ k = var_opt_char_k"flag_save_optimization_record" ++ j = var_opt_char_j"flag_save_optimization_record" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_critical_path_heuristic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_critical_path_heuristic" ++ j = var_opt_char_j"flag_sched_critical_path_heuristic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_dep_count_heuristic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_dep_count_heuristic" ++ j = var_opt_char_j"flag_sched_dep_count_heuristic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if 
("flag_sched_group_heuristic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_group_heuristic" ++ j = var_opt_char_j"flag_sched_group_heuristic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_interblock" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_interblock" ++ j = var_opt_char_j"flag_schedule_interblock" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_last_insn_heuristic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_last_insn_heuristic" ++ j = var_opt_char_j"flag_sched_last_insn_heuristic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_pressure" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_pressure" ++ j = var_opt_char_j"flag_sched_pressure" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_rank_heuristic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_rank_heuristic" ++ j = var_opt_char_j"flag_sched_rank_heuristic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_speculative" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_speculative" ++ j = var_opt_char_j"flag_schedule_speculative" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched_spec_insn_heuristic" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched_spec_insn_heuristic" ++ j = var_opt_char_j"flag_sched_spec_insn_heuristic" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_speculative_load" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_speculative_load" ++ j = var_opt_char_j"flag_schedule_speculative_load" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_speculative_load_dangerous" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_speculative_load_dangerous" ++ j = var_opt_char_j"flag_schedule_speculative_load_dangerous" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sched2_use_superblocks" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sched2_use_superblocks" ++ j = var_opt_char_j"flag_sched2_use_superblocks" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_fusion" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_fusion" ++ j = var_opt_char_j"flag_schedule_fusion" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_insns" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_insns" ++ j = var_opt_char_j"flag_schedule_insns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_schedule_insns_after_reload" in var_opt_char_k) { ++ k = var_opt_char_k"flag_schedule_insns_after_reload" ++ j = var_opt_char_j"flag_schedule_insns_after_reload" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_section_anchors" in var_opt_char_k) { ++ k = var_opt_char_k"flag_section_anchors" ++ j = var_opt_char_j"flag_section_anchors" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 46) 
& HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sel_sched_pipelining" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sel_sched_pipelining" ++ j = var_opt_char_j"flag_sel_sched_pipelining" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sel_sched_pipelining_outer_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sel_sched_pipelining_outer_loops" ++ j = var_opt_char_j"flag_sel_sched_pipelining_outer_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_sel_sched_reschedule_pipelined" in var_opt_char_k) { ++ k = var_opt_char_k"flag_sel_sched_reschedule_pipelined" ++ j = var_opt_char_j"flag_sel_sched_reschedule_pipelined" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_selective_scheduling" in var_opt_char_k) { ++ k = var_opt_char_k"flag_selective_scheduling" ++ j = var_opt_char_j"flag_selective_scheduling" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_selective_scheduling2" in var_opt_char_k) { ++ k = var_opt_char_k"flag_selective_scheduling2" ++ j = var_opt_char_j"flag_selective_scheduling2" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_semantic_interposition" in var_opt_char_k) { ++ k = var_opt_char_k"flag_semantic_interposition" ++ j = var_opt_char_j"flag_semantic_interposition" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_short_enums" in var_opt_char_k) { ++ k = var_opt_char_k"flag_short_enums" ++ j = var_opt_char_j"flag_short_enums" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_short_wchar" in var_opt_char_k) { ++ k = var_opt_char_k"flag_short_wchar" ++ j = var_opt_char_j"flag_short_wchar" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_shrink_wrap" in var_opt_char_k) { ++ k = var_opt_char_k"flag_shrink_wrap" ++ j = var_opt_char_j"flag_shrink_wrap" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_shrink_wrap_separate" in var_opt_char_k) { ++ k = var_opt_char_k"flag_shrink_wrap_separate" ++ j = var_opt_char_j"flag_shrink_wrap_separate" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_signaling_nans" in var_opt_char_k) { ++ k = var_opt_char_k"flag_signaling_nans" ++ j = var_opt_char_j"flag_signaling_nans" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_signed_zeros" in var_opt_char_k) { ++ k = var_opt_char_k"flag_signed_zeros" ++ j = var_opt_char_j"flag_signed_zeros" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_simdmath" in var_opt_char_k) { ++ k = var_opt_char_k"flag_simdmath" ++ j = var_opt_char_j"flag_simdmath" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_single_precision_constant" in var_opt_char_k) { ++ k = var_opt_char_k"flag_single_precision_constant" ++ j = var_opt_char_j"flag_single_precision_constant" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_split_ivs_in_unroller" in 
var_opt_char_k) { ++ k = var_opt_char_k"flag_split_ivs_in_unroller" ++ j = var_opt_char_j"flag_split_ivs_in_unroller" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_split_ldp_stp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_split_ldp_stp" ++ j = var_opt_char_j"flag_split_ldp_stp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_split_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_split_loops" ++ j = var_opt_char_j"flag_split_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev6 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_split_paths" in var_opt_char_k) { ++ k = var_opt_char_k"flag_split_paths" ++ j = var_opt_char_j"flag_split_paths" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_split_wide_types" in var_opt_char_k) { ++ k = var_opt_char_k"flag_split_wide_types" ++ j = var_opt_char_j"flag_split_wide_types" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_split_wide_types_early" in var_opt_char_k) { ++ k = var_opt_char_k"flag_split_wide_types_early" ++ j = var_opt_char_j"flag_split_wide_types_early" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ssa_backprop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ssa_backprop" ++ j = var_opt_char_j"flag_ssa_backprop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_ssa_phiopt" in var_opt_char_k) { ++ k = var_opt_char_k"flag_ssa_phiopt" ++ j = var_opt_char_j"flag_ssa_phiopt" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_stack_clash_protection" in var_opt_char_k) { ++ k = var_opt_char_k"flag_stack_clash_protection" ++ j = var_opt_char_j"flag_stack_clash_protection" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_stack_protect" in var_opt_char_k) { ++ k = var_opt_char_k"flag_stack_protect" ++ j = var_opt_char_j"flag_stack_protect" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_stdarg_opt" in var_opt_char_k) { ++ k = var_opt_char_k"flag_stdarg_opt" ++ j = var_opt_char_j"flag_stdarg_opt" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_store_merging" in var_opt_char_k) { ++ k = var_opt_char_k"flag_store_merging" ++ j = var_opt_char_j"flag_store_merging" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_strict_aliasing" in var_opt_char_k) { ++ k = var_opt_char_k"flag_strict_aliasing" ++ j = var_opt_char_j"flag_strict_aliasing" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_strict_enums" in var_opt_char_k) { ++ k = var_opt_char_k"flag_strict_enums" ++ j = var_opt_char_j"flag_strict_enums" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_strict_volatile_bitfields" in var_opt_char_k) { ++ k = var_opt_char_k"flag_strict_volatile_bitfields" ++ j = var_opt_char_j"flag_strict_volatile_bitfields" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_thread_jumps" in 
var_opt_char_k) { ++ k = var_opt_char_k"flag_thread_jumps" ++ j = var_opt_char_j"flag_thread_jumps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_threadsafe_statics" in var_opt_char_k) { ++ k = var_opt_char_k"flag_threadsafe_statics" ++ j = var_opt_char_j"flag_threadsafe_statics" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_toplevel_reorder" in var_opt_char_k) { ++ k = var_opt_char_k"flag_toplevel_reorder" ++ j = var_opt_char_j"flag_toplevel_reorder" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tracer" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tracer" ++ j = var_opt_char_j"flag_tracer" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_trapping_math" in var_opt_char_k) { ++ k = var_opt_char_k"flag_trapping_math" ++ j = var_opt_char_j"flag_trapping_math" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_trapv" in var_opt_char_k) { ++ k = var_opt_char_k"flag_trapv" ++ j = var_opt_char_j"flag_trapv" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_bit_ccp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_bit_ccp" ++ j = var_opt_char_j"flag_tree_bit_ccp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 18) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_builtin_call_dce" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_builtin_call_dce" ++ j = var_opt_char_j"flag_tree_builtin_call_dce" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 19) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_ccp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_ccp" ++ j = var_opt_char_j"flag_tree_ccp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 20) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_ch" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_ch" ++ j = var_opt_char_j"flag_tree_ch" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 21) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_coalesce_vars" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_coalesce_vars" ++ j = var_opt_char_j"flag_tree_coalesce_vars" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 22) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_copy_prop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_copy_prop" ++ j = var_opt_char_j"flag_tree_copy_prop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 23) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_cselim" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_cselim" ++ j = var_opt_char_j"flag_tree_cselim" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 24) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_dce" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_dce" ++ j = var_opt_char_j"flag_tree_dce" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 25) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_dom" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_dom" ++ j = var_opt_char_j"flag_tree_dom" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 26) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_dse" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_dse" ++ j = var_opt_char_j"flag_tree_dse" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev7 >> 27) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_forwprop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_forwprop" ++ j = var_opt_char_j"flag_tree_forwprop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 28) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_fre" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_fre" ++ j = var_opt_char_j"flag_tree_fre" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 29) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_distribute_patterns" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_distribute_patterns" ++ j = var_opt_char_j"flag_tree_loop_distribute_patterns" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 30) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_distribution" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_distribution" ++ j = var_opt_char_j"flag_tree_loop_distribution" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 31) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_if_convert" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_if_convert" ++ j = var_opt_char_j"flag_tree_loop_if_convert" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 32) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_im" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_im" ++ j = var_opt_char_j"flag_tree_loop_im" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 33) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_ivcanon" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_ivcanon" ++ j = var_opt_char_j"flag_tree_loop_ivcanon" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 34) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_optimize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_optimize" ++ j = var_opt_char_j"flag_tree_loop_optimize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 35) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_loop_vectorize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_loop_vectorize" ++ j = var_opt_char_j"flag_tree_loop_vectorize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 36) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_live_range_split" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_live_range_split" ++ j = var_opt_char_j"flag_tree_live_range_split" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 37) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_partial_pre" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_partial_pre" ++ j = var_opt_char_j"flag_tree_partial_pre" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 38) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_phiprop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_phiprop" ++ j = var_opt_char_j"flag_tree_phiprop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 39) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_pre" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_pre" ++ j = var_opt_char_j"flag_tree_pre" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 40) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_pta" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_pta" ++ j = var_opt_char_j"flag_tree_pta" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 41) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_reassoc" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_reassoc" ++ j = 
var_opt_char_j"flag_tree_reassoc" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 42) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_scev_cprop" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_scev_cprop" ++ j = var_opt_char_j"flag_tree_scev_cprop" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 43) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_sink" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_sink" ++ j = var_opt_char_j"flag_tree_sink" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 44) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_slp_transpose_vectorize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_slp_transpose_vectorize" ++ j = var_opt_char_j"flag_tree_slp_transpose_vectorize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 45) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_slp_vectorize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_slp_vectorize" ++ j = var_opt_char_j"flag_tree_slp_vectorize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 46) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_slsr" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_slsr" ++ j = var_opt_char_j"flag_tree_slsr" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 47) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_sra" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_sra" ++ j = var_opt_char_j"flag_tree_sra" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 48) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_switch_conversion" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_switch_conversion" ++ j = var_opt_char_j"flag_tree_switch_conversion" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 49) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_tail_merge" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_tail_merge" ++ j = var_opt_char_j"flag_tree_tail_merge" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 50) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_ter" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_ter" ++ j = var_opt_char_j"flag_tree_ter" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 51) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_vectorize" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_vectorize" ++ j = var_opt_char_j"flag_tree_vectorize" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 52) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_tree_vrp" in var_opt_char_k) { ++ k = var_opt_char_k"flag_tree_vrp" ++ j = var_opt_char_j"flag_tree_vrp" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 53) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unconstrained_commons" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unconstrained_commons" ++ j = var_opt_char_j"flag_unconstrained_commons" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 54) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unroll_all_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unroll_all_loops" ++ j = var_opt_char_j"flag_unroll_all_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 55) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_cunroll_grow_size" in var_opt_char_k) { ++ k = var_opt_char_k"flag_cunroll_grow_size" ++ j = var_opt_char_j"flag_cunroll_grow_size" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 56) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unroll_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unroll_loops" ++ j = 
var_opt_char_j"flag_unroll_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 57) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unsafe_math_optimizations" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unsafe_math_optimizations" ++ j = var_opt_char_j"flag_unsafe_math_optimizations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 58) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unswitch_loops" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unswitch_loops" ++ j = var_opt_char_j"flag_unswitch_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 59) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_unwind_tables" in var_opt_char_k) { ++ k = var_opt_char_k"flag_unwind_tables" ++ j = var_opt_char_j"flag_unwind_tables" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 60) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_var_tracking" in var_opt_char_k) { ++ k = var_opt_char_k"flag_var_tracking" ++ j = var_opt_char_j"flag_var_tracking" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 61) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_var_tracking_assignments" in var_opt_char_k) { ++ k = var_opt_char_k"flag_var_tracking_assignments" ++ j = var_opt_char_j"flag_var_tracking_assignments" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 62) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_var_tracking_assignments_toggle" in var_opt_char_k) { ++ k = var_opt_char_k"flag_var_tracking_assignments_toggle" ++ j = var_opt_char_j"flag_var_tracking_assignments_toggle" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev7 >> 63) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_var_tracking_uninit" in var_opt_char_k) { ++ k = var_opt_char_k"flag_var_tracking_uninit" ++ j = var_opt_char_j"flag_var_tracking_uninit" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 0) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_variable_expansion_in_unroller" in var_opt_char_k) { ++ k = var_opt_char_k"flag_variable_expansion_in_unroller" ++ j = var_opt_char_j"flag_variable_expansion_in_unroller" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 1) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_version_loops_for_strides" in var_opt_char_k) { ++ k = var_opt_char_k"flag_version_loops_for_strides" ++ j = var_opt_char_j"flag_version_loops_for_strides" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 2) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_value_profile_transformations" in var_opt_char_k) { ++ k = var_opt_char_k"flag_value_profile_transformations" ++ j = var_opt_char_j"flag_value_profile_transformations" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 3) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_web" in var_opt_char_k) { ++ k = var_opt_char_k"flag_web" ++ j = var_opt_char_j"flag_web" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 4) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_wrapv" in var_opt_char_k) { ++ k = var_opt_char_k"flag_wrapv" ++ j = var_opt_char_j"flag_wrapv" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 5) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_wrapv_pointer" in var_opt_char_k) { ++ k = var_opt_char_k"flag_wrapv_pointer" ++ j = var_opt_char_j"flag_wrapv_pointer" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 6) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("debug_nonbind_markers_p" in var_opt_char_k) { ++ k = var_opt_char_k"debug_nonbind_markers_p" ++ j = var_opt_char_j"debug_nonbind_markers_p" ++ print " ptr->explicit_mask" k " |= 
((explicit_mask_prev8 >> 7) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_cmlt_arith" in var_opt_char_k) { ++ k = var_opt_char_k"flag_cmlt_arith" ++ j = var_opt_char_j"flag_cmlt_arith" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 8) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_mlow_precision_div" in var_opt_char_k) { ++ k = var_opt_char_k"flag_mlow_precision_div" ++ j = var_opt_char_j"flag_mlow_precision_div" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 9) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_mrecip_low_precision_sqrt" in var_opt_char_k) { ++ k = var_opt_char_k"flag_mrecip_low_precision_sqrt" ++ j = var_opt_char_j"flag_mrecip_low_precision_sqrt" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 10) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_mlow_precision_sqrt" in var_opt_char_k) { ++ k = var_opt_char_k"flag_mlow_precision_sqrt" ++ j = var_opt_char_j"flag_mlow_precision_sqrt" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 11) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_simdmath_64" in var_opt_char_k) { ++ k = var_opt_char_k"flag_simdmath_64" ++ j = var_opt_char_j"flag_simdmath_64" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 12) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("str_align_functions" in var_opt_string_k) { ++ k = var_opt_string_k"str_align_functions" ++ j = var_opt_string_j"str_align_functions" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 13) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("str_align_jumps" in var_opt_string_k) { ++ k = var_opt_string_k"str_align_jumps" ++ j = var_opt_string_j"str_align_jumps" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 14) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("str_align_labels" in var_opt_string_k) { ++ k = var_opt_string_k"str_align_labels" ++ j = var_opt_string_j"str_align_labels" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 15) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("str_align_loops" in var_opt_string_k) { ++ k = var_opt_string_k"str_align_loops" ++ j = var_opt_string_j"str_align_loops" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 16) & HOST_WIDE_INT_1U) << "j";" ++} ++if ("flag_patchable_function_entry" in var_opt_string_k) { ++ k = var_opt_string_k"flag_patchable_function_entry" ++ j = var_opt_string_j"flag_patchable_function_entry" ++ print " ptr->explicit_mask" k " |= ((explicit_mask_prev8 >> 17) & HOST_WIDE_INT_1U) << "j";" ++} ++print "}"; ++print ""; ++ + print "/* Free heap memory used by optimization options */"; + print "void"; + print "cl_optimization_option_free (struct cl_optimization *ptr ATTRIBUTE_UNUSED)"; +-- +2.25.1 +
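Each generated statement above moves a single "this option was set explicitly" bit from the fixed word-and-bit position it occupied in the previous option layout (the explicit_mask_prev6/prev7/prev8 words) to whatever word k and bit j the flag occupies in the current cl_optimization layout. A minimal, self-contained C++ sketch of that idiom follows; the positions, array sizes and helper name are invented for illustration and are not taken from the generated file:

  #include <cstdint>
  #include <cstdio>

  typedef uint64_t hwi;   /* stands in for unsigned HOST_WIDE_INT */

  /* Copy one "explicitly set" bit from the previous layout's fixed
     word/bit into word K, bit J of the current layout.  This mirrors
     the shape of each emitted statement, e.g.:
       ptr->explicit_mask[k] |= ((explicit_mask_prev6 >> 33) & 1) << j;  */
  static void
  move_explicit_bit (hwi *cur, const hwi *prev,
                     int prev_word, int prev_bit, int k, int j)
  {
    cur[k] |= ((prev[prev_word] >> prev_bit) & (hwi) 1) << j;
  }

  int
  main ()
  {
    hwi prev[9] = { 0 };
    hwi cur[9] = { 0 };
    prev[6] |= (hwi) 1 << 33;                     /* flag was explicit in the old layout */
    move_explicit_bit (cur, prev, 6, 33, /* k = */ 0, /* j = */ 5);
    printf ("%d\n", (int) ((cur[0] >> 5) & 1));   /* prints 1 */
    return 0;
  }

Hard-coding the old positions into the generated source is presumably what lets the _prev stream-in path (see patch 0304 below) decode option masks written by an earlier compiler without needing that compiler's headers.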
View file
_service:tar_scm:0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch
Added
@@ -0,0 +1,963 @@
+From f81a5b294711e3a420fe66702f0d9221332271c4 Mon Sep 17 00:00:00 2001
+From: h00564365 <huangxiaoquan1@huawei.com>
+Date: Wed, 13 Nov 2024 17:18:01 +0800
+Subject: [PATCH 2/2] Add multi-version lto symbol parse, cross lto units
+ ipa-inline extension, and lto compression algorithm specified.
+
+---
+ gcc/common.opt                               |  20 +++
+ gcc/config/aarch64/aarch64.cc                |  41 ++++++
+ gcc/doc/tm.texi                              |   6 +
+ gcc/doc/tm.texi.in                           |   2 +
+ gcc/ipa-inline.cc                            | 141 ++++++++++++-
+ gcc/lto-compress.cc                          |   6 +-
+ gcc/lto-section-in.cc                        |   5 +
+ gcc/lto-streamer-out.cc                      |   7 +-
+ gcc/lto-wrapper.cc                           |   4 +
+ gcc/optc-save-gen.awk                        |  57 ++++++++
+ gcc/opth-gen.awk                             |   3 +
+ gcc/opts.cc                                  |  46 ++++++
+ gcc/target.def                               |  10 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c |  15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c |   6 +
+ gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c |  15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c |   5 +
+ gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c |  15 ++
+ gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c |  10 ++
+ gcc/tree-streamer-in.cc                      |  58 +++++++-
+ lto-plugin/lto-plugin.c                      |  83 +++++++++++
+ 21 files changed, 547 insertions(+), 8 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c
+ create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index be5fcc681..78cfc333a 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1928,6 +1928,21 @@ finline-atomics
+ Common Var(flag_inline_atomics) Init(1) Optimization
+ Inline __atomic operations when a lock free instruction sequence is available.
+ 
++fmulti-version-lib=
++Common Joined Var(multi_version_lib_string)
++Use the multi-version LTO stream-in mode for the specified targets (object or
++lib).  Separate multiple target files with commas (,), without spaces.
++
++finline-force
++Common Var(flag_inline_force) Init(0) Optimization
++Force IPA inlining when march options are incompatible between functions.
++
++finline-force=
++Common Joined Var(force_inline_targets_string)
++Force IPA inlining of the specified targets (object or lib) when march options
++are incompatible between functions.  Separate multiple target files with
++commas (,), without spaces.
++
+ fcf-protection
+ Common RejectNegative Alias(fcf-protection=,full)
+ 
+@@ -2168,6 +2183,11 @@ flto-partition=
+ Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED)
+ Specify the algorithm to partition symbols and vars at linktime.
+ 
++flto-compression-algorithm=
++Common Joined Var(lto_compression_algorithm)
++-flto-compression-algorithm=<format>	Generate LTO compression in the given
++<format> (zlib or zstd).
++
+ ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
+ flto-compression-level= + Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 19) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 025a3c478..f095f17aa 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -20829,6 +20829,44 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) + arch->name, extension.c_str ()); + } + ++/* Implement TARGET_OPTION_PRINT_DIFF. */ ++ ++static void ++aarch64_option_print_diff (FILE *file, int indent, ++ struct cl_target_option *ptr1, ++ struct cl_target_option *ptr2) ++{ ++ const char *const cpu1 ++ = aarch64_get_tune_cpu (ptr1->x_selected_tune)->name; ++ const struct processor *arch1 = aarch64_get_arch (ptr1->x_selected_arch); ++ std::string extension1 ++ = aarch64_get_extension_string_for_isa_flags (ptr1->x_aarch64_isa_flags, ++ arch1->flags); ++ ++ const char *const cpu2 ++ = aarch64_get_tune_cpu (ptr2->x_selected_tune)->name; ++ const struct processor *arch2 = aarch64_get_arch (ptr2->x_selected_arch); ++ std::string extension2 ++ = aarch64_get_extension_string_for_isa_flags (ptr2->x_aarch64_isa_flags, ++ arch2->flags); ++ ++ if (cpu1 != cpu2 && (!cpu1 || !cpu2 || strcmp (cpu1, cpu2))) ++ fprintf (file, "%*s%s (%s/%s)\n", indent, "", ++ "cpu", cpu1 ? cpu1 : "(null)", cpu2 ? cpu2 : "(null)"); ++ ++ if (arch1->name != arch2->name ++ && (!arch1->name || !arch2->name || strcmp (arch1->name, arch2->name))) ++ fprintf (file, "%*s%s (%s/%s)\n", indent, "", ++ "arch", arch1->name ? arch1->name : "(null)", ++ arch2->name ? arch2->name : "(null)"); ++ ++ if (extension1 != extension2) ++ fprintf (file, "%*s%s (%s/%s)\n", indent, "", ++ "extension", ++ extension1.empty () ? "(null)" : extension1.c_str (), ++ extension2.empty () ? "(null)" : extension2.c_str ()); ++} ++ + static GTY(()) tree aarch64_previous_fndecl; + + void +@@ -31161,6 +31199,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_OPTION_PRINT + #define TARGET_OPTION_PRINT aarch64_option_print + ++#undef TARGET_OPTION_PRINT_DIFF ++#define TARGET_OPTION_PRINT_DIFF aarch64_option_print_diff ++ + #undef TARGET_OPTION_VALID_ATTRIBUTE_P + #define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 1e96521e6..50bbbbc42 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10589,6 +10589,12 @@ information in the @code{struct cl_target_option} structure for + function-specific options. + @end deftypefn + ++@deftypefn {Target Hook} void TARGET_OPTION_PRINT_DIFF (FILE *@var{file}, int @var{indent}, struct cl_target_option *@var{ptr1}, struct cl_target_option *@var{ptr2}) ++This hook is called to print diff additional target-specific ++information in the ptr1 and ptr2 @code{struct cl_target_option} structure for ++function-specific options. ++@end deftypefn ++ + @deftypefn {Target Hook} bool TARGET_OPTION_PRAGMA_PARSE (tree @var{args}, tree @var{pop_target}) + This target hook parses the options for @code{#pragma GCC target}, which + sets the target-specific options for functions that occur later in the +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 2dd515659..cfda60304 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -6985,6 +6985,8 @@ on this implementation detail. 
+ + @hook TARGET_OPTION_PRINT + ++@hook TARGET_OPTION_PRINT_DIFF ++ + @hook TARGET_OPTION_PRAGMA_PARSE + + @hook TARGET_OPTION_OVERRIDE +diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc +index f8bb072c4..8d5cc9a84 100644 +--- a/gcc/ipa-inline.cc ++++ b/gcc/ipa-inline.cc +@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3. If not see + the need for offline copy of the function. */ + + #include "config.h" ++#define INCLUDE_SET ++#define INCLUDE_STRING + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -127,6 +129,7 @@ typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t; + static int overall_size; + static profile_count max_count; + static profile_count spec_rem; ++static std::set<std::string> force_inline_targets; + + /* Return false when inlining edge E would lead to violating + limits on function unit growth or stack usage growth. +@@ -222,6 +225,38 @@ caller_growth_limits (struct cgraph_edge *e) + return true; + } + ++/* Warn and prompt the user, and output only once for the file pair where ++ the function is located. */ ++ ++static void ++prompt_inline_failed_target_option_reason (struct cgraph_edge *e) ++{ ++ static std::set<std::pair<void*, void*>> address_pair_set; ++ if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH ++ && !cl_target_option_eq_major (target_opts_for_fn (e->caller->decl), ++ target_opts_for_fn (e->callee->ultimate_alias_target ()->decl)) ++ && e->caller->lto_file_data ++ && e->callee->ultimate_alias_target ()->lto_file_data) ++ { ++ std::pair<void*, void*> addr_pair ++ = std::make_pair (&e->caller->lto_file_data, ++ &e->callee->ultimate_alias_target ()->lto_file_data); ++ if (address_pair_set.find (addr_pair) != address_pair_set.end ()) ++ return; ++ ++ address_pair_set.insert (addr_pair); ++ warning (0, "LTO objects caller in: %s, callee in: %s, not inlinable: %s." ++ " Try to use -finline-force=callee_object_or_lib_name to force " ++ "inline", e->caller->lto_file_data->file_name, ++ e->callee->ultimate_alias_target ()->lto_file_data->file_name, ++ cgraph_inline_failed_string (CIF_TARGET_OPTION_MISMATCH)); ++ ++ cl_target_option_print_diff ++ (stderr, 2, target_opts_for_fn (e->caller->decl), ++ target_opts_for_fn (e->callee->ultimate_alias_target ()->decl)); ++ } ++} ++ + /* Dump info about why inlining has failed. */ + + static void +@@ -254,6 +289,8 @@ report_inline_failed_reason (struct cgraph_edge *e) + (dump_file, 2, opts_for_fn (e->caller->decl), + opts_for_fn (e->callee->ultimate_alias_target ()->decl)); + } ++ ++ prompt_inline_failed_target_option_reason (e); + } + + /* Decide whether sanitizer-related attributes allow inlining. */ +@@ -310,6 +347,77 @@ sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee) + (opts_for_fn (caller->decl)->x_##flag \ + != opts_for_fn (callee->decl)->x_##flag) + ++/* find related node that has lto_file_data. */ ++ ++static cgraph_node * ++find_related_node_lto_file_data (cgraph_node *node) ++{ ++ cgraph_node *cur = node; ++ ++ while (cur->clone_of) ++ { ++ /* Switch to original node, for example xxx.constprop.x function. */ ++ cur = cur->clone_of; ++ if (cur->lto_file_data) ++ return cur; ++ ++ /* Find the lto_file_data information of referring. 
*/ ++ struct ipa_ref *ref = NULL; ++ for (int i = 0; cur->iterate_referring (i, ref); i++) ++ { ++ struct cgraph_node *cnode = dyn_cast <cgraph_node *> (ref->referring); ++ if (cnode && cnode->lto_file_data) ++ return cnode; ++ } ++ } ++ ++ return NULL; ++} ++ ++/* Determines whether to force inline or force inline only the specified ++ object. Use for 3 inline extensions: ++ 1) CIF_TARGET_OPTION_MISMATCH: cancel the restriction that the target options ++ of different compilation units are different. ++ 2) CIF_OVERWRITABLE: indicates that the function is available, which is ++ similar to the "inline" keyword indication. ++ 3) CIF_OPTIMIZATION_MISMATCH: cancel the check in the case of fp_expressions, ++ which is similar to the "always_inline" attribute. ++ */ ++ ++static bool ++can_force_inline_p (cgraph_node *callee) ++{ ++ if (!in_lto_p) ++ return false; ++ if (flag_inline_force) ++ return true; ++ if (force_inline_targets_string) ++ { ++ cgraph_node * node = callee; ++ std::string name = ""; ++ if (callee->ultimate_alias_target () == NULL ++ || callee->ultimate_alias_target ()->lto_file_data == NULL) ++ { ++ node = find_related_node_lto_file_data (callee); ++ if (node && node->lto_file_data) ++ name = node->lto_file_data->file_name; ++ } ++ else ++ name = node->ultimate_alias_target ()->lto_file_data->file_name; ++ while (!name.empty () && name.back () == '/') ++ name.erase (name.length () - 1); ++ if (name.empty ()) ++ return false; ++ size_t last_slash_pos = name.find_last_of ('/'); ++ if (last_slash_pos != std::string::npos ++ && last_slash_pos != name.length () - 1) ++ name = name.substr (last_slash_pos + 1); ++ if (force_inline_targets.find (name) != force_inline_targets.end ()) ++ return true; ++ } ++ return false; ++} ++ + /* Decide if we can inline the edge and possibly update + inline_failed reason. + We check whether inlining is possible at all and whether +@@ -352,7 +460,7 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, + e->inline_failed = CIF_USES_COMDAT_LOCAL; + inlinable = false; + } +- else if (avail <= AVAIL_INTERPOSABLE) ++ else if (avail <= AVAIL_INTERPOSABLE && !can_force_inline_p (callee)) + { + e->inline_failed = CIF_OVERWRITABLE; + inlinable = false; +@@ -378,8 +486,8 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, + inlinable = false; + } + /* Check compatibility of target optimization options. */ +- else if (!targetm.target_option.can_inline_p (caller->decl, +- callee->decl)) ++ else if (!can_force_inline_p (callee) ++ && !targetm.target_option.can_inline_p (caller->decl, callee->decl)) + { + e->inline_failed = CIF_TARGET_OPTION_MISMATCH; + inlinable = false; +@@ -495,7 +603,8 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report, + bool always_inline = + (DECL_DISREGARD_INLINE_LIMITS (callee->decl) + && lookup_attribute ("always_inline", +- DECL_ATTRIBUTES (callee->decl))); ++ DECL_ATTRIBUTES (callee->decl))) ++ || can_force_inline_p (callee); + ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller); + ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee); + +@@ -2652,6 +2761,27 @@ flatten_remove_node_hook (struct cgraph_node *node, void *data) + removed->add (node); + } + ++/* Parse string that specify forced inlining, separated by commas. 
*/
++
++static void
++parse_force_inline_targets_string (const char* s)
++{
++  std::string target_string (s);
++  std::string delim = ",";
++  size_t start = 0;
++  size_t end = target_string.find (delim);
++  if (target_string.substr (start, end - start) == "")
++    return;
++
++  while (end != std::string::npos)
++    {
++      force_inline_targets.insert (target_string.substr (start, end - start));
++      start = end + delim.size ();
++      end = target_string.find (delim, start);
++    }
++  force_inline_targets.insert (target_string.substr (start, end - start));
++}
++
+ /* Decide on the inlining.  We do so in the topological order to avoid
+    expenses on updating data structures.  */
+ 
+@@ -2665,6 +2795,9 @@ ipa_inline (void)
+   int cold;
+   bool remove_functions = false;
+ 
++  if (force_inline_targets_string)
++    parse_force_inline_targets_string (force_inline_targets_string);
++
+   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
+ 
+   if (dump_file)
+diff --git a/gcc/lto-compress.cc b/gcc/lto-compress.cc
+index 27f0992a8..f9d0722a9 100644
+--- a/gcc/lto-compress.cc
++++ b/gcc/lto-compress.cc
+@@ -305,7 +305,11 @@ void
+ lto_end_compression (struct lto_compression_stream *stream)
+ {
+ #ifdef HAVE_ZSTD_H
+-  lto_compression_zstd (stream);
++  if (lto_compression_algorithm
++      && strcmp (lto_compression_algorithm, "zstd") == 0)
++    lto_compression_zstd (stream);
++  else
++    lto_compression_zlib (stream);
+ #else
+   lto_compression_zlib (stream);
+ #endif
+diff --git a/gcc/lto-section-in.cc b/gcc/lto-section-in.cc
+index ba87c7276..947f8eb15 100644
+--- a/gcc/lto-section-in.cc
++++ b/gcc/lto-section-in.cc
+@@ -448,6 +448,11 @@ lto_free_function_in_decl_state_for_node (symtab_node *node)
+       lto_free_function_in_decl_state (*slot);
+       node->lto_file_data->function_decl_states->clear_slot (slot);
+     }
++
++  /* In the force inline case, keep the lto file path information.  */
++  if (in_lto_p && (flag_inline_force || force_inline_targets_string))
++    return;
++
+   node->lto_file_data = NULL;
+ }
+ 
+diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
+index 471f35c31..a574f0f1e 100644
+--- a/gcc/lto-streamer-out.cc
++++ b/gcc/lto-streamer-out.cc
+@@ -2666,7 +2666,12 @@ produce_lto_section ()
+   free (section_name);
+ 
+ #ifdef HAVE_ZSTD_H
+-  lto_compression compression = ZSTD;
++  lto_compression compression = ZLIB;
++  if (lto_compression_algorithm
++      && strcmp (lto_compression_algorithm, "zstd") == 0)
++    compression = ZSTD;
++  else
++    compression = ZLIB;
+ #else
+   lto_compression compression = ZLIB;
+ #endif
+diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
+index 155ccce57..2b1994652 100644
+--- a/gcc/lto-wrapper.cc
++++ b/gcc/lto-wrapper.cc
+@@ -491,6 +491,8 @@ merge_and_complain (vec<cl_decoded_option> &decoded_options,
+ 	   || decoded_options[j].opt_index == OPT_fpic)
+ 	 {
+ 	   /* -fno-pic in one unit implies -fno-pic everywhere.  */
++	   /* The -fno-pic adjustment here should provide some information
++	      hints, but may affect the use case test of deja.  */
+ 	   if (decoded_options[j].value == 0)
+ 	     j++;
+ 	   /* If we have no pic option or merge in -fno-pic, we still may turn
+@@ -534,6 +536,8 @@ merge_and_complain (vec<cl_decoded_option> &decoded_options,
+ 	   || decoded_options[j].opt_index == OPT_fpie)
+ 	 {
+ 	   /* -fno-pie in one unit implies -fno-pie everywhere.  */
++	   /* The -fno-pie adjustment here should provide some information
++	      hints, but may affect the use case test of deja.  */
+ 	   if (decoded_options[j].value == 0)
+ 	     j++;
+ 	   /* If we have no pie option or merge in -fno-pie, we still preserve
+diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk
+index 7c012dd4e..94b85b331 100644
+--- a/gcc/optc-save-gen.awk
++++ b/gcc/optc-save-gen.awk
+@@ -1043,6 +1043,10 @@ for (i = 0; i < n_target_string; i++) {
+ 	print "";
+ }
+ 
++print "";
++print "  if (targetm.target_option.print_diff)";
++print "    targetm.target_option.print_diff (file, indent, ptr1, ptr2);";
++
+ print "}";
+ 
+ print "";
+@@ -1160,6 +1164,59 @@ print "  return true;";
+ 
+ print "}";
+ 
++print "";
++print "/* Compare two target major options.  */";
++print "bool";
++print "cl_target_option_eq_major (struct cl_target_option const *ptr1 ATTRIBUTE_UNUSED,";
++print "                           struct cl_target_option const *ptr2 ATTRIBUTE_UNUSED)";
++print "{";
++n_target_val_major = 0;
++
++for (i = 0; i < n_target_save; i++) {
++  var = target_save_decl[i];
++  sub (" *=.*", "", var);
++  name = var;
++  type = var;
++  sub("^.*[ *]", "", name)
++  sub(" *" name "$", "", type)
++  if (target_save_decl[i] ~ "^const char \\*+[_" alnum "]+$")
++    continue;
++  if (target_save_decl[i] ~ " .*\\[.+\\]+$")
++    continue;
++
++  var_target_val_major[n_target_val_major++] = name;
++}
++if (have_save) {
++  for (i = 0; i < n_opts; i++) {
++    if (flag_set_p("Save", flags[i])) {
++      name = var_name(flags[i])
++      if(name == "")
++        name = "target_flags";
++
++      if(name in var_list_seen)
++        continue;
++
++      var_list_seen[name]++;
++      otype = var_type_struct(flags[i])
++      if (otype ~ "^const char \\**$")
++        continue;
++      var_target_val_major[n_target_val_major++] = "x_" name;
++    }
++  }
++} else {
++  var_target_val_major[n_target_val_major++] = "x_target_flags";
++}
++
++for (i = 0; i < n_target_val_major; i++) {
++  name = var_target_val_major[i]
++  print "  if (ptr1->" name " != ptr2->" name ")";
++  print "    return false;";
++}
++
++print "  return true;";
++
++print "}";
++
+ print "";
+ print "/* Hash target options  */";
+ print "hashval_t";
+diff --git a/gcc/opth-gen.awk b/gcc/opth-gen.awk
+index 8bba8ec45..cb016e85d 100644
+--- a/gcc/opth-gen.awk
++++ b/gcc/opth-gen.awk
+@@ -330,6 +330,9 @@ print "";
+ print "/* Compare two target option variables from a structure.  */";
+ print "extern bool cl_target_option_eq (const struct cl_target_option *, const struct cl_target_option *);";
+ print "";
++print "/* Compare two target major option variables from a structure.  */";
++print "extern bool cl_target_option_eq_major (const struct cl_target_option *, const struct cl_target_option *);";
++print "";
+ print "/* Free heap memory used by target option variables.  */";
+ print "extern void cl_target_option_free (struct cl_target_option *);";
+ print "";
+diff --git a/gcc/opts.cc b/gcc/opts.cc
+index d97f6079f..d9de8747c 100644
+--- a/gcc/opts.cc
++++ b/gcc/opts.cc
+@@ -2611,6 +2611,32 @@ print_help (struct gcc_options *opts, unsigned int lang_mask,
+ 	     lang_mask);
+ }
+ 
++/* Checks whether the input forced inline string complies with the
++   restriction.  */
++
++void
++check_force_inline_targets_string (const char *arg, location_t loc)
++{
++  const int MAX_FORCE_INLINE_TARGET_LEN = 10000;
++  const int MAX_NUM_TARGET = 100;
++  __SIZE_TYPE__ length = strlen (arg);
++  int target_num = 1;
++  if (length > MAX_FORCE_INLINE_TARGET_LEN)
++    error_at (loc,
++	      "input string exceeds %d characters to %<-finline-force=%> "
++	      "option: %qs", MAX_FORCE_INLINE_TARGET_LEN, arg);
++  for (__SIZE_TYPE__ i = 0; i < length; i++)
++    {
++      if (arg[i] == ',')
++	{
++	  target_num++;
++	  if (target_num > MAX_NUM_TARGET)
++	    error_at (loc, "input target exceeds %d to %<-finline-force=%> "
++		      "option: %qs", MAX_NUM_TARGET, arg);
++	}
++    }
++}
++
+ /* Handle target- and language-independent options.  Return zero to
+    generate an "unknown option" message.  Only options that need
+    extra handling need to be listed here; if you simply want
+@@ -2952,6 +2978,14 @@ common_handle_option (struct gcc_options *opts,
+ 				 value / 2);
+       break;
+ 
++    case OPT_finline_force:
++      opts->x_force_inline_targets_string = value ? "" : NULL;
++      break;
++
++    case OPT_finline_force_:
++      check_force_inline_targets_string (arg, loc);
++      break;
++
+     case OPT_finstrument_functions_exclude_function_list_:
+       add_comma_separated_to_vector
+ 	(&opts->x_flag_instrument_functions_exclude_functions, arg);
+@@ -3226,6 +3260,18 @@ common_handle_option (struct gcc_options *opts,
+ 		  "unrecognized argument to %<-flto=%> option: %qs", arg);
+       break;
+ 
++    case OPT_flto_compression_algorithm_:
++      if (atoi (arg) == 0
++	  && strcmp (arg, "zlib") != 0
++#ifdef HAVE_ZSTD_H
++	  && strcmp (arg, "zstd") != 0
++#endif
++	  )
++	error_at (loc,
++		  "unrecognized argument to %<-flto-compression-algorithm=%> "
++		  "option: %qs", arg);
++      break;
++
+     case OPT_w:
+       dc->dc_inhibit_warnings = true;
+       break;
+diff --git a/gcc/target.def b/gcc/target.def
+index 7183f363d..142858fa3 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -6644,6 +6644,16 @@ information in the @code{struct cl_target_option} structure for\n\
+ function-specific options.",
+  void, (FILE *file, int indent, struct cl_target_option *ptr), NULL)
+ 
++/* Function to print any extra target state from the target options
++   structure.  */
++DEFHOOK
++(print_diff,
++ "This hook is called to print diff additional target-specific\n\
++information in the ptr1 and ptr2 @code{struct cl_target_option} structure for\n\
++function-specific options.",
++ void, (FILE *file, int indent, struct cl_target_option *ptr1,
++	struct cl_target_option *ptr2), NULL)
++
+ /* Function to parse arguments to be validated for #pragma target, and to
+    change the state if the options are valid.  If the first argument is
+    NULL, the second argument specifies the default options to use.
Return +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c b/gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c +new file mode 100644 +index 000000000..0b5cd5953 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c +@@ -0,0 +1,15 @@ ++/* { dg-lto-do link } */ ++/* { dg-require-effective-target shared } */ ++/* { dg-extra-ld-options {-shared -finline-force=c_lto_binary-inline-1_1.o} } */ ++/* { dg-lto-options {{-O3 -flto -march=armv8.2-a -fdump-ipa-inline-details}} } */ ++ ++extern double multi_op(float x); ++ ++double func_a (float x) ++{ ++ double res = 0; ++ res = multi_op (x); ++ return res; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump "Inlined 1 calls" "inline" } } */ +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c b/gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c +new file mode 100644 +index 000000000..8181384b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c +@@ -0,0 +1,6 @@ ++/* { dg-options "-march=armv8.3-a+sve+f64mm+crc+crypto+fp16+i8mm+simd" } */ ++ ++double multi_op (float x) ++{ ++ return x * 2 + 10; ++} +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c b/gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c +new file mode 100644 +index 000000000..e873937d3 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c +@@ -0,0 +1,15 @@ ++/* { dg-lto-do link } */ ++/* { dg-require-effective-target shared } */ ++/* { dg-extra-ld-options {-shared -finline-force=c_lto_binary-inline-2_1.o} } */ ++/* { dg-lto-options {{-O3 -flto -fPIC -fdump-ipa-inline-details}} } */ ++ ++extern double multi_op(float x); ++ ++double func_a (float x) ++{ ++ double res = 0; ++ res = multi_op (x); ++ return res; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump "Inlined 1 calls" "inline" } } */ +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c b/gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c +new file mode 100644 +index 000000000..dc7c4fd9f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c +@@ -0,0 +1,5 @@ ++ ++double multi_op (float x) ++{ ++ return x * 2 + 10; ++} +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c b/gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c +new file mode 100644 +index 000000000..c78ba066d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c +@@ -0,0 +1,15 @@ ++/* { dg-lto-do link } */ ++/* { dg-require-effective-target shared } */ ++/* { dg-extra-ld-options {-shared -finline-force=c_lto_binary-inline-3_1.o} } */ ++/* { dg-lto-options {{-O3 -flto -fdump-ipa-inline-details}} } */ ++ ++extern double multi_op(double x); ++ ++double func_a (double x) ++{ ++ double res = 0; ++ res = multi_op (x); ++ return res; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump "Inlined 1 calls" "inline" } } */ +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c b/gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c +new file mode 100644 +index 000000000..8b505fa0c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c +@@ -0,0 +1,10 @@ ++/* { dg-options "-O2 -fno-math-errno" } */ ++ ++#include <math.h> ++ ++double multi_op (double x) ++{ ++ double a = 0; ++ a = sqrt (x); ++ return a * 2 + 10; ++} +diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc +index a35a810f4..79f819ad8 100644 +--- a/gcc/tree-streamer-in.cc ++++ b/gcc/tree-streamer-in.cc +@@ -20,6 +20,9 @@ along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + + #include "config.h" ++#include <cstdio> ++#define INCLUDE_SET ++#define INCLUDE_STRING + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -36,6 +39,47 @@ along with GCC; see the file COPYING3. If not see + #include "asan.h" + #include "opts.h" + ++/* Parse string that specify forced inlining, separated by commas. */ ++static std::set<std::string> multi_version_libs; ++static void ++parse_multi_version_lib_string (const char* s) ++{ ++ std::string target_string (s); ++ std::string delim = ","; ++ size_t start = 0; ++ size_t end = target_string.find (delim); ++ if (target_string.substr (start, end - start) == "") ++ return; ++ ++ while (end != std::string::npos) ++ { ++ multi_version_libs.insert (target_string.substr (start, end - start)); ++ start = end + delim.size (); ++ end = target_string.find (delim, start); ++ } ++ multi_version_libs.insert (target_string.substr (start, end - start)); ++} ++ ++static bool ++target_lib_p (std::string name) ++{ ++ if (multi_version_libs.empty () && multi_version_lib_string) ++ parse_multi_version_lib_string (multi_version_lib_string); ++ if (multi_version_lib_string) ++ { ++ while (!name.empty () && name.back () == '/') ++ name.erase (name.length () - 1); ++ if (name.empty ()) ++ return false; ++ size_t last_slash_pos = name.find_last_of ('/'); ++ if (last_slash_pos != std::string::npos ++ && last_slash_pos != name.length () - 1) ++ name = name.substr (last_slash_pos + 1); ++ if (multi_version_libs.find (name) != multi_version_libs.end ()) ++ return true; ++ } ++ return false; ++} + + /* Read a STRING_CST from the string table in DATA_IN using input + block IB. */ +@@ -555,7 +599,12 @@ streamer_read_tree_bitfields (class lto_input_block *ib, + unpack_ts_translation_unit_decl_value_fields (data_in, &bp, expr); + + if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) +- cl_optimization_stream_in (data_in, &bp, TREE_OPTIMIZATION (expr)); ++ { ++ if (target_lib_p (data_in->file_data->file_name)) ++ cl_optimization_stream_in_prev (data_in, &bp, TREE_OPTIMIZATION (expr)); ++ else ++ cl_optimization_stream_in (data_in, &bp, TREE_OPTIMIZATION (expr)); ++ } + + if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) + { +@@ -569,7 +618,12 @@ streamer_read_tree_bitfields (class lto_input_block *ib, + #ifndef ACCEL_COMPILER + if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION)) + { +- cl_target_option_stream_in (data_in, &bp, TREE_TARGET_OPTION (expr)); ++ if (target_lib_p (data_in->file_data->file_name)) ++ cl_target_option_stream_in_prev ( ++ data_in, &bp, TREE_TARGET_OPTION (expr)); ++ else ++ cl_target_option_stream_in (data_in, &bp, TREE_TARGET_OPTION (expr)); ++ + if (targetm.target_option.post_stream_in) + targetm.target_option.post_stream_in (TREE_TARGET_OPTION (expr)); + } +diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c +index 33d49571d..b3301a8a4 100644 +--- a/lto-plugin/lto-plugin.c ++++ b/lto-plugin/lto-plugin.c +@@ -89,6 +89,10 @@ along with this program; see the file COPYING3. If not see + + #define LTO_SEGMENT_NAME "__GNU_LTO" + ++#define GCC_major_version 12 ++#define LTO_major_version GCC_major_version ++#define LTO_minor_version 0 ++ + /* Return true if STR string starts with PREFIX. */ + + static inline bool +@@ -118,6 +122,18 @@ struct plugin_symtab + unsigned long long id; + }; + ++/* Structure that represents LTO ELF section with information ++ about the format. 
*/
++
++struct lto_section
++{
++  int16_t major_version;
++  int16_t minor_version;
++  unsigned char slim_object;
++  unsigned char _padding;
++  uint16_t flags;
++};
++
+ /* Encapsulates object file data during symbol scan.  */
+ struct plugin_objfile
+ {
+@@ -126,6 +142,7 @@ struct plugin_objfile
+   simple_object_read *objfile;
+   struct plugin_symtab *out;
+   const struct ld_plugin_input_file *file;
++  struct lto_section version;
+ };
+ 
+ /* All that we have to remember about a file.  */
+@@ -216,6 +233,8 @@ static int gold_version = -1;
+    (in fact, only first letter of style arg is checked.)  */
+ static enum symbol_style sym_style = ss_none;
+ 
++static bool multi_version_lto_parse = false;
++
+ static void
+ check_1 (int gate, enum ld_plugin_level level, const char *text)
+ {
+@@ -1078,6 +1097,59 @@ err:
+   return 0;
+ }
+ 
++/* Process version section of an object file.  */
++
++static int
++process_lto_version (void *data, const char *name, off_t offset, off_t length)
++{
++  struct plugin_objfile *obj = (struct plugin_objfile *)data;
++  char *s;
++  char *secdatastart, *secdata;
++
++  if (!startswith (name, ".gnu.lto_.lto"))
++    return 1;
++
++  s = strrchr (name, '.');
++  if (s)
++    sscanf (s, ".%" PRI_LL "x", &obj->out->id);
++  secdata = secdatastart = xmalloc (length);
++  offset += obj->file->offset;
++  if (offset != lseek (obj->file->fd, offset, SEEK_SET))
++    goto err;
++
++  do
++    {
++      ssize_t got = read (obj->file->fd, secdata, length);
++      if (got == 0)
++	break;
++      else if (got > 0)
++	{
++	  secdata += got;
++	  length -= got;
++	}
++      else if (errno != EINTR)
++	goto err;
++    }
++  while (length > 0);
++  if (length > 0)
++    goto err;
++
++  struct lto_section *lto_info = (struct lto_section *)secdatastart;
++  obj->version = *lto_info;
++
++  obj->found++;
++  free (secdatastart);
++  return 1;
++
++err:
++  if (message)
++    message (LDPL_FATAL, "%s: corrupt object file", obj->file->name);
++  /* Force claim_file_handler to abandon this file.  */
++  obj->found = 0;
++  free (secdatastart);
++  return 0;
++}
++
+ /* Process one section of an object file.  */
+ 
+ static int
+@@ -1223,6 +1295,15 @@ claim_file_handler (const struct ld_plugin_input_file *file, int *claimed)
+   if (obj.found == 0 && obj.offload == 0)
+     goto err;
+ 
++  if (multi_version_lto_parse)
++    {
++      simple_object_find_sections (obj.objfile, process_lto_version, &obj,
++				   &err);
++      if (obj.version.major_version != LTO_major_version
++	  || obj.version.minor_version != LTO_minor_version)
++	goto err;
++    }
++
+   if (obj.found > 1)
+     resolve_conflicts (&lto_file.symtab, &lto_file.conflicts);
+ 
+@@ -1366,6 +1447,8 @@ process_option (const char *option)
+     }
+   else if (startswith (option, "-ltrans-objects="))
+     ltrans_objects = xstrdup (option + strlen ("-ltrans-objects="));
++  else if (strcmp (option, "-multi-version-lto-parse") == 0)
++    multi_version_lto_parse = true;
+   else
+     {
+       int size;
+--
+2.25.1
+
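One detail of the lto-plugin change worth spelling out: with -multi-version-lto-parse enabled, claim_file_handler re-reads the object's .gnu.lto_.lto section, overlays the lto_section header on its first bytes, and abandons the file unless the recorded version matches the plugin's hard-coded 12.0. A standalone C++ sketch of that acceptance check; version_ok and the example bytes are hypothetical, and a little-endian host is assumed for the byte values:

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  /* Same field layout as the plugin's struct lto_section.  */
  struct lto_section
  {
    int16_t major_version;
    int16_t minor_version;
    unsigned char slim_object;
    unsigned char _padding;
    uint16_t flags;
  };

  /* Apply the patch's accept/reject rule to a raw section header.  */
  static bool
  version_ok (const unsigned char *secdata, size_t len)
  {
    if (len < sizeof (lto_section))
      return false;
    lto_section s;
    std::memcpy (&s, secdata, sizeof s);   /* avoids unaligned reads */
    return s.major_version == 12 && s.minor_version == 0;
  }

  int
  main ()
  {
    /* First 8 bytes of a .gnu.lto_.lto section: major 12, minor 0.  */
    unsigned char sec[8] = { 12, 0, 0, 0, 0, 0, 0, 0 };
    std::printf ("%s\n", version_ok (sec, sizeof sec) ? "claim" : "reject");
    return 0;
  }

In other words, an object produced by a different GCC major/minor falls back to being treated as a regular (non-LTO) input instead of crashing the WPA stage on an incompatible stream.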
View file
_service:tar_scm:0305-Backport-varasm-Handle-private-COMDAT-function-symbo.patch
Added
@@ -0,0 +1,296 @@
+From bbb4954294d010977fcfb96931384101cf015a44 Mon Sep 17 00:00:00 2001
+From: Jakub Jelinek <jakub@redhat.com>
+Date: Mon, 26 Feb 2024 17:55:07 +0100
+Subject: [PATCH] [Backport]varasm: Handle private COMDAT function symbol
+ reference in readonly data section [PR113617]
+
+If default_elf_select_rtx_section is called to put a reference to some
+local symbol defined in a comdat section into memory, which happens more often
+since the r14-4944 RA change, linking might fail.
+default_elf_select_rtx_section puts such constants into .data.rel.ro.local
+etc. sections and if linker chooses comdat sections from some other TU
+and discards the one to which a relocation in .data.rel.ro.local remains,
+linker diagnoses error.  References to private comdat symbols can only appear
+from functions or data objects in the same comdat group, so the following
+patch arranges using .data.rel.ro.local.pool.<comdat_name> and similar sections.
+
+2024-02-26  Jakub Jelinek  <jakub@redhat.com>
+	    H.J. Lu  <hjl.tools@gmail.com>
+
+	PR rtl-optimization/113617
+	* varasm.cc (default_elf_select_rtx_section): For
+	references to private symbols in comdat sections
+	use .data.relro.local.pool.<comdat>, .data.relro.pool.<comdat>
+	or .rodata.<comdat> comdat sections.
+
+	* g++.dg/other/pr113617.C: New test.
+	* g++.dg/other/pr113617.h: New test.
+	* g++.dg/other/pr113617-aux.cc: New test.
+---
+ gcc/testsuite/g++.dg/other/pr113617-aux.cc |   9 ++
+ gcc/testsuite/g++.dg/other/pr113617.C      |  27 +++++
+ gcc/testsuite/g++.dg/other/pr113617.h      | 132 +++++++++++++++
+ gcc/varasm.cc                              |  48 +++++-
+ 4 files changed, 215 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617-aux.cc
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617.C
+ create mode 100644 gcc/testsuite/g++.dg/other/pr113617.h
+
+diff --git a/gcc/testsuite/g++.dg/other/pr113617-aux.cc b/gcc/testsuite/g++.dg/other/pr113617-aux.cc
+new file mode 100644
+index 000000000..e6900e05a
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617-aux.cc
+@@ -0,0 +1,9 @@
++// PR rtl-optimization/113617
++// { dg-do link { target { c++17 && c++14_down } } }
++
++#include "pr113617.h"
++
++void qux() {
++  A<long long> a;
++  a.foo(0, 0);
++}
+diff --git a/gcc/testsuite/g++.dg/other/pr113617.C b/gcc/testsuite/g++.dg/other/pr113617.C
+new file mode 100644
+index 000000000..a02dda142
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617.C
+@@ -0,0 +1,27 @@
++// PR rtl-optimization/113617
++// { dg-do link { target c++11 } }
++// { dg-options "-O2" }
++// { dg-additional-options "-fPIC" { target fpic } } */
++// { dg-additional-options "-shared" { target shared } } */
++// { dg-additional-sources pr113617-aux.cc }
++
++#include "pr113617.h"
++
++int z;
++long xx1;
++void corge() {
++  A<long long> a;
++  a.foo(xx1, 0);
++}
++
++typedef unsigned long int VV __attribute__((vector_size (2 * sizeof (long))));
++VV vv;
++__attribute__((noipa)) static void fn1 (void) {}
++__attribute__((noipa)) static void fn2 (void) {}
++
++void
++fn3 ()
++{
++  VV a = { (unsigned long) &fn1, (unsigned long) &fn2 };
++  vv = a;
++}
+diff --git a/gcc/testsuite/g++.dg/other/pr113617.h b/gcc/testsuite/g++.dg/other/pr113617.h
+new file mode 100644
+index 000000000..4d30eddbc
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/other/pr113617.h
+@@ -0,0 +1,132 @@
++namespace {
++template <int V> struct J { static constexpr int value = V; };
++template <bool V> using K = J<V>;
++using M = K<true>;
++template <int> struct L { template <typename _Tp, typename> using type = _Tp; };
++template <bool _Cond, typename _If, typename _Else> using N = typename L<_Cond>::type<_If, _Else>;
++M k;
++template <typename _Tp> struct O { using type = _Tp; };
++template <typename _Up>
++struct P : N<M ::value, O<_Up>, _Up> {};
++template <typename _Tp> struct Q { using type = typename P<_Tp>::type; };
++}
++namespace R {
++struct H;
++enum G {};
++template <typename> class S;
++struct T { using U = bool (*) (H &, const H &, G); U F; };
++template <typename, typename> class B;
++template <typename _R, typename _F, typename... _A>
++struct B<_R(_A...), _F> {
++  static bool F(H &, const H &, G) { return false; }
++  __attribute__((noipa)) static _R bar(const H &) {}
++};
++template <typename _R, typename... _A>
++struct S<_R(_A...)> : T {
++  template <typename _F> using AH = B<_R(), _F>;
++  template <typename _F> S(_F) {
++    using AG = AH<_F>;
++    barr = AG::bar;
++    F = AG::F;
++  }
++  using AF = _R (*)(const H &);
++  AF barr;
++};
++template <typename> class I;
++template <typename _F, typename... _B>
++struct I<_F(_B...)> {};
++template <typename> using W = decltype(k);
++template <int, typename _F, typename... _B> struct V {
++  typedef I<typename Q<_F>::type(typename Q<_B>::type...)> type;
++};
++template <typename _F, typename... _B>
++__attribute__((noipa)) typename V<W<_F>::value, _F, _B...>::type
++baz(_F, _B...) { return typename V<W<_F>::value, _F, _B...>::type (); }
++template <typename _Tp> struct AJ {
++  template <typename _Up> struct _Ptr { using type = _Up *; };
++  using AI = typename _Ptr<_Tp>::type;
++};
++template <typename _Tp> struct Y {
++  using AI = typename AJ<_Tp>::AI;
++  AI operator->();
++};
++}
++extern int z;
++namespace N1 {
++namespace N2 {
++namespace N3 {
++enum Z { Z1, Z2 };
++template <int> struct X {
++  template <typename _F>
++  __attribute__((noipa)) void boo(long long, long long, long long, _F &) {}
++};
++struct AC {
++  AC(int);
++  void m1(R::S<void()>);
++};
++template <typename>
++__attribute__((noipa)) void garply(void *, long long, long long, long long) {}
++template <>
++template <typename _F>
++void X<Z2>::boo(long long, long long x, long long y, _F &fi) {
++  AC pool(z);
++  for (;;) {
++    auto job = R::baz(garply<_F>, &fi, y, y, x);
++    pool.m1(job);
++  }
++}
++struct AB {
++  static AB &bleh();
++  template <typename _F>
++  void boo(long first, long x, long y, _F fi) {
++    switch (ab1) {
++    case Z1:
++      ab2->boo(first, x, y, fi);
++    case Z2:
++      ab3->boo(first, x, y, fi);
++    }
++  }
++  Z ab1;
++  R::Y<X<Z1>> ab2;
++  R::Y<X<Z2>> ab3;
++};
++template <typename, bool> struct C;
++template <typename _F> struct C<_F, false> {
++  __attribute__((noipa)) C(_F) {}
++  void boo(long first, long x, long y) {
++    auto u = AB::bleh();
++    u.boo(first, x, y, *this);
++  }
++};
++template <typename _F> struct AA { typedef C<_F, 0> type; };
++}
++}
++}
++struct AD {
++  template <typename _F>
++  static void boo(long first, long x, long y, _F f) {
++    typename N1::N2::N3::AA<_F>::type fi(f);
++    fi.boo(first, x, y);
++  }
++  template <typename _F>
++  static void boo(long first, long x, _F f) {
++    boo(first, x, 0, f);
++  }
++};
++template <typename> struct A {
++  void foo(long long, long long);
++  int *c;
++};
++namespace {
++template <typename> struct D { __attribute__((noipa)) D(int *) {} };
++}
++template <typename T>
++void A<T>::foo(long long x, long long y)
++{
++  int e;
++  D<T> d(&e);
++  AD::boo(0, y, d);
++  long p;
++  for (p = 0; p < x; p++)
++    c[p] = c[p] - 1;
++}
+diff --git a/gcc/varasm.cc b/gcc/varasm.cc
+index bae935694..d122730b5 100644
+--- a/gcc/varasm.cc
++++
b/gcc/varasm.cc +@@ -7317,17 +7317,63 @@ default_elf_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) + { + int reloc = compute_reloc_for_rtx (x); ++ tree decl = nullptr; ++ const char *prefix = nullptr; ++ int flags = 0; ++ ++ /* If it is a private COMDAT function symbol reference, call ++ function_rodata_section for the read-only or relocated read-only ++ data section associated with function DECL so that the COMDAT ++ section will be used for the private COMDAT function symbol. */ ++ if (HAVE_COMDAT_GROUP) ++ { ++ if (GET_CODE (x) == CONST ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (x, 0), 1))) ++ x = XEXP (XEXP (x, 0), 0); ++ ++ if (GET_CODE (x) == SYMBOL_REF) ++ { ++ decl = SYMBOL_REF_DECL (x); ++ if (decl ++ && (TREE_CODE (decl) != FUNCTION_DECL ++ || !DECL_COMDAT_GROUP (decl) ++ || TREE_PUBLIC (decl))) ++ decl = nullptr; ++ } ++ } + + /* ??? Handle small data here somehow. */ + + if (reloc & targetm.asm_out.reloc_rw_mask ()) + { +- if (reloc == 1) ++ if (decl) ++ { ++ prefix = reloc == 1 ? ".data.rel.ro.local" : ".data.rel.ro"; ++ flags = SECTION_WRITE | SECTION_RELRO; ++ } ++ else if (reloc == 1) + return get_named_section (NULL, ".data.rel.ro.local", 1); + else + return get_named_section (NULL, ".data.rel.ro", 3); + } + ++ if (decl) ++ { ++ const char *comdat = IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)); ++ if (!prefix) ++ prefix = ".rodata"; ++ size_t prefix_len = strlen (prefix); ++ size_t comdat_len = strlen (comdat); ++ size_t len = prefix_len + sizeof (".pool.") + comdat_len; ++ char *name = XALLOCAVEC (char, len); ++ memcpy (name, prefix, prefix_len); ++ memcpy (name + prefix_len, ".pool.", sizeof (".pool.") - 1); ++ memcpy (name + prefix_len + sizeof (".pool.") - 1, comdat, ++ comdat_len + 1); ++ return get_section (name, flags | SECTION_LINKONCE, decl); ++ } ++ + return mergeable_constant_section (mode, align, 0); + } + +-- +2.33.0 +
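Illustration (not part of the patch above): a minimal standalone sketch of the section-naming scheme the varasm.cc hunk implements, with the GCC internals (XALLOCAVEC, get_section) replaced by std::string; the comdat group name used here is hypothetical.

#include <iostream>
#include <string>

// Build the per-comdat constant-pool section name the patch constructs, e.g.
// .data.rel.ro.local.pool.<comdat> or .rodata.pool.<comdat>, so that the pool
// section is discarded by the linker together with its comdat group.
static std::string pool_section_name (const std::string &prefix, const std::string &comdat)
{
  return prefix + ".pool." + comdat;
}

int main ()
{
  const std::string comdat = "_ZN1AIxE3fooExx"; // hypothetical comdat group name
  std::cout << pool_section_name (".data.rel.ro.local", comdat) << '\n';
  std::cout << pool_section_name (".rodata", comdat) << '\n';
}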
View file
_service:tar_scm:0306-RISC-V-Install-libstdc-libcc1-etc-to-lib64-instead-o.patch
Added
@@ -0,0 +1,65 @@ +From 84edbc6544ed872aedb3cb6f6d0feb8647ff1d8b Mon Sep 17 00:00:00 2001 +From: YunQiang Su <yunqiang@isrc.iscas.ac.cn> +Date: Mon, 14 Oct 2024 10:09:46 +0800 +Subject: [PATCH] RISC-V: Install libstdc++/libcc1 etc to /lib64 instead of lib + +The problem is that if we are configured with `--disable-multilib`, + gcc -print-multi-os-directory +outputs + . +Thus the dest to install libraries is set to + /usr/lib/. +While on other platforms (x86-64, arm64) it will be + /usr/lib/../lib64 +Let's sync riscv64 with them. + +Another problem is that + gcc -print-file-name=libzstd.so.1 +will output + /usr/lib64/lp64d/../lib64/libzstd.so.1 +which also needs to be patched. +--- + gcc/config.gcc | 3 +++ + gcc/config/riscv/linux.h | 2 ++ + gcc/config/riscv/t-openEuler | 2 ++ + 3 files changed, 7 insertions(+) + create mode 100644 gcc/config/riscv/t-openEuler + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 19b21a280..23c5bee2b 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -2453,6 +2453,9 @@ riscv*-*-linux*) + xyes) tmake_file="${tmake_file} riscv/t-linux-multilib" ;; + *) echo "Unknown value for enable_multilib"; exit 1 + esac ++ case "x${target_vendor}" in ++ xopenEuler) tmake_file="${tmake_file} riscv/t-openEuler" ++ esac + tmake_file="${tmake_file} riscv/t-riscv riscv/t-linux" + gnu_ld=yes + gas=yes +diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h +index b5c6c5027..a8d65f4e0 100644 +--- a/gcc/config/riscv/linux.h ++++ b/gcc/config/riscv/linux.h +@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3. If not see + #define TARGET_ASM_FILE_END file_end_indicate_exec_stack + + #define STARTFILE_PREFIX_SPEC \ ++ "/lib" XLEN_SPEC "/ " \ ++ "/usr/lib" XLEN_SPEC "/ " \ + "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/lib/ " \ +diff --git a/gcc/config/riscv/t-openEuler b/gcc/config/riscv/t-openEuler +new file mode 100644 +index 000000000..26541dd08 +--- /dev/null ++++ b/gcc/config/riscv/t-openEuler +@@ -0,0 +1,2 @@ ++MULTILIB_OPTIONS = mabi=lp64d ++MULTILIB_DIRNAMES = ../lib64 +-- +2.39.5 (Apple Git-154) +
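Illustration (not part of the patch): how the two -print-multi-os-directory results described above turn into install directories once joined with the libdir. A small sketch, assuming /usr/lib as the base path; "." keeps libraries in /usr/lib, while t-openEuler's "../lib64" redirects them to /usr/lib64.

#include <filesystem>
#include <iostream>

int main ()
{
  namespace fs = std::filesystem;
  // "." is what `gcc -print-multi-os-directory` prints with --disable-multilib;
  // "../lib64" is what the t-openEuler MULTILIB_DIRNAMES makes it print instead.
  for (const char *multi_os_dir : { ".", "../lib64" })
    {
      fs::path libdir = fs::path ("/usr/lib") / multi_os_dir;
      std::cout << libdir.string () << " -> "
                << libdir.lexically_normal ().string () << '\n';
    }
}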
View file
_service:tar_scm:0307-Set-fallback-value-for-print-multi-os-directory.patch
Added
@@ -0,0 +1,105 @@ +From 0d157b14f361f8319f4694c54c6e01ac8f59d278 Mon Sep 17 00:00:00 2001 +From: YunQiang Su <yunqiang@isrc.iscas.ac.cn> +Date: Tue, 8 Oct 2024 17:56:23 +0800 +Subject: [PATCH 1/2] Set fallback value for -print-multi-os-directory + +Clang doesn't support the -print-multi-os-directory option. +So let's set the fallback value (../lib64) if it is empty. + +This is only needed for the projects built by hostcc: + gcc, libcc1, libiberty + +The projects built for targets only will always be built by gcc itself. +--- + gcc/configure | 3 +++ + libcc1/configure | 6 ++++++ + libcc1/configure.ac | 3 +++ + libiberty/Makefile.in | 5 ++++- + libtool.m4 | 3 +++ + 5 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/gcc/configure b/gcc/configure +index 7e64599b0..ef0449edd 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -18598,6 +18598,9 @@ if test "$GCC" = yes; then + # and add multilib dir if necessary. + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` ++ if test -z "$lt_multi_os_dir"; then ++ lt_multi_os_dir=../lib64 ++ fi + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" +diff --git a/libcc1/configure b/libcc1/configure +index 01cfb2806..3c437d690 100755 +--- a/libcc1/configure ++++ b/libcc1/configure +@@ -9701,6 +9701,9 @@ if test "$GCC" = yes; then + # and add multilib dir if necessary. + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` ++ if test -z "$lt_multi_os_dir"; then ++ lt_multi_os_dir=../lib64 ++ fi + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" +@@ -14865,6 +14868,9 @@ libsuffix= + if test "$GXX" = yes; then + libsuffix=`$CXX -print-multi-os-directory` + fi ++if test -z "$libsuffix"; then ++ libsuffix=../lib64 ++fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for socket libraries" >&5 +diff --git a/libcc1/configure.ac b/libcc1/configure.ac +index 36f5a7e09..acd7c4c04 100644 +--- a/libcc1/configure.ac ++++ b/libcc1/configure.ac +@@ -72,6 +72,9 @@ libsuffix= + if test "$GXX" = yes; then + libsuffix=`$CXX -print-multi-os-directory` + fi ++if test -z "$libsuffix"; then ++ libsuffix=../lib64 ++fi + AC_SUBST(libsuffix) + + dnl Test for -lsocket and -lnsl. Copied from libgo/configure.ac. +diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in +index 1b17c2e3a..2bfa00de5 100644 +--- a/libiberty/Makefile.in ++++ b/libiberty/Makefile.in +@@ -385,7 +385,10 @@ install-strip: install + # multilib-specific flags, it's overridden by FLAGS_TO_PASS from the + # default multilib, so we have to take CFLAGS into account as well, + # since it will be passed the multilib flags. +-MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory` ++MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory 2>/dev/null` ++ifeq ($(MULTIOSDIR),) ++ MULTIOSDIR = ../lib64 ++endif + install_to_libdir: all + if test -n "${target_header_dir}"; then \ + ${mkinstalldirs} $(DESTDIR)$(libdir)/$(MULTIOSDIR); \ +diff --git a/libtool.m4 b/libtool.m4 +index 17f8e5f30..86fc1e705 100644 +--- a/libtool.m4 ++++ b/libtool.m4 +@@ -2059,6 +2059,9 @@ if test "$GCC" = yes; then + # and add multilib dir if necessary.
+ lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` ++ if test -z "$lt_multi_os_dir"; then ++ lt_multi_os_dir=../lib64 ++ fi + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" +-- +2.47.0 +
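Illustration (not part of the patch): the fallback rule the configure hunks above encode, as a self-contained sketch; the function name and sample inputs are hypothetical.

#include <iostream>
#include <string>

// Empty output means the driver (e.g. clang) does not implement
// -print-multi-os-directory, so fall back to ../lib64 as the patch does.
static std::string multi_os_dir_or_fallback (const std::string &printed)
{
  return printed.empty () ? "../lib64" : printed;
}

int main ()
{
  std::cout << multi_os_dir_or_fallback ("../lib64") << '\n'; // gcc on a 64-bit target
  std::cout << multi_os_dir_or_fallback ("") << '\n';         // clang: option unsupported
}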
View file
_service:tar_scm:0308-Fix-enum-INPUT-MIDDLE-FINAL-aes_stage.patch
Added
@@ -0,0 +1,108 @@ +From 1624bdceb341e0034c22ce46bc2e422726f76cce Mon Sep 17 00:00:00 2001 +From: YunQiang Su <yunqiang@isrc.iscas.ac.cn> +Date: Tue, 8 Oct 2024 17:59:56 +0800 +Subject: [PATCH 2/2] Fix enum { INPUT, MIDDLE, FINAL } aes_stage + +FINAL is defined in ansidecl.h. +Let's rename the elements to + aesINPUT, aesMIDDLE, aesFINAL +to avoid conflicts. + +I found this problem when trying to build gcc with clang. +In fact FINAL is defined to empty for clang, and `final` for gcc. +So it coincidentally worked for gcc. +--- + gcc/crypto-accel.cc | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc +index e7766a585..716c4a38b 100644 +--- a/gcc/crypto-accel.cc ++++ b/gcc/crypto-accel.cc +@@ -1251,7 +1251,7 @@ public: + + /* AES stage description. Required for some specializations + for curtain rounds. */ +-typedef enum { INPUT, MIDDLE, FINAL } aes_stage; ++typedef enum { aesINPUT, aesMIDDLE, aesFINAL } aes_stage; + + /* AES entity description. It can be both round or state inside round. + It provides interface for unified analysis between blocks of 4 parts: +@@ -1356,7 +1356,7 @@ struct state_input + + /* Input round state uses special input. */ + template<> +-struct state_input<INPUT> ++struct state_input<aesINPUT> + { + typedef std::pair<rtx, unsigned HOST_WIDE_INT> type; + +@@ -1389,7 +1389,7 @@ struct state_output + + /* Final round state generates special output. */ + template<> +-struct state_output<FINAL> ++struct state_output<aesFINAL> + { + typedef std::pair<rtx, unsigned HOST_WIDE_INT> type; + +@@ -1409,7 +1409,7 @@ struct round_input + + /* Input round uses special input just as its state. */ + template<> +-struct round_input<INPUT> ++struct round_input<aesINPUT> + { + typedef std::pair<rtx, unsigned HOST_WIDE_INT> type; + }; +@@ -1437,7 +1437,7 @@ struct round_output + AES encryption. */ + template<> + template<> +-void round_output<INPUT>::reorder<aes_decrypt_table> (type &out) ++void round_output<aesINPUT>::reorder<aes_decrypt_table> (type &out) + { + gcc_assert (out.size () == 4); + std::swap (out1, out3); +@@ -1445,14 +1445,14 @@ void round_output<INPUT>::reorder<aes_decrypt_table> (type &out) + + template<> + template<> +-void round_output<MIDDLE>::reorder<aes_decrypt_table> (type &out) ++void round_output<aesMIDDLE>::reorder<aes_decrypt_table> (type &out) + { +- round_output<INPUT>::reorder<aes_decrypt_table> (out); ++ round_output<aesINPUT>::reorder<aes_decrypt_table> (out); + } + + /* Final round generates special output. */ + template<> +-struct round_output<FINAL> : state_output<FINAL> ++struct round_output<aesFINAL> : state_output<aesFINAL> + { + template<typename T> + static void finalize (type &out, const T &v) +@@ -1644,14 +1644,14 @@ public: + typedef std::map<rtx_insn *, aes_table_ref<T> > table_ref_map; + + /* AES states typedefs. */ +- typedef aes_state<input_info, INPUT, T> aes_input_state; +- typedef aes_state<round_input_info<T>, MIDDLE, T> aes_body_state; +- typedef aes_state<round_input_info<T>, FINAL, T> aes_final_state; ++ typedef aes_state<input_info, aesINPUT, T> aes_input_state; ++ typedef aes_state<round_input_info<T>, aesMIDDLE, T> aes_body_state; ++ typedef aes_state<round_input_info<T>, aesFINAL, T> aes_final_state; + + /* AES rounds typedefs.
*/ +- typedef aes_round<input_info, INPUT, T> aes_input_round; +- typedef aes_round<round_input_info<T>, MIDDLE, T> aes_body_round; +- typedef aes_round<round_input_info<T>, FINAL, T> aes_final_round; ++ typedef aes_round<input_info, aesINPUT, T> aes_input_round; ++ typedef aes_round<round_input_info<T>, aesMIDDLE, T> aes_body_round; ++ typedef aes_round<round_input_info<T>, aesFINAL, T> aes_final_round; + + bool run (); + +-- +2.47.0 +
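Illustration (not part of the patch): a minimal reproduction of the clash described in the commit message, assuming ansidecl.h's behavior of defining FINAL to `final` for gcc and to nothing for other compilers. Under gcc the enumerator merely becomes the valid identifier `final` (only a contextual keyword), so it coincidentally compiled; under clang the enumerator vanishes, and every later use of FINAL fails to parse.

// Stand-in for the ansidecl.h definition; change to an empty `#define FINAL`
// to see the clang-style failure.
#define FINAL final

typedef enum { INPUT, MIDDLE, FINAL } aes_stage;           // expands to { INPUT, MIDDLE, final }
typedef enum { aesINPUT, aesMIDDLE, aesFINAL } aes_stage2; // renamed: immune to the macro

// With an empty FINAL the first enum still parses (trailing comma is legal),
// but a use like the following expands to `aes_stage2 s = ;` and breaks:
//   aes_stage s = FINAL;
int main () { return aesFINAL == 2 ? 0 : 1; }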
View file
_service:tar_scm:Fix-indentation-and-numbering-errors.diff
Added
@@ -0,0 +1,205 @@ +diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in +index 91cd653623b..b686f5eb492 100644 +--- a/libphobos/libdruntime/Makefile.in ++++ b/libphobos/libdruntime/Makefile.in +@@ -124,13 +124,13 @@ target_triplet = @target@ + # CPU specific sources + @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S + @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S +-@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S +-@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S +-@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S +-@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S ++@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S ++@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S ++@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_19 = config/loongarch/switchcontext.S + subdir = libdruntime + ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -475,14 +475,14 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \ + @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22) + @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo + @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo +-am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \ ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_32 = config/loongarch/libgdruntime_la-switchcontext.lo ++am__objects_33 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ + 
$(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_21) \ + $(am__objects_23) $(am__objects_24) $(am__objects_25) \ +@@ -500,22 +500,22 @@ am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo + @DRUNTIME_OS_MINGW_TRUE@ config/mingw/libgdruntime_convenience_la-msvc.lo + @DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo + @DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_40 = config/mips/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_41 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_42 = config/mingw/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_43 = config/x86/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_44 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_45 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_46 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo + am__objects_47 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ +- $(am__objects_17) $(am__objects_19) $(am__objects_36) \ +- $(am__objects_23) $(am__objects_37) $(am__objects_38) \ +- $(am__objects_39) $(am__objects_40) $(am__objects_41) \ +- $(am__objects_42) $(am__objects_43) $(am__objects_44) \ +- $(am__objects_45) $(am__objects_46) +-am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ +- $(am__objects_47) $(am__objects_33) ++ $(am__objects_17) $(am__objects_19) $(am__objects_37) \ ++ $(am__objects_23) $(am__objects_38) $(am__objects_39) \ ++ $(am__objects_40) $(am__objects_41) $(am__objects_42) \ ++ $(am__objects_43) $(am__objects_44) $(am__objects_45) \ ++ $(am__objects_46) ++am__objects_48 = $(am__objects_1) $(am__objects_36) $(am__objects_3) \ ++ $(am__objects_47) $(am__objects_34) + am__objects_49 = $(am__objects_48) + am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49) + libgdruntime_convenience_la_OBJECTS = \ +@@ -1905,11 +1905,6 @@ config/arm/$(am__dirstamp): + @: > config/arm/$(am__dirstamp) + config/arm/libgdruntime_la-switchcontext.lo: \ + config/arm/$(am__dirstamp) +-config/loongarch/$(am__dirstamp): +- @$(MKDIR_P) config/loongarch +- @: > config/loongarch/$(am__dirstamp) +-config/loongarch/libgdruntime_la-switchcontext.lo: \ +- config/loongarch/$(am__dirstamp) + config/mips/$(am__dirstamp): + @$(MKDIR_P) config/mips + @: > 
config/mips/$(am__dirstamp) +@@ -1937,6 +1932,11 @@ config/s390/$(am__dirstamp): + @: > config/s390/$(am__dirstamp) + config/s390/libgdruntime_la-get_tls_offset.lo: \ + config/s390/$(am__dirstamp) ++config/loongarch/$(am__dirstamp): ++ @$(MKDIR_P) config/loongarch ++ @: > config/loongarch/$(am__dirstamp) ++config/loongarch/libgdruntime_la-switchcontext.lo: \ ++ config/loongarch/$(am__dirstamp) + gcc/config.lo: gcc/$(am__dirstamp) + gcc/libbacktrace.lo: gcc/$(am__dirstamp) + +@@ -1950,8 +1950,6 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: \ + config/aarch64/$(am__dirstamp) + config/arm/libgdruntime_convenience_la-switchcontext.lo: \ + config/arm/$(am__dirstamp) +-config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \ +- config/loongarch/$(am__dirstamp) + config/mips/libgdruntime_convenience_la-switchcontext.lo: \ + config/mips/$(am__dirstamp) + config/powerpc/libgdruntime_convenience_la-switchcontext.lo: \ +@@ -1964,6 +1962,8 @@ config/systemz/libgdruntime_convenience_la-get_tls_offset.lo: \ + config/systemz/$(am__dirstamp) + config/s390/libgdruntime_convenience_la-get_tls_offset.lo: \ + config/s390/$(am__dirstamp) ++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \ ++ config/loongarch/$(am__dirstamp) + + libgdruntime_convenience.la: $(libgdruntime_convenience_la_OBJECTS) $(libgdruntime_convenience_la_DEPENDENCIES) $(EXTRA_libgdruntime_convenience_la_DEPENDENCIES) + $(AM_V_GEN)$(libgdruntime_convenience_la_LINK) $(libgdruntime_convenience_la_OBJECTS) $(libgdruntime_convenience_la_LIBADD) $(LIBS) +@@ -1976,14 +1976,14 @@ mostlyclean-compile: + -rm -f config/arm/*.lo + -rm -f config/mingw/*.$(OBJEXT) + -rm -f config/mingw/*.lo +- -rm -f config/loongarch/*.$(OBJEXT) +- -rm -f config/loongarch/*.lo + -rm -f config/mips/*.$(OBJEXT) + -rm -f config/mips/*.lo + -rm -f config/powerpc/*.$(OBJEXT) + -rm -f config/powerpc/*.lo + -rm -f config/s390/*.$(OBJEXT) + -rm -f config/s390/*.lo ++ -rm -f config/loongarch/*.$(OBJEXT) ++ -rm -f config/loongarch/*.lo + -rm -f config/systemz/*.$(OBJEXT) + -rm -f config/systemz/*.lo + -rm -f config/x86/*.$(OBJEXT) +@@ -2101,10 +2101,7 @@ config/aarch64/libgdruntime_la-switchcontext.lo: config/aarch64/switchcontext.S + config/arm/libgdruntime_la-switchcontext.lo: config/arm/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S + +-config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S +- $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) +- +-onfig/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S ++config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S + + config/powerpc/libgdruntime_la-switchcontext.lo: config/powerpc/switchcontext.S +@@ -2122,18 +2119,21 @@ 
config/systemz/libgdruntime_la-get_tls_offset.lo: config/systemz/get_tls_offset. + config/s390/libgdruntime_la-get_tls_offset.lo: config/s390/get_tls_offset.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/s390/libgdruntime_la-get_tls_offset.lo `test -f 'config/s390/get_tls_offset.S' || echo '$(srcdir)/'`config/s390/get_tls_offset.S + ++config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S ++ $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S ++ + config/aarch64/libgdruntime_convenience_la-switchcontext.lo: config/aarch64/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/aarch64/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/aarch64/switchcontext.S' || echo '$(srcdir)/'`config/aarch64/switchcontext.S + + config/arm/libgdruntime_convenience_la-switchcontext.lo: config/arm/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S + +-config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S +- $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM +- + config/mips/libgdruntime_convenience_la-switchcontext.lo: config/mips/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S + ++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S ++ $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S ++ + config/powerpc/libgdruntime_convenience_la-switchcontext.lo: config/powerpc/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/powerpc/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/powerpc/switchcontext.S' || echo 
'$(srcdir)/'`config/powerpc/switchcontext.S + +@@ -2178,10 +2178,10 @@ clean-libtool: + -rm -rf config/aarch64/.libs config/aarch64/_libs + -rm -rf config/arm/.libs config/arm/_libs + -rm -rf config/mingw/.libs config/mingw/_libs +- -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/mips/.libs config/mips/_libs + -rm -rf config/powerpc/.libs config/powerpc/_libs + -rm -rf config/s390/.libs config/s390/_libs ++ -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/systemz/.libs config/systemz/_libs + -rm -rf config/x86/.libs config/x86/_libs + -rm -rf core/.libs core/_libs +@@ -2340,10 +2340,10 @@ distclean-generic: + -rm -f config/aarch64/$(am__dirstamp) + -rm -f config/arm/$(am__dirstamp) + -rm -f config/mingw/$(am__dirstamp) +- -rm -f config/loongarch/$(am__dirstamp) + -rm -f config/mips/$(am__dirstamp) + -rm -f config/powerpc/$(am__dirstamp) + -rm -f config/s390/$(am__dirstamp) ++ -rm -f config/loongarch/$(am__dirstamp) + -rm -f config/systemz/$(am__dirstamp) + -rm -f config/x86/$(am__dirstamp) + -rm -f core/$(am__dirstamp)