Projects
openEuler:24.03:SP1:Everything:64G
llvm
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 2
View file
_service:tar_scm:llvm.spec
Changed
@@ -1,6 +1,12 @@ %bcond_without sys_llvm %bcond_without check %bcond_with classic_flang +%bcond_with toolchain_clang +%bcond_without bisheng_autotuner + +%if %{with toolchain_clang} +%global toolchain clang +%endif %global maj_ver 17 %global min_ver 0 @@ -38,7 +44,7 @@ Name: %{pkg_name} Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 11 +Release: 19 Summary: The Low Level Virtual Machine License: NCSA @@ -69,13 +75,10 @@ Patch18: 0018-Fix-declaration-definition-mismatch-for-classic-flang.patch Patch19: 0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch - -Patch21: 0021-Backport-GlobalISel-Don-t-expand-stacksave-stackrestore-in-IRTranslator.patch -Patch22: 0022-Backport-AArch64-Refactor-allocation-of-locals-and-stack-realignment.patch -Patch23: 0023-Backport-AArch64-Stack-probing-for-function-prologues.patch -Patch24: 0024-Backport-AArch64-Stack-probing-for-dynamic-allocas-in-SelectionDAG.patch -Patch25: 0025-Backport-AArch64-Stack-probing-for-dynamic-allocas-in-GlobalISel.patch -Patch26: 0026-Update-testcase-for-stack-clash-protection-backport.patch +Patch21: 0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch +Patch22: 0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch +Patch23: 0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch +Patch24: 0024-Backport-LoongArch-fix-and-add-some-new-support.patch BuildRequires: binutils-devel BuildRequires: cmake @@ -92,6 +95,9 @@ BuildRequires: python3-sphinx BuildRequires: python3-setuptools BuildRequires: zlib-devel +%if %{with toolchain_clang} +BuildRequires: clang +%endif Requires: %{name}-libs%{?_isa} = %{version}-%{release} @@ -128,6 +134,8 @@ Summary: Documentation for LLVM BuildArch: noarch Requires: %{name} = %{version}-%{release} +Provides: %{name}-help = %{version}-%{release} +Obsoletes: %{name}-help < %{version}-%{release} %description doc Documentation for the LLVM compiler infrastructure. 
@@ -238,6 +246,13 @@ %if %{with classic_flang} -DLLVM_ENABLE_CLASSIC_FLANG=ON \ %endif +%if "%{toolchain}" == "clang" + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ +%endif +%if %{with bisheng_autotuner} + -DLLVM_ENABLE_AUTOTUNER=ON \ +%endif -DLLVM_INCLUDE_BENCHMARKS=OFF %ninja_build LLVM %ninja_build @@ -299,7 +314,6 @@ %files %license LICENSE.TXT -%{install_prefix}/share/man/man1/* %{install_bindir}/* %exclude %{install_bindir}/not %exclude %{install_bindir}/count @@ -329,6 +343,7 @@ %files doc %license LICENSE.TXT %doc %{install_docdir}/html +%{install_prefix}/share/man/man1/* %files static %license LICENSE.TXT @@ -360,8 +375,32 @@ %{install_includedir}/llvm-gmock %changelog -* Fri May 10 2024 rickyleung <leung.wing.chung@huawei.com> - 17.0.6-11 -- Backport the patches to support stack clash protection +* Mon Sep 23 2024 zhanglimin <zhanglimin@loongson.cn> - 17.0.6-19 +- LoongArch Backport some new support + +* Thu Sep 12 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-18 +- AArch64 Support HiSilicon's HIP09 Processor + +* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-17 +- doc add Provides llvm-help + +* Tue Sep 10 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-16 +- doc add Obsoletes llvm-help + +* Tue Sep 3 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-15 +- mv man to doc subpackage + +* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-14 +- Prevent environment variables from exceeding NAME_MAX. + +* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-13 +- Disable toolchain_clang build for BiSheng Autotuner support temporary. + +* Tue Jul 16 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-12 +- Add BiSheng Autotuner support. + +* Fri Jul 5 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-11 +- Add toolchain_clang build support * Mon Apr 29 2024 wangqiang <wangqiang1@kylinos.cn> - 17.0.6-10 - Update llvm-lit config to support macro `build_for_openeuler`
View file
_service:tar_scm:0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
Added
@@ -0,0 +1,9915 @@ +From a9863e2b6e6783aa9be0b9d1d187084fd4b32a3a Mon Sep 17 00:00:00 2001 +From: Muhammad Asif Manzoor <muhammad.asif.manzoor1@huawei.com> +Date: Thu, 21 Mar 2024 12:50:38 -0400 +Subject: Add BiSheng Autotuner support for LLVM compiler + +Automatic tuning is an automatic iterative process that optimizes a given +program by manipulating compilation options for optimal performance. +BiSheng Autotuner provides a resumable interface for tuning process. BiSheng +Autotuner can tune 1) individual code segments/blocks (fine grain turning) like +loops, callsites, instructions, etc. and 2) entire modules/programs (coarse +grain tuning) for compiler flags, pass ordering, etc. +This patch enables LLVM compiler to extract tuneable code regions and then apply +suggested configuration (by Autotuner) to find out the optimal configurations. +--- + llvm/cmake/modules/CrossCompile.cmake | 1 + + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 + + llvm/include/llvm/Analysis/AutotuningDump.h | 75 ++ + llvm/include/llvm/Analysis/LoopInfo.h | 13 + + llvm/include/llvm/Analysis/Passes.h | 10 + + llvm/include/llvm/AutoTuner/AutoTuning.h | 486 ++++++++++++ + .../llvm/AutoTuner/AutoTuningRemarkManager.h | 43 ++ + .../llvm/AutoTuner/AutoTuningRemarkStreamer.h | 47 ++ + llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 + + llvm/include/llvm/IR/Function.h | 37 + + llvm/include/llvm/IR/InstrTypes.h | 24 + + llvm/include/llvm/IR/Instructions.h | 24 + + llvm/include/llvm/IR/Module.h | 3 + + llvm/include/llvm/IR/StructuralHash.h | 14 + + llvm/include/llvm/InitializePasses.h | 5 + + llvm/include/llvm/LinkAllPasses.h | 8 + + llvm/include/llvm/Remarks/Remark.h | 32 + + llvm/include/llvm/Support/CommandLine.h | 17 + + llvm/include/llvm/Transforms/Scalar.h | 17 + + .../Transforms/Scalar/AutoTuningCompile.h | 170 +++++ + .../llvm/Transforms/Utils/UnrollLoop.h | 4 + + llvm/lib/Analysis/AutotuningDump.cpp | 265 +++++++ + llvm/lib/Analysis/CMakeLists.txt | 2 + + 
llvm/lib/Analysis/InlineAdvisor.cpp | 18 + + llvm/lib/Analysis/InlineCost.cpp | 29 + + llvm/lib/Analysis/LoopInfo.cpp | 52 ++ + llvm/lib/AutoTuner/AutoTuning.cpp | 705 ++++++++++++++++++ + .../lib/AutoTuner/AutoTuningRemarkManager.cpp | 299 ++++++++ + .../AutoTuner/AutoTuningRemarkStreamer.cpp | 55 ++ + llvm/lib/AutoTuner/CMakeLists.txt | 11 + + llvm/lib/CMakeLists.txt | 1 + + llvm/lib/CodeGen/CMakeLists.txt | 1 + + llvm/lib/CodeGen/CalcSpillWeights.cpp | 30 + + llvm/lib/CodeGen/MachineBasicBlock.cpp | 36 + + llvm/lib/CodeGen/MachineScheduler.cpp | 44 ++ + llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 19 + + llvm/lib/IR/AsmWriter.cpp | 151 ++++ + llvm/lib/IR/CMakeLists.txt | 1 + + llvm/lib/IR/Function.cpp | 34 + + llvm/lib/IR/Instructions.cpp | 86 +++ + llvm/lib/IR/StructuralHash.cpp | 114 +++ + llvm/lib/Passes/PassBuilder.cpp | 5 + + llvm/lib/Passes/PassBuilderPipelines.cpp | 46 ++ + llvm/lib/Passes/PassRegistry.def | 13 + + llvm/lib/Passes/StandardInstrumentations.cpp | 23 + + .../lib/Remarks/BitstreamRemarkSerializer.cpp | 8 + + llvm/lib/Remarks/RemarkStreamer.cpp | 4 + + llvm/lib/Remarks/YAMLRemarkParser.cpp | 122 +++ + llvm/lib/Remarks/YAMLRemarkParser.h | 6 + + llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 84 +++ + llvm/lib/Support/CommandLine.cpp | 41 + + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + + llvm/lib/Transforms/IPO/Inliner.cpp | 36 + + llvm/lib/Transforms/IPO/SampleProfile.cpp | 14 + + .../Transforms/Instrumentation/CMakeLists.txt | 1 + + .../Instrumentation/PGOInstrumentation.cpp | 8 + + .../Transforms/Scalar/AutoTuningCompile.cpp | 334 +++++++++ + llvm/lib/Transforms/Scalar/CMakeLists.txt | 2 + + llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 187 +++++ + llvm/lib/Transforms/Scalar/Scalar.cpp | 4 + + llvm/lib/Transforms/Scalar/Sink.cpp | 5 + + llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + + llvm/lib/Transforms/Utils/LCSSA.cpp | 5 + + llvm/lib/Transforms/Utils/LoopSimplify.cpp | 8 + + llvm/lib/Transforms/Utils/LoopUnroll.cpp | 3 + + 
llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + + .../Vectorize/LoopVectorizationLegality.cpp | 12 + + .../Transforms/Vectorize/LoopVectorize.cpp | 34 + + .../Inputs/unroll_template.yaml | 8 + + .../AutotuningDump/create-data-dir.ll | 65 ++ + llvm/test/AutoTuning/AutotuningDump/unroll.ll | 35 + + .../autotune_datadir/baseline_config.yaml | 9 + + .../autotune_datadir/random_config.yaml | 9 + + .../AutoTuning/BaselineConfig/Inputs/test.ll | 117 +++ + .../BaselineConfig/apply_baseline_config.ll | 11 + + llvm/test/AutoTuning/BaselineConfig/opp.ll | 67 ++ + .../CodeRegionFilter/function-filtering.ll | 62 ++ + .../Error/Inputs/invalid-format.yaml | 3 + + .../AutoTuning/Error/Inputs/template.yaml | 10 + + .../AutoTuning/Error/file-not-found-error.ll | 29 + + .../AutoTuning/Error/invalid-yaml-error.ll | 27 + + .../AutoTuning/Error/malformed-input-error.ll | 136 ++++ + llvm/test/AutoTuning/Error/output-error.ll | 28 + + llvm/test/AutoTuning/Error/valid-input.ll | 27 + + .../Inputs/template.yaml | 9 + + .../inc-compile-parse-input.ll | 103 +++ + .../AutoTuning/Inline/Inputs/template.yaml | 9 + + .../Inline/Inputs/template_no_metadata.yaml | 7 + + .../test/AutoTuning/Inline/duplicate-calls.ll | 96 +++ + llvm/test/AutoTuning/Inline/force-inline.ll | 84 +++ + .../AutoTuning/Inline/inline-attribute.ll | 85 +++ + llvm/test/AutoTuning/Inline/opp.ll | 64 ++ + .../LoopUnroll/Inputs/debug_loc_template.yaml | 10 + + .../LoopUnroll/Inputs/loop_nest.yaml | 10 + + .../LoopUnroll/Inputs/loop_peel.yaml | 9 + + .../Inputs/unroll_raw_template.yaml | 10 + + .../LoopUnroll/Inputs/unroll_template.yaml | 10 + + .../Inputs/unroll_template_no_metadata.yaml | 8 + + llvm/test/AutoTuning/LoopUnroll/debug_loc.ll | 161 ++++ + .../AutoTuning/LoopUnroll/dynamic_config.ll | 56 ++ + llvm/test/AutoTuning/LoopUnroll/loop_nest.ll | 136 ++++ + llvm/test/AutoTuning/LoopUnroll/loop_peel.ll | 53 ++ + .../AutoTuning/LoopUnroll/unroll-pragma.ll | 129 ++++ + llvm/test/AutoTuning/LoopUnroll/unroll.ll | 101 +++ + 
llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll | 113 +++ + .../Inputs/vectorize_template.yaml | 9 + + .../vectorize_template_no_metadata.yaml | 7 + + .../LoopVectorize/force-vector-interleave.ll | 88 +++ + .../Inputs/misched_x86_template.yaml | 10 + + .../misched_x86_bidirectional.ll | 73 ++ + .../MachineScheduler/misched_x86_bottomup.ll | 72 ++ + .../MachineScheduler/misched_x86_topdown.ll | 72 ++ + .../AutoTuning/MetaData/structural_hash.ll | 234 ++++++ + .../AutoTuning/MetaData/write_no_metadata.ll | 191 +++++ + .../MetaData/write_with_metadata.ll | 204 +++++ + .../AutoTuning/PGO/Inputs/pgo-instr.proftext | 17 + + .../PGO/Inputs/pgo-sample-cold.prof | 7 + + .../AutoTuning/PGO/Inputs/pgo-sample-hot.prof | 7 + + llvm/test/AutoTuning/PGO/pgo-instr-filters.ll | 61 ++ + .../test/AutoTuning/PGO/pgo-sample-filters.ll | 138 ++++ + .../Inputs/pass_invocation.yaml | 10 + + .../PassInvocation/pass_invocation_read.ll | 64 ++ + .../PassInvocation/pass_invocation_write.ll | 67 ++ + .../PhaseOrdering/Inputs/template.yaml | 8 + + .../AutoTuning/PhaseOrdering/pass-order.ll | 65 ++ + .../AutoTuning/SwitchLowering/switch-opp.ll | 47 ++ + llvm/test/AutoTuning/lit.local.cfg | 2 + + llvm/test/AutoTuning/opt-opp.ll | 315 ++++++++ + llvm/test/lit.site.cfg.py.in | 1 + + llvm/tools/llc/llc.cpp | 19 + + llvm/tools/opt/NewPMDriver.cpp | 42 ++ + llvm/tools/opt/opt.cpp | 53 ++ + 132 files changed, 7801 insertions(+) + create mode 100644 llvm/include/llvm/Analysis/AutotuningDump.h + create mode 100644 llvm/include/llvm/AutoTuner/AutoTuning.h + create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h + create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h + create mode 100644 llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h + create mode 100644 llvm/lib/Analysis/AutotuningDump.cpp + create mode 100644 llvm/lib/AutoTuner/AutoTuning.cpp + create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp + create mode 100644 
llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp + create mode 100644 llvm/lib/AutoTuner/CMakeLists.txt + create mode 100644 llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp + create mode 100644 llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml + create mode 100644 llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll + create mode 100644 llvm/test/AutoTuning/AutotuningDump/unroll.ll + create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml + create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml + create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll + create mode 100644 llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll + create mode 100644 llvm/test/AutoTuning/BaselineConfig/opp.ll + create mode 100644 llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll + create mode 100644 llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml + create mode 100644 llvm/test/AutoTuning/Error/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/Error/file-not-found-error.ll + create mode 100644 llvm/test/AutoTuning/Error/invalid-yaml-error.ll + create mode 100644 llvm/test/AutoTuning/Error/malformed-input-error.ll + create mode 100644 llvm/test/AutoTuning/Error/output-error.ll + create mode 100644 llvm/test/AutoTuning/Error/valid-input.ll + create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll + create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template.yaml + create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml + create mode 100644 llvm/test/AutoTuning/Inline/duplicate-calls.ll + create mode 100644 llvm/test/AutoTuning/Inline/force-inline.ll + create mode 100644 llvm/test/AutoTuning/Inline/inline-attribute.ll + create mode 100644 llvm/test/AutoTuning/Inline/opp.ll + create mode 100644 
llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml + create mode 100644 llvm/test/AutoTuning/LoopUnroll/debug_loc.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_nest.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_peel.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll.ll + create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll + create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml + create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml + create mode 100644 llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll + create mode 100644 llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml
View file
_service:tar_scm:0021-Backport-GlobalISel-Don-t-expand-stacksave-stackrestore-in-IRTranslator.patch
Deleted
@@ -1,315 +0,0 @@ -From 7aeecae6393d5c3333beec64ad343ed1cabe75e4 Mon Sep 17 00:00:00 2001 -From: Matt Arsenault <Matthew.Arsenault@amd.com> -Date: Sat, 29 Jul 2023 19:12:24 -0400 -Subject: PATCH 1/7 GlobalISel: Don't expand stacksave/stackrestore in - IRTranslator - -In some (likely invalid edge cases anyway), it's not correct to -directly copy the stack pointer register. ---- - .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + - llvm/include/llvm/Support/TargetOpcodes.def | 6 +++ - llvm/include/llvm/Target/GenericOpcodes.td | 12 ++++++ - llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 25 ++---------- - .../CodeGen/GlobalISel/LegalizerHelper.cpp | 26 +++++++++++++ - .../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 +- - llvm/lib/Target/X86/X86LegalizerInfo.cpp | 4 ++ - .../AArch64/GlobalISel/arm64-irtranslator.ll | 4 +- - .../GlobalISel/legalizer-info-validation.mir | 10 ++++- - .../GlobalISel/stacksave-stackrestore.ll | 35 +++++++++++++++++ - .../X86/GlobalISel/stacksave-stackrestore.ll | 39 +++++++++++++++++++ - 11 files changed, 141 insertions(+), 26 deletions(-) - create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll - create mode 100644 llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll - -diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h -index a568edd0e640..9288091874cf 100644 ---- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h -+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h -@@ -401,6 +401,8 @@ public: - LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI); - LegalizeResult lowerShuffleVector(MachineInstr &MI); - LegalizeResult lowerDynStackAlloc(MachineInstr &MI); -+ LegalizeResult lowerStackSave(MachineInstr &MI); -+ LegalizeResult lowerStackRestore(MachineInstr &MI); - LegalizeResult lowerExtract(MachineInstr &MI); - LegalizeResult lowerInsert(MachineInstr &MI); - LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); 
-diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def -index 186bea75ae96..c92ce6dc701c 100644 ---- a/llvm/include/llvm/Support/TargetOpcodes.def -+++ b/llvm/include/llvm/Support/TargetOpcodes.def -@@ -763,6 +763,12 @@ HANDLE_TARGET_OPCODE(G_JUMP_TABLE) - /// Generic dynamic stack allocation. - HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC) - -+/// Generic stack pointer save. -+HANDLE_TARGET_OPCODE(G_STACKSAVE) -+ -+/// Generic stack pointer restore. -+HANDLE_TARGET_OPCODE(G_STACKRESTORE) -+ - /// Strict floating point instructions. - HANDLE_TARGET_OPCODE(G_STRICT_FADD) - HANDLE_TARGET_OPCODE(G_STRICT_FSUB) -diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td -index 00d56d1c4bd5..e8cfaeab3cd8 100644 ---- a/llvm/include/llvm/Target/GenericOpcodes.td -+++ b/llvm/include/llvm/Target/GenericOpcodes.td -@@ -225,6 +225,18 @@ def G_DYN_STACKALLOC : GenericInstruction { - let hasSideEffects = true; - } - -+def G_STACKSAVE : GenericInstruction { -+ let OutOperandList = (outs ptype0:$dst); -+ let InOperandList = (ins); -+ let hasSideEffects = true; -+} -+ -+def G_STACKRESTORE : GenericInstruction { -+ let OutOperandList = (outs); -+ let InOperandList = (ins ptype0:$src); -+ let hasSideEffects = true; -+} -+ - def G_FREEZE : GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); -diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp -index 9a67a8d05a4d..e4b837c6b8ce 100644 ---- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp -+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp -@@ -2229,31 +2229,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, - return true; - } - case Intrinsic::stacksave: { -- // Save the stack pointer to the location provided by the intrinsic. 
-- Register Reg = getOrCreateVReg(CI); -- Register StackPtr = MF->getSubtarget() -- .getTargetLowering() -- ->getStackPointerRegisterToSaveRestore(); -- -- // If the target doesn't specify a stack pointer, then fall back. -- if (!StackPtr) -- return false; -- -- MIRBuilder.buildCopy(Reg, StackPtr); -+ MIRBuilder.buildInstr(TargetOpcode::G_STACKSAVE, {getOrCreateVReg(CI)}, {}); - return true; - } - case Intrinsic::stackrestore: { -- // Restore the stack pointer from the location provided by the intrinsic. -- Register Reg = getOrCreateVReg(*CI.getArgOperand(0)); -- Register StackPtr = MF->getSubtarget() -- .getTargetLowering() -- ->getStackPointerRegisterToSaveRestore(); -- -- // If the target doesn't specify a stack pointer, then fall back. -- if (!StackPtr) -- return false; -- -- MIRBuilder.buildCopy(StackPtr, Reg); -+ MIRBuilder.buildInstr(TargetOpcode::G_STACKRESTORE, {}, -+ {getOrCreateVReg(*CI.getArgOperand(0))}); - return true; - } - case Intrinsic::cttz: -diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp -index f0da0d88140f..75d9789be4d0 100644 ---- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp -+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp -@@ -3503,6 +3503,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { - return lowerShuffleVector(MI); - case G_DYN_STACKALLOC: - return lowerDynStackAlloc(MI); -+ case G_STACKSAVE: -+ return lowerStackSave(MI); -+ case G_STACKRESTORE: -+ return lowerStackRestore(MI); - case G_EXTRACT: - return lowerExtract(MI); - case G_INSERT: -@@ -6810,6 +6814,28 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { - return Legalized; - } - -+LegalizerHelper::LegalizeResult -+LegalizerHelper::lowerStackSave(MachineInstr &MI) { -+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); -+ if (!StackPtr) -+ return UnableToLegalize; -+ -+ MIRBuilder.buildCopy(MI.getOperand(0), StackPtr); -+ MI.eraseFromParent(); -+ return 
Legalized; -+} -+ -+LegalizerHelper::LegalizeResult -+LegalizerHelper::lowerStackRestore(MachineInstr &MI) { -+ Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); -+ if (!StackPtr) -+ return UnableToLegalize; -+ -+ MIRBuilder.buildCopy(StackPtr, MI.getOperand(0)); -+ MI.eraseFromParent(); -+ return Legalized; -+} -+ - LegalizerHelper::LegalizeResult - LegalizerHelper::lowerExtract(MachineInstr &MI) { - auto DstReg, DstTy, SrcReg, SrcTy = MI.getFirst2RegLLTs(); -diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp -index d905da4eaec3..f0130a0be29d 100644 ---- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp -+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp -@@ -797,7 +797,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) - return Query.Types0 == p0 && Query.Types1 == s64; - }); - -- getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); -+ getActionDefinitionsBuilder({G_DYN_STACKALLOC, -+ G_STACKSAVE, -+ G_STACKRESTORE}).lower(); - - if (ST.hasMOPS()) { - // G_BZERO is not supported. 
Currently it is only emitted by -diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp -index a4a247f85f3d..104461cff0a9 100644 ---- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp -+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp -@@ -528,6 +528,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, - // memory intrinsics - getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); - -+ getActionDefinitionsBuilder({G_DYN_STACKALLOC, -+ G_STACKSAVE, -+ G_STACKRESTORE}).lower(); -+ - // fp intrinsics - getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN) - .scalarize(0) -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll -index 5f3544add398..575cd6b874e3 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll -@@ -2392,8 +2392,8 @@ declare ptr @llvm.stacksave() - declare void @llvm.stackrestore(ptr) - define void @test_stacksaverestore() { - ; CHECK-LABEL: name: test_stacksaverestore -- ; CHECK: SAVE:%0-9+:_(p0) = COPY $sp
View file
_service:tar_scm:0022-Backport-AArch64-Refactor-allocation-of-locals-and-stack-realignment.patch
Deleted
@@ -1,546 +0,0 @@ -From 8db377e2a22d83637171008b6c8723f1869a2926 Mon Sep 17 00:00:00 2001 -From: rickyleung <leung.wing.chung@huawei.com> -Date: Tue, 7 May 2024 21:24:49 +0800 -Subject: PATCH 3/7 backportAArch64 Refactor allocation of locals and - stack realignment - -Reference: https://github.com/wc00862805aj/llvm-project/commit/dedf2c6bb5193652f6ad7d9ff9e676624c2485b7? - -Factor out some stack allocation in a separate function. This patch -splits out the generic portion of a larger refactoring done as a part of -stack clash protection support. - -The patch is almost, but not quite NFC. The only difference should -be that where we have adjacent allocation of stack space -for local SVE objects and non-local SVE objects the order -of `sub sp, ...` and `addvl sp, ...` instructions is reversed, because now -it's done with a single call to `emitFrameOffset` and it happens -add/subtract the fixed part before the scalable part, e.g. - - addvl sp, sp, #-2 - sub sp, sp, llvm#16, lsl llvm#12 - sub sp, sp, llvm#16 - -becomes - - sub sp, sp, llvm#16, lsl llvm#12 - sub sp, sp, llvm#16 - addvl sp, sp, #-2 ---- - .../Target/AArch64/AArch64FrameLowering.cpp | 114 +++++++++--------- - .../lib/Target/AArch64/AArch64FrameLowering.h | 5 + - .../AArch64/framelayout-sve-basepointer.mir | 4 +- - .../framelayout-sve-fixed-width-access.mir | 2 +- - .../framelayout-sve-scavengingslot.mir | 4 +- - llvm/test/CodeGen/AArch64/framelayout-sve.mir | 54 ++++----- - .../AArch64/spill-stack-realignment.mir | 2 +- - llvm/test/CodeGen/AArch64/stack-guard-sve.ll | 4 +- - .../AArch64/sve-calling-convention-mixed.ll | 4 +- - .../CodeGen/AArch64/sve-fixed-length-fp128.ll | 4 +- - 10 files changed, 103 insertions(+), 94 deletions(-) - -diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -index 4d5676f34101..eeb6185fa36d 100644 ---- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -@@ 
-300,6 +300,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, - static bool produceCompactUnwindFrame(MachineFunction &MF); - static bool needsWinCFI(const MachineFunction &MF); - static StackOffset getSVEStackSize(const MachineFunction &MF); -+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); - static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF); - - /// Returns true if a homogeneous prolog or epilog code can be emitted -@@ -671,6 +672,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores( - emitCalleeSavedRestores(MBB, MBBI, true); - } - -+void AArch64FrameLowering::allocateStackSpace( -+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -+ bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI, -+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const { -+ -+ if (!AllocSize) -+ return; -+ -+ DebugLoc DL; -+ MachineFunction &MF = *MBB.getParent(); -+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); -+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); -+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); -+ const MachineFrameInfo &MFI = MF.getFrameInfo(); -+ -+ Register TargetReg = -+ NeedsRealignment ? 
findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP; -+ // SUB Xd/SP, SP, AllocSize -+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, -+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, -+ EmitCFI, InitialOffset); -+ -+ if (NeedsRealignment) { -+ const int64_t MaxAlign = MFI.getMaxAlign().value(); -+ const uint64_t AndMask = ~(MaxAlign - 1); -+ // AND SP, Xd, 0b11111...0000 -+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) -+ .addReg(TargetReg, RegState::Kill) -+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) -+ .setMIFlags(MachineInstr::FrameSetup); -+ AFI.setStackRealigned(true); -+ -+ // No need for SEH instructions here; if we're realigning the stack, -+ // we've set a frame pointer and already finished the SEH prologue. -+ assert(!NeedsWinCFI); -+ } -+} -+ - static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { - switch (Reg.id()) { - default: -@@ -1769,7 +1808,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, - } - } - -- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {}; -+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; - MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI; - - // Process the SVE callee-saves to determine what space needs to be -@@ -1782,67 +1821,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, - ++MBBI; - CalleeSavesEnd = MBBI; - -- AllocateBefore = StackOffset::getScalable(CalleeSavedSize); -- AllocateAfter = SVEStackSize - AllocateBefore; -+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); -+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize; - } - - // Allocate space for the callee saves (if any). 
-- emitFrameOffset( -- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII, -- MachineInstr::FrameSetup, false, false, nullptr, -- EmitAsyncCFI && !HasFP && AllocateBefore, -- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); -+ StackOffset CFAOffset = -+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); -+ allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false, -+ nullptr, EmitAsyncCFI && !HasFP, CFAOffset); -+ CFAOffset += SVECalleeSavesSize; - - if (EmitAsyncCFI) - emitCalleeSavedSVELocations(MBB, CalleeSavesEnd); - -- // Finally allocate remaining SVE stack space. -- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP, -- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false, -- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter, -- AllocateBefore + StackOffset::getFixed( -- (int64_t)MFI.getStackSize() - NumBytes)); -- -- // Allocate space for the rest of the frame. -- if (NumBytes) { -- unsigned scratchSPReg = AArch64::SP; -- -- if (NeedsRealignment) { -- scratchSPReg = findScratchNonCalleeSaveRegister(&MBB); -- assert(scratchSPReg != AArch64::NoRegister); -- } -- -- // If we're a leaf function, try using the red zone. -- if (!canUseRedZone(MF)) { -- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have -- // the correct value here, as NumBytes also includes padding bytes, -- // which shouldn't be counted here. -- emitFrameOffset( -- MBB, MBBI, DL, scratchSPReg, AArch64::SP, -- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup, -- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, -- SVEStackSize + -- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes)); -- } -- if (NeedsRealignment) { -- assert(MFI.getMaxAlign() > Align(1)); -- assert(scratchSPReg != AArch64::SP); -- -- // SUB X9, SP, NumBytes -- // -- X9 is temporary register, so shouldn't contain any live data here, -- // -- free to use. 
This is already produced by emitFrameOffset above. -- // AND SP, X9, 0b11111...0000 -- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1); -- -- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) -- .addReg(scratchSPReg, RegState::Kill) -- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)); -- AFI->setStackRealigned(true); -- -- // No need for SEH instructions here; if we're realigning the stack, -- // we've set a frame pointer and already finished the SEH prologue. -- assert(!NeedsWinCFI); -- } -+ // Allocate space for the rest of the frame including SVE locals. Align the -+ // stack as necessary. -+ assert(!(canUseRedZone(MF) && NeedsRealignment) && -+ "Cannot use redzone with stack realignment"); -+ if (!canUseRedZone(MF)) { -+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have -+ // the correct value here, as NumBytes also includes padding bytes, -+ // which shouldn't be counted here. -+ allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment, -+ SVELocalsSize + StackOffset::getFixed(NumBytes), -+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, -+ CFAOffset); - } - - // If we need a base pointer, set it up here. It's whatever the value of the -diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h -index 147b5c181be5..f3313f3b53ff 100644 ---- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h -+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h -@@ -150,6 +150,11 @@ private:
View file
_service:tar_scm:0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
Added
@@ -0,0 +1,28 @@ +From 4673c2eaba443678c4dc6ae74ea16a489b415fed Mon Sep 17 00:00:00 2001 +From: liyunfei <liyunfei33@huawei.com> +Date: Tue, 19 Sep 2023 09:31:43 +0800 +Subject: PATCH Prevent environment variables from exceeding NAME_MAX + +--- + llvm/lib/Support/Unix/Path.inc | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc +index 2ae7c6dc..f13f3165 100644 +--- a/llvm/lib/Support/Unix/Path.inc ++++ b/llvm/lib/Support/Unix/Path.inc +@@ -1427,8 +1427,12 @@ static const char *getEnvTempDir() { + // variable. + const char *EnvironmentVariables = {"TMPDIR", "TMP", "TEMP", "TEMPDIR"}; + for (const char *Env : EnvironmentVariables) { +- if (const char *Dir = std::getenv(Env)) ++ if (const char *Dir = std::getenv(Env)) { ++ if(std::strlen(Dir) > NAME_MAX) { ++ continue; ++ } + return Dir; ++ } + } + + return nullptr; +-- \ No newline at end of file
View file
_service:tar_scm:0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
Added
@@ -0,0 +1,517 @@ +From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001 +From: xiajingze <xiajingze1@huawei.com> +Date: Wed, 31 Jul 2024 18:37:29 +0800 +Subject: PATCH AArch64 Support HiSilicon's HIP09 Processor + +Signed-off-by: xiajingze <xiajingze1@huawei.com> +--- + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++ + .../llvm/TargetParser/AArch64TargetParser.h | 7 ++ + llvm/lib/Target/AArch64/AArch64.td | 36 +++++++ + .../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++ + llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 ++ + llvm/lib/Target/AArch64/AArch64Subtarget.h | 9 +- + llvm/lib/Target/CMakeLists.txt | 4 + + llvm/lib/TargetParser/Host.cpp | 3 + + llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 +++ + .../CodeGen/AArch64/macro-fusion-mvnclz.mir | 20 ++++ + .../AArch64/misched-fusion-lit-hip09.ll | 73 ++++++++++++++ + llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ++++ + llvm/test/lit.site.cfg.py.in | 4 + + llvm/unittests/TargetParser/Host.cpp | 5 + + .../TargetParser/TargetParserTest.cpp | 16 +++ + 15 files changed, 277 insertions(+), 1 deletion(-) + create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll + create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir + create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll + create mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll + +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index 8be5d4ba5..74e68e25d 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -112,6 +112,14 @@ else() + set(LLVM_ENABLE_AUTOTUNER 0) + endif() + ++option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON) ++if(LLVM_ENABLE_AARCH64_HIP09) ++ set(LLVM_ENABLE_AARCH64_HIP09 1) ++ add_definitions( -DENABLE_AARCH64_HIP09 ) ++else() ++ set(LLVM_ENABLE_AARCH64_HIP09 0) ++endif() ++ + if(LLVM_ENABLE_EXPENSIVE_CHECKS) + add_compile_definitions(EXPENSIVE_CHECKS) + +diff 
--git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h +index dc4cdfa8e..07cd2fcbb 100644 +--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h ++++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h +@@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos = { + (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 | + AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES | + AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)}, ++#if defined(ENABLE_AARCH64_HIP09) ++ {"hip09", ARMV8_5A, ++ (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 | ++ AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE | ++ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM | ++ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)}, ++#endif + }; + + // An alias for a CPU. +diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td +index 8f50af4b7..c8bfd770f 100644 +--- a/llvm/lib/Target/AArch64/AArch64.td ++++ b/llvm/lib/Target/AArch64/AArch64.td +@@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature< + "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true", + "CPU fuses (a + b + 1) and (a - b - 1)">; + ++#ifdef ENABLE_AARCH64_HIP09 ++def FeatureFuseMvnClz : SubtargetFeature< ++ "fuse-mvn-clz", "HasFuseMvnClz", "true", ++ "CPU fuses mvn+clz operations">; ++#endif ++ + def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; +@@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + FeatureFuseAES, + FeaturePostRAScheduler>; + ++#ifdef ENABLE_AARCH64_HIP09 ++def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09", ++ "HiSilicon HIP-09 processors", ++ FeatureCustomCheapAsMoveHandling, ++ FeatureExperimentalZeroingPseudos, ++ FeatureFuseAES, ++ 
FeatureLSLFast, ++ FeatureAscendStoreAddress, ++ FeatureCmpBccFusion, ++ FeatureArithmeticBccFusion, ++ FeatureFuseLiterals, ++ FeatureFuseMvnClz, ++ FeaturePostRAScheduler>; ++#endif ++ + def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", + FeaturePostRAScheduler, +@@ -1359,6 +1380,14 @@ def ProcessorFeatures { + list<SubtargetFeature> TSV110 = HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd; ++#ifdef ENABLE_AARCH64_HIP09 ++ list<SubtargetFeature> HIP09 = HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8, ++ FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64, ++ FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, ++ FeatureFullFP16, FeatureFP16FML, FeatureDotProd, ++ FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4, ++ FeatureSVE; ++#endif + list<SubtargetFeature> Ampere1 = HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureSSBS, FeatureRandGen, FeatureSB, + FeatureSHA2, FeatureSHA3, FeatureAES; +@@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, + // Marvell ThunderX3T110 Processors. + def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + ProcessorFeatures.ThunderX3T110, TuneThunderX3T110>; ++ ++// HiSilicon Processors. + def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + TuneTSV110>; ++#ifdef ENABLE_AARCH64_HIP09 ++// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57. ++def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09, ++ TuneHIP09>; ++#endif + + // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. 
+ def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, +diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +index 05d60872b..4963ec350 100644 +--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp ++++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +@@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, + case AArch64::SUBSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: ++#if defined(ENABLE_AARCH64_HIP09) ++ case AArch64::ADCSWr: ++ case AArch64::ADCSXr: ++ case AArch64::SBCSWr: ++ case AArch64::SBCSXr: ++#endif + return true; + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: +@@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI, + SecondMI.getOperand(3).getImm() == 16)) + return true; + ++#if defined(ENABLE_AARCH64_HIP09) ++ // 32 bit immediate. ++ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) && ++ (SecondMI.getOpcode() == AArch64::MOVKWi && ++ SecondMI.getOperand(3).getImm() == 16)) ++ return true; ++ ++ // Lower half of 64 bit immediate. ++ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) && ++ (SecondMI.getOpcode() == AArch64::MOVKWi && ++ SecondMI.getOperand(3).getImm() == 16)) ++ return true; ++#endif ++ + // Upper half of 64 bit immediate. + if ((FirstMI == nullptr || + (FirstMI->getOpcode() == AArch64::MOVKXi && +@@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI, + return false; + } + ++#if defined(ENABLE_AARCH64_HIP09) ++static bool isMvnClzPair(const MachineInstr *FirstMI, ++ const MachineInstr &SecondMI) { ++ // HIP09 supports fusion of MVN + CLZ. ++ // The CLZ can be fused with MVN and make execution faster. ++ // And the fusion is not allowed for shifted forms. ++ // ++ // Instruction alias info: ++ // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to ++ // ORN <Wd>, WZR, <Wm>{, <shift> #<amount>} ++ // 2. 
MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to ++ // ORN <Xd>, XZR, <Xm>{, <shift> #<amount>} ++ // Assume the 1st instr to be a wildcard if it is unspecified. ++ if ((FirstMI == nullptr || ++ ((FirstMI->getOpcode() == AArch64::ORNWrs) && ++ (FirstMI->getOperand(1).getReg() == AArch64::WZR) && ++ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) && ++ (SecondMI.getOpcode() == AArch64::CLZWr)) ++ return true; ++ ++ if ((FirstMI == nullptr || ++ ((FirstMI->getOpcode() == AArch64::ORNXrs) &&
View file
_service:tar_scm:0023-Backport-AArch64-Stack-probing-for-function-prologues.patch
Deleted
@@ -1,2652 +0,0 @@ -From 3a9ddc2f95926a75a9b436ad4dfd4070f535a113 Mon Sep 17 00:00:00 2001 -From: rickyleung <leung.wing.chung@huawei.com> -Date: Tue, 7 May 2024 21:25:52 +0800 -Subject: PATCH 4/7 backportAArch64 Stack probing for function prologues - -Reference: https://github.com/llvm/llvm-project/commit/cc944f502f1ee20d73ff88c2c86cc909f12caadb - -This adds code to AArch64 function prologues to protect against stack -clash attacks by probing (writing to) the stack at regular enough -intervals to ensure that the guard page cannot be skipped over. - -The patch depends on and maintains the following invariants: - -Upon function entry the caller guarantees that it has probed the stack -(e.g. performed a store) at some address sp, #N, where`0 <= N <= -1024`. This invariant comes from a requirement for compatibility with -GCC. Any address range in the allocated stack, no smaller than -stack-probe-size bytes contains at least one probe At any time the stack -pointer is above or in the guard page Probes are performed in -descreasing address order -The stack-probe-size is a function attribute that can be set by a -platform to correspond to the guard page size. - -By default, the stack probe size is 4KiB, which is a safe default as -this is the smallest possible page size for AArch64. Linux uses a 64KiB -guard for AArch64, so this can be overridden by the stack-probe-size -function attribute. - -For small frames without a frame pointer (<= 240 bytes), no probes are -needed. - -For larger frame sizes, LLVM always stores x29 to the stack. This serves -as an implicit stack probe. Thus, while allocating stack objects the -compiler assumes that the stack has been probed at sp. - -There are multiple probing sequences that can be emitted, depending on -the size of the stack allocation: - -A straight-line sequence of subtracts and stores, used when the -allocation size is smaller than 5 guard pages. 
A loop allocating and -probing one page size per iteration, plus at most a single probe to deal -with the remainder, used when the allocation size is larger but still -known at compile time. A loop which moves the SP down to the target -value held in a register (or a loop, moving a scratch register to the -target value help in SP), used when the allocation size is not known at -compile-time, such as when allocating space for SVE values, or when -over-aligning the stack. This is emitted in AArch64InstrInfo because it -will also be used for dynamic allocas in a future patch. A single probe -where the amount of stack adjustment is unknown, but is known to be less -than or equal to a page size. - ---------- - -Co-authored-by: Oliver Stannard <oliver.stannard@linaro.org> ---- - .../Target/AArch64/AArch64FrameLowering.cpp | 335 +++++++- - .../lib/Target/AArch64/AArch64FrameLowering.h | 17 +- - .../Target/AArch64/AArch64ISelLowering.cpp | 6 + - llvm/lib/Target/AArch64/AArch64ISelLowering.h | 10 + - llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 90 +++ - llvm/lib/Target/AArch64/AArch64InstrInfo.h | 6 + - llvm/lib/Target/AArch64/AArch64InstrInfo.td | 24 +- - .../AArch64/AArch64MachineFunctionInfo.cpp | 43 +- - .../AArch64/AArch64MachineFunctionInfo.h | 6 + - .../test/CodeGen/AArch64/stack-probing-64k.ll | 392 ++++++++++ - .../AArch64/stack-probing-last-in-block.mir | 146 ++++ - .../test/CodeGen/AArch64/stack-probing-sve.ll | 724 ++++++++++++++++++ - llvm/test/CodeGen/AArch64/stack-probing.ll | 539 +++++++++++++ - 13 files changed, 2300 insertions(+), 38 deletions(-) - create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-64k.ll - create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir - create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-sve.ll - create mode 100644 llvm/test/CodeGen/AArch64/stack-probing.ll - -diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -index 
eeb6185fa36d..af019ab23770 100644 ---- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -@@ -672,10 +672,18 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores( - emitCalleeSavedRestores(MBB, MBBI, true); - } - -+// Return the maximum possible number of bytes for `Size` due to the -+// architectural limit on the size of a SVE register. -+static int64_t upperBound(StackOffset Size) { -+ static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16; -+ return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed(); -+} -+ - void AArch64FrameLowering::allocateStackSpace( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -- bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI, -- bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const { -+ int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI, -+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset, -+ bool FollowupAllocs) const { - - if (!AllocSize) - return; -@@ -687,27 +695,129 @@ void AArch64FrameLowering::allocateStackSpace( - AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - -- Register TargetReg = -- NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP; -- // SUB Xd/SP, SP, AllocSize -+ const int64_t MaxAlign = MFI.getMaxAlign().value(); -+ const uint64_t AndMask = ~(MaxAlign - 1); -+ -+ if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) { -+ Register TargetReg = RealignmentPadding -+ ? 
findScratchNonCalleeSaveRegister(&MBB) -+ : AArch64::SP; -+ // SUB Xd/SP, SP, AllocSize -+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, -+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, -+ EmitCFI, InitialOffset); -+ -+ if (RealignmentPadding) { -+ // AND SP, X9, 0b11111...0000 -+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) -+ .addReg(TargetReg, RegState::Kill) -+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) -+ .setMIFlags(MachineInstr::FrameSetup); -+ AFI.setStackRealigned(true); -+ -+ // No need for SEH instructions here; if we're realigning the stack, -+ // we've set a frame pointer and already finished the SEH prologue. -+ assert(!NeedsWinCFI); -+ } -+ return; -+ } -+ -+ // -+ // Stack probing allocation. -+ // -+ -+ // Fixed length allocation. If we don't need to re-align the stack and don't -+ // have SVE objects, we can use a more efficient sequence for stack probing. -+ if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) { -+ Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB); -+ assert(ScratchReg != AArch64::NoRegister); -+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC)) -+ .addDef(ScratchReg) -+ .addImm(AllocSize.getFixed()) -+ .addImm(InitialOffset.getFixed()) -+ .addImm(InitialOffset.getScalable()); -+ // The fixed allocation may leave unprobed bytes at the top of the -+ // stack. If we have subsequent alocation (e.g. if we have variable-sized -+ // objects), we need to issue an extra probe, so these allocations start in -+ // a known state. -+ if (FollowupAllocs) { -+ // STR XZR, SP -+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui)) -+ .addReg(AArch64::XZR) -+ .addReg(AArch64::SP) -+ .addImm(0) -+ .setMIFlags(MachineInstr::FrameSetup); -+ } -+ -+ return; -+ } -+ -+ // Variable length allocation. -+ -+ // If the (unknown) allocation size cannot exceed the probe size, decrement -+ // the stack pointer right away. 
-+ int64_t ProbeSize = AFI.getStackProbeSize(); -+ if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) { -+ Register ScratchReg = RealignmentPadding -+ ? findScratchNonCalleeSaveRegister(&MBB) -+ : AArch64::SP; -+ assert(ScratchReg != AArch64::NoRegister); -+ // SUB Xd, SP, AllocSize -+ emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII, -+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, -+ EmitCFI, InitialOffset); -+ if (RealignmentPadding) { -+ // AND SP, Xn, 0b11111...0000 -+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) -+ .addReg(ScratchReg, RegState::Kill) -+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) -+ .setMIFlags(MachineInstr::FrameSetup); -+ AFI.setStackRealigned(true); -+ } -+ if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding > -+ AArch64::StackProbeMaxUnprobedStack) { -+ // STR XZR, SP -+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui)) -+ .addReg(AArch64::XZR) -+ .addReg(AArch64::SP) -+ .addImm(0) -+ .setMIFlags(MachineInstr::FrameSetup); -+ } -+ return; -+ } -+ -+ // Emit a variable-length allocation probing loop. -+ // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
View file
_service:tar_scm:0024-Backport-AArch64-Stack-probing-for-dynamic-allocas-in-SelectionDAG.patch
Deleted
@@ -1,744 +0,0 @@ -From e433199a7dbe87324a671299f6509f19d295382f Mon Sep 17 00:00:00 2001 -From: rickyleung <leung.wing.chung@huawei.com> -Date: Fri, 26 Apr 2024 16:59:48 +0800 -Subject: PATCH 5/7 backportAArch64 Stack probing for dynamic allocas in - SelectionDAG - -Reference: https://github.com/llvm/llvm-project/commit/b1806e6a1f0589acc88499419531c4eb82488f1a - -Add support for probing for dynamic allocas (variable-size objects and -outgoing stack arguments). - -Co-authored-by: Oliver Stannard <oliver.stannard@linaro.org> ---- - .../Target/AArch64/AArch64FrameLowering.cpp | 26 ++ - .../Target/AArch64/AArch64ISelLowering.cpp | 152 +++++--- - llvm/lib/Target/AArch64/AArch64ISelLowering.h | 13 +- - llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 + - .../stack-probing-dynamic-no-frame-setup.ll | 14 + - .../CodeGen/AArch64/stack-probing-dynamic.ll | 362 ++++++++++++++++++ - 6 files changed, 526 insertions(+), 55 deletions(-) - create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-dynamic-no-frame-setup.ll - create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll - -diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -index af019ab23770..fe21173f531f 100644 ---- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp -@@ -462,6 +462,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { - /// included as part of the stack frame. - bool - AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { -+ // The stack probing code for the dynamically allocated outgoing arguments -+ // area assumes that the stack is probed at the top - either by the prologue -+ // code, which issues a probe if `hasVarSizedObjects` return true, or by the -+ // most recent variable-sized object allocation. Changing the condition here -+ // may need to be followed up by changes to the probe issuing logic. 
- return !MF.getFrameInfo().hasVarSizedObjects(); - } - -@@ -470,6 +475,9 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( - MachineBasicBlock::iterator I) const { - const AArch64InstrInfo *TII = - static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); -+ const AArch64TargetLowering *TLI = -+ MF.getSubtarget<AArch64Subtarget>().getTargetLowering(); -+ MachineFrameInfo &MFI = MF.getFrameInfo(); - DebugLoc DL = I->getDebugLoc(); - unsigned Opc = I->getOpcode(); - bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); -@@ -496,6 +504,24 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( - // Most call frames will be allocated at the start of a function so - // this is OK, but it is a limitation that needs dealing with. - assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); -+ -+ if (TLI->hasInlineStackProbe(MF) && -+ -Amount >= AArch64::StackProbeMaxUnprobedStack) { -+ // When stack probing is enabled, the decrement of SP may need to be -+ // probed. We only need to do this if the call site needs 1024 bytes of -+ // space or more, because a region smaller than that is allowed to be -+ // unprobed at an ABI boundary. We rely on the fact that SP has been -+ // probed exactly at this point, either by the prologue or most recent -+ // dynamic allocation. 
-+ assert(MFI.hasVarSizedObjects() && -+ "non-reserved call frame without var sized objects?"); -+ Register ScratchReg = -+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); -+ inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0)); -+ } else { -+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -+ StackOffset::getFixed(Amount), TII); -+ } - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(Amount), TII); - } -diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp -index 082043420fb9..eff0722e1c77 100644 ---- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp -+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp -@@ -556,10 +556,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - -- if (Subtarget->isTargetWindows()) -- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); -- else -- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); -+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); - - // Constant pool entries - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); -@@ -2288,6 +2285,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { - MAKE_CASE(AArch64ISD::CSINC) - MAKE_CASE(AArch64ISD::THREAD_POINTER) - MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) -+ MAKE_CASE(AArch64ISD::PROBED_ALLOCA) - MAKE_CASE(AArch64ISD::ABDS_PRED) - MAKE_CASE(AArch64ISD::ABDU_PRED) - MAKE_CASE(AArch64ISD::HADDS_PRED) -@@ -2646,6 +2644,22 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( - return BB; - } - -+MachineBasicBlock * -+AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI, -+ MachineBasicBlock *MBB) const { -+ MachineFunction &MF = *MBB->getParent(); -+ MachineBasicBlock::iterator MBBI = MI.getIterator(); -+ DebugLoc DL = 
MBB->findDebugLoc(MBBI); -+ const AArch64InstrInfo &TII = -+ *MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); -+ Register TargetReg = MI.getOperand(0).getReg(); -+ MachineBasicBlock::iterator NextInst = -+ TII.probedStackAlloc(MBBI, TargetReg, false); -+ -+ MI.eraseFromParent(); -+ return NextInst->getParent(); -+} -+ - MachineBasicBlock * - AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, -@@ -2774,6 +2788,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( - - case AArch64::CATCHRET: - return EmitLoweredCatchRet(MI, BB); -+ case AArch64::PROBED_STACKALLOC_DYN: -+ return EmitDynamicProbedAlloc(MI, BB); - case AArch64::LD1_MXIPXX_H_PSEUDO_B: - return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB); - case AArch64::LD1_MXIPXX_H_PSEUDO_H: -@@ -13666,9 +13682,34 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, - AN->getMemOperand()); - } - --SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC( -- SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const { -+SDValue -+AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, -+ SelectionDAG &DAG) const { -+ - SDLoc dl(Op); -+ // Get the inputs. 
-+ SDNode *Node = Op.getNode(); -+ SDValue Chain = Op.getOperand(0); -+ SDValue Size = Op.getOperand(1); -+ MaybeAlign Align = -+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue(); -+ EVT VT = Node->getValueType(0); -+ -+ if (DAG.getMachineFunction().getFunction().hasFnAttribute( -+ "no-stack-arg-probe")) { -+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); -+ Chain = SP.getValue(1); -+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); -+ if (Align) -+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), -+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); -+ Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP); -+ SDValue Ops2 = {SP, Chain}; -+ return DAG.getMergeValues(Ops, dl); -+ } -+ -+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); -+ - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(), - PtrVT, 0); -@@ -13692,7 +13733,59 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC( - - Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size, - DAG.getConstant(4, dl, MVT::i64)); -- return Chain; -+ -+ SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64); -+ Chain = SP.getValue(1); -+ SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size); -+ if (Align) -+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), -+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); -+ Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP); -+ -+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl); -+ -+ SDValue Ops2 = {SP, Chain}; -+ return DAG.getMergeValues(Ops, dl); -+} -+ -+SDValue -+AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op, -+ SelectionDAG &DAG) const { -+ // Get the inputs. -+ SDNode *Node = Op.getNode(); -+ SDValue Chain = Op.getOperand(0); -+ SDValue Size = Op.getOperand(1); -+ -+ MaybeAlign Align = -+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue(); -+ SDLoc dl(Op); -+ EVT VT = Node->getValueType(0);
View file
_service:tar_scm:0024-Backport-LoongArch-fix-and-add-some-new-support.patch
Added
@@ -0,0 +1,5463 @@ +From 53a624f1fbb2d1f837070b400812e8bddf66fd3d Mon Sep 17 00:00:00 2001 +From: Lu Weining <luweining@loongson.cn> +Date: Tue, 5 Dec 2023 09:20:48 +0800 +Subject: PATCH 01/12 BinaryFormatLoongArch Define psABI v2.20 relocs for + R_LARCH_CALL36(#73345) + +R_LARCH_CALL36 was designed for function call on medium code model where +the 2 instructions (pcaddu18i + jirl) must be adjacent. + +(cherry picked from commit c3a9c905fbc486add75e16218fe58a04b7b6c282) +--- + llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def | 6 ++++++ + .../tools/llvm-readobj/ELF/reloc-types-loongarch64.test | 2 ++ + llvm/unittests/Object/ELFTest.cpp | 2 ++ + 3 files changed, 10 insertions(+) + +diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +index 02bce3c71712..c4393432677b 100644 +--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def ++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +@@ -118,3 +118,9 @@ ELF_RELOC(R_LARCH_SUB6, 106) + ELF_RELOC(R_LARCH_ADD_ULEB128, 107) + ELF_RELOC(R_LARCH_SUB_ULEB128, 108) + ELF_RELOC(R_LARCH_64_PCREL, 109) ++ ++// Relocs added in ELF for the LoongArch™ Architecture v20231102, part of the ++// v2.20 LoongArch ABI specs. 
++// ++// Spec addition: https://github.com/loongson/la-abi-specs/pull/4 ++ELF_RELOC(R_LARCH_CALL36, 110) +diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +index e32dc893fa79..88ff7fa405ed 100644 +--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test ++++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +@@ -102,6 +102,7 @@ + # CHECK: Type: R_LARCH_ADD_ULEB128 (107) + # CHECK: Type: R_LARCH_SUB_ULEB128 (108) + # CHECK: Type: R_LARCH_64_PCREL (109) ++# CHECK: Type: R_LARCH_CALL36 (110) + + --- !ELF + FileHeader: +@@ -211,3 +212,4 @@ Sections: + - Type: R_LARCH_ADD_ULEB128 + - Type: R_LARCH_SUB_ULEB128 + - Type: R_LARCH_64_PCREL ++ - Type: R_LARCH_CALL36 +diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp +index 50b1df124a4a..ed851dde4c00 100644 +--- a/llvm/unittests/Object/ELFTest.cpp ++++ b/llvm/unittests/Object/ELFTest.cpp +@@ -251,6 +251,8 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); + EXPECT_EQ("R_LARCH_64_PCREL", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); ++ EXPECT_EQ("R_LARCH_CALL36", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL36)); + } + + TEST(ELFTest, getELFRelativeRelocationType) { +-- +2.20.1 + + +From a8ed0f26220bbacb2c485a392f79ac4b271d73af Mon Sep 17 00:00:00 2001 +From: wanglei <wanglei@loongson.cn> +Date: Tue, 2 Jan 2024 10:55:02 +0800 +Subject: PATCH 02/12 LoongArch Emit function call code sequence as + `PCADDU18I+JIRL` in medium code model + +According to the description of the psABI v2.20: +https://github.com/loongson/la-abi-specs/releases/tag/v2.20, adjustments +are made to the function call instructions under the medium code model. + +At the same time, AsmParser has already supported parsing the call36 and +tail36 macro instructions. 
+ +(cherry picked from commit 2cf420d5b846a4733ef0ef7c8ed0ae0bfd1c6772) +--- + .../AsmParser/LoongArchAsmParser.cpp | 61 +++++++++++++++++++ + .../LoongArch/LoongArchExpandPseudoInsts.cpp | 29 ++++----- + .../Target/LoongArch/LoongArchInstrInfo.td | 23 ++++++- + .../Target/LoongArch/LoongArchMCInstLower.cpp | 3 + + .../LoongArch/LoongArchTargetMachine.cpp | 4 +- + .../MCTargetDesc/LoongArchBaseInfo.h | 1 + + .../MCTargetDesc/LoongArchELFObjectWriter.cpp | 2 + + .../MCTargetDesc/LoongArchFixupKinds.h | 3 + + .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 3 + + .../MCTargetDesc/LoongArchMCExpr.cpp | 3 + + .../LoongArch/MCTargetDesc/LoongArchMCExpr.h | 1 + + llvm/test/CodeGen/LoongArch/code-models.ll | 12 ++-- + .../MC/LoongArch/Basic/Integer/invalid64.s | 2 +- + llvm/test/MC/LoongArch/Macros/macros-call.s | 9 +++ + .../MC/LoongArch/Relocations/relocations.s | 5 ++ + 15 files changed, 133 insertions(+), 28 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Macros/macros-call.s + +diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +index a132e645c864..f908e5bc63d3 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp ++++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -122,6 +122,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { + // Helper to emit pseudo instruction "li.w/d $rd, $imm". + void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + ++ // Helper to emit pseudo instruction "call36 sym" or "tail36 $rj, sym". 
++ void emitFuncCall36(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ bool IsTailCall); ++ + public: + enum LoongArchMatchResultTy { + Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, +@@ -401,6 +405,22 @@ public: + IsValidKind; + } + ++ bool isSImm20pcaddu18i() const { ++ if (!isImm()) ++ return false; ++ ++ int64_t Imm; ++ LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; ++ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); ++ bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || ++ VK == LoongArchMCExpr::VK_LoongArch_CALL36; ++ ++ return IsConstantImm ++ ? isInt<20>(Imm) && IsValidKind ++ : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && ++ IsValidKind; ++ } ++ + bool isSImm21lsl2() const { + if (!isImm()) + return false; +@@ -1111,6 +1131,35 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, + } + } + ++void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, bool IsTailCall) { ++ // call36 sym ++ // expands to: ++ // pcaddu18i $ra, %call36(sym) ++ // jirl $ra, $ra, 0 ++ // ++ // tail36 $rj, sym ++ // expands to: ++ // pcaddu18i $rj, %call36(sym) ++ // jirl $r0, $rj, 0 ++ unsigned ScratchReg = ++ IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; ++ const MCExpr *Sym = ++ IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); ++ const LoongArchMCExpr *LE = LoongArchMCExpr::create( ++ Sym, llvm::LoongArchMCExpr::VK_LoongArch_CALL36, getContext()); ++ ++ Out.emitInstruction( ++ MCInstBuilder(LoongArch::PCADDU18I).addReg(ScratchReg).addExpr(LE), ++ getSTI()); ++ Out.emitInstruction( ++ MCInstBuilder(LoongArch::JIRL) ++ .addReg(IsTailCall ? 
(unsigned)LoongArch::R0 : ScratchReg) ++ .addReg(ScratchReg) ++ .addImm(0), ++ getSTI()); ++} ++ + bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + OperandVector &Operands, + MCStreamer &Out) { +@@ -1159,6 +1208,12 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + case LoongArch::PseudoLI_D: + emitLoadImm(Inst, IDLoc, Out); + return false; ++ case LoongArch::PseudoCALL36: ++ emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/false); ++ return false; ++ case LoongArch::PseudoTAIL36: ++ emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/true); ++ return false; + } + Out.emitInstruction(Inst, getSTI()); + return false; +@@ -1440,6 +1495,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " + "in the range"); ++ case Match_InvalidSImm20pcaddu18i: ++ return generateImmOutOfRangeError( ++ Operands, ErrorInfo, /*Lower=*/-(1 << 19), ++ /*Upper=*/(1 << 19) - 1, ++ "operand must be a symbol with modifier (e.g. %call36) or an integer " ++ "in the range"); + case Match_InvalidSImm21lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +index 72c1f1cec198..8eda2dcc1633 100644
View file
_service:tar_scm:0025-Backport-AArch64-Stack-probing-for-dynamic-allocas-in-GlobalISel.patch
Deleted
@@ -1,496 +0,0 @@ -From dbca022577e0da1f411ee84143d59c6c9d941969 Mon Sep 17 00:00:00 2001 -From: rickyleung <leung.wing.chung@huawei.com> -Date: Fri, 26 Apr 2024 17:29:18 +0800 -Subject: PATCH 6/7 backportAArch64 Stack probing for dynamic allocas in - GlobalISel - -Reference: https://github.com/llvm/llvm-project/commit/c1140d49ec3363bf903e4c1dbf7a3f5e8c1b6523 - -Co-authored-by: Oliver Stannard <oliver.stannard@linaro.org> ---- - .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + - .../CodeGen/GlobalISel/LegalizerHelper.cpp | 37 ++- - .../AArch64/GISel/AArch64LegalizerInfo.cpp | 47 +++- - .../AArch64/GISel/AArch64LegalizerInfo.h | 1 + - .../GlobalISel/legalize-dyn-alloca.mir | 255 ++++++++++++++---- - .../GlobalISel/legalizer-info-validation.mir | 7 + - .../CodeGen/AArch64/stack-probing-dynamic.ll | 3 +- - 7 files changed, 284 insertions(+), 68 deletions(-) - -diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h -index 9288091874cf..7abbd1f03f16 100644 ---- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h -+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h -@@ -400,6 +400,8 @@ public: - LegalizeResult lowerUnmergeValues(MachineInstr &MI); - LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI); - LegalizeResult lowerShuffleVector(MachineInstr &MI); -+ Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, -+ Align Alignment, LLT PtrTy); - LegalizeResult lowerDynStackAlloc(MachineInstr &MI); - LegalizeResult lowerStackSave(MachineInstr &MI); - LegalizeResult lowerStackRestore(MachineInstr &MI); -diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp -index 75d9789be4d0..5557456e706d 100644 ---- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp -+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp -@@ -6777,21 +6777,12 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { - return Legalized; - } 
- --LegalizerHelper::LegalizeResult --LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { -- const auto &MF = *MI.getMF(); -- const auto &TFI = *MF.getSubtarget().getFrameLowering(); -- if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) -- return UnableToLegalize; -- -- Register Dst = MI.getOperand(0).getReg(); -- Register AllocSize = MI.getOperand(1).getReg(); -- Align Alignment = assumeAligned(MI.getOperand(2).getImm()); -- -- LLT PtrTy = MRI.getType(Dst); -+Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, -+ Register AllocSize, -+ Align Alignment, -+ LLT PtrTy) { - LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); - -- Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); - SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); - -@@ -6806,7 +6797,25 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { - Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); - } - -- SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); -+ return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0); -+} -+ -+LegalizerHelper::LegalizeResult -+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { -+ const auto &MF = *MI.getMF(); -+ const auto &TFI = *MF.getSubtarget().getFrameLowering(); -+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) -+ return UnableToLegalize; -+ -+ Register Dst = MI.getOperand(0).getReg(); -+ Register AllocSize = MI.getOperand(1).getReg(); -+ Align Alignment = assumeAligned(MI.getOperand(2).getImm()); -+ -+ LLT PtrTy = MRI.getType(Dst); -+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); -+ Register SPTmp = -+ getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); -+ - MIRBuilder.buildCopy(SPReg, SPTmp); - MIRBuilder.buildCopy(Dst, SPTmp); - -diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp -index f0130a0be29d..0dd2b4d48dd6 100644 ---- 
a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp -+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp -@@ -797,9 +797,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) - return Query.Types0 == p0 && Query.Types1 == s64; - }); - -- getActionDefinitionsBuilder({G_DYN_STACKALLOC, -- G_STACKSAVE, -- G_STACKRESTORE}).lower(); -+ getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom(); -+ -+ getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower(); - - if (ST.hasMOPS()) { - // G_BZERO is not supported. Currently it is only emitted by -@@ -993,6 +993,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - return legalizeMemOps(MI, Helper); - case TargetOpcode::G_FCOPYSIGN: - return legalizeFCopySign(MI, Helper); -+ case TargetOpcode::G_DYN_STACKALLOC: -+ return legalizeDynStackAlloc(MI, Helper); - } - - llvm_unreachable("expected switch to return"); -@@ -1689,3 +1691,42 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI, - MI.eraseFromParent(); - return true; - } -+ -+bool AArch64LegalizerInfo::legalizeDynStackAlloc( -+ MachineInstr &MI, LegalizerHelper &Helper) const { -+ MachineFunction &MF = *MI.getParent()->getParent(); -+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; -+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); -+ -+ // If stack probing is not enabled for this function, use the default -+ // lowering. 
-+ if (!MF.getFunction().hasFnAttribute("probe-stack") || -+ MF.getFunction().getFnAttribute("probe-stack").getValueAsString() != -+ "inline-asm") { -+ Helper.lowerDynStackAlloc(MI); -+ return true; -+ } -+ -+ Register Dst = MI.getOperand(0).getReg(); -+ Register AllocSize = MI.getOperand(1).getReg(); -+ Align Alignment = assumeAligned(MI.getOperand(2).getImm()); -+ -+ assert(MRI.getType(Dst) == LLT::pointer(0, 64) && -+ "Unexpected type for dynamic alloca"); -+ assert(MRI.getType(AllocSize) == LLT::scalar(64) && -+ "Unexpected type for dynamic alloca"); -+ -+ LLT PtrTy = MRI.getType(Dst); -+ Register SPReg = -+ Helper.getTargetLowering().getStackPointerRegisterToSaveRestore(); -+ Register SPTmp = -+ Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); -+ auto NewMI = -+ MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp}); -+ MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass); -+ MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI); -+ MIRBuilder.buildCopy(Dst, SPTmp); -+ -+ MI.eraseFromParent(); -+ return true; -+} -\ No newline at end of file -diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h -index c10f6e071ed4..94484ea59d15 100644 ---- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h -+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h -@@ -58,6 +58,7 @@ private: - bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; - bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; - bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const; -+ bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const; - const AArch64Subtarget *ST; - }; - } // End llvm namespace. 
-diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir -index e9188fb89f69..882c7468e70f 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir -@@ -19,6 +19,21 @@ - ret i128* %addr - } - -+ define i8* @test_simple_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" { -+ %addr = alloca i8, i32 %numelts -+ ret i8* %addr -+ } -+ -+ define i8* @test_aligned_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" { -+ %addr = alloca i8, i32 %numelts, align 32 -+ ret i8* %addr -+ } -+ -+ define i128* @test_natural_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" { -+ %addr = alloca i128, i32 %numelts -+ ret i128* %addr -+ } -+ - ... - --- - name: test_simple_alloca -@@ -37,22 +52,23 @@ body: | -
View file
_service:tar_scm:0026-Update-testcase-for-stack-clash-protection-backport.patch
Deleted
@@ -1,177 +0,0 @@ -From 9425ee5f8608ff8611628d83386f61950d7fff85 Mon Sep 17 00:00:00 2001 -From: rickyleung <leung.wing.chung@huawei.com> -Date: Tue, 7 May 2024 21:37:03 +0800 -Subject: PATCH 7/7 Update testcase for stack clash protection backport - ---- - .../GlobalISel/legalize-dyn-alloca.mir | 3 +- - .../GlobalISel/stacksave-stackrestore.ll | 14 ++++++---- - .../CodeGen/AArch64/stack-probing-dynamic.ll | 16 ++++++----- - .../AArch64/stack-probing-last-in-block.mir | 4 +-- - .../X86/GlobalISel/stacksave-stackrestore.ll | 28 +++++++++++-------- - 5 files changed, 36 insertions(+), 29 deletions(-) - -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir -index 882c7468e70f..82781cebc55a 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir -@@ -313,5 +313,4 @@ body: | - %7:_(s64) = G_AND %5, %6 - %8:_(p0) = G_DYN_STACKALLOC %7(s64), 1 - $x0 = COPY %8(p0) -- RET_ReallyLR implicit $x0 --... 
-\ No newline at end of file -+ RET_ReallyLR implicit $x0 -\ No newline at end of file -diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll -index 16bf85af9c17..97ecca0bd77b 100644 ---- a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll -+++ b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll -@@ -15,14 +15,18 @@ define void @test_scoped_alloca(i64 %n) { - ; CHECK-NEXT: .cfi_offset w19, -16 - ; CHECK-NEXT: .cfi_offset w30, -24 - ; CHECK-NEXT: .cfi_offset w29, -32 --; CHECK-NEXT: add x9, x0, #15 -+; CHECK-NEXT: mov x19, x0 -+; CHECK-NEXT: bl llvm.stacksave.p0 -+; CHECK-NEXT: add x9, x19, #15 - ; CHECK-NEXT: mov x8, sp - ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 --; CHECK-NEXT: mov x19, sp --; CHECK-NEXT: sub x0, x8, x9 --; CHECK-NEXT: mov sp, x0 -+; CHECK-NEXT: mov x19, x0 -+; CHECK-NEXT: sub x8, x8, x9 -+; CHECK-NEXT: mov sp, x8 -+; CHECK-NEXT: mov x0, x8 - ; CHECK-NEXT: bl use_addr --; CHECK-NEXT: mov sp, x19 -+; CHECK-NEXT: mov x0, x19 -+; CHECK-NEXT: bl llvm.stackrestore.p0 - ; CHECK-NEXT: mov sp, x29 - ; CHECK-NEXT: ldr x19, sp, #16 // 8-byte Folded Reload - ; CHECK-NEXT: ldp x29, x30, sp, #32 // 16-byte Folded Reload -diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll -index ad9cdbe92b23..3cbcf7749b2a 100644 ---- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll -+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll -@@ -59,10 +59,10 @@ define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 { - ; CHECK-NEXT: str xzr, sp, #-64! 
- ; CHECK-NEXT: add x9, x0, #15 - ; CHECK-NEXT: mov x8, sp --; CHECK-NEXT: sub x10, x29, #64 - ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 --; CHECK-NEXT: str x10, x1 -+; CHECK-NEXT: sub x10, x29, #64 - ; CHECK-NEXT: sub x8, x8, x9 -+; CHECK-NEXT: str x10, x1 - ; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 - ; CHECK-NEXT: cmp sp, x8 -@@ -108,10 +108,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { - ; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0 - ; CHECK-NEXT: add x9, x0, #15 - ; CHECK-NEXT: mov x8, sp --; CHECK-NEXT: str xzr, sp - ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 --; CHECK-NEXT: mov x19, sp -+; CHECK-NEXT: str xzr, sp - ; CHECK-NEXT: sub x8, x8, x9 -+; CHECK-NEXT: mov x19, sp - ; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0 - ; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -@@ -167,10 +167,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { - ; CHECK-NEXT: mov sp, x9 - ; CHECK-NEXT: add x9, x0, #15 - ; CHECK-NEXT: mov x8, sp --; CHECK-NEXT: str xzr, sp - ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 --; CHECK-NEXT: mov x19, sp -+; CHECK-NEXT: str xzr, sp - ; CHECK-NEXT: sub x8, x8, x9 -+; CHECK-NEXT: mov x19, sp - ; CHECK-NEXT: and x8, x8, #0xffffffffffffe000 - ; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 -@@ -268,8 +268,10 @@ define void @no_reserved_call_frame(i64 %n) #0 { - ; CHECK-NEXT: str xzr, sp - ; CHECK-NEXT: sub sp, sp, #1104 - ; CHECK-NEXT: str xzr, sp -+; CHECK-NEXT: sub sp, sp, #1104 - ; CHECK-NEXT: bl callee_stack_args - ; CHECK-NEXT: add sp, sp, #1104 -+; CHECK-NEXT: add sp, sp, #1104 - ; CHECK-NEXT: mov sp, x29 - ; CHECK-NEXT: .cfi_def_cfa wsp, 16 - ; CHECK-NEXT: ldp x29, x30, sp, #16 // 16-byte Folded Reload -@@ -331,8 +333,8 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" { - ; CHECK-NEXT: 
.cfi_offset w29, -32 - ; CHECK-NEXT: rdvl x9, #1 - ; CHECK-NEXT: mov x10, #15 // =0xf --; CHECK-NEXT: mov x8, sp - ; CHECK-NEXT: madd x9, x0, x9, x10 -+; CHECK-NEXT: mov x8, sp - ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 - ; CHECK-NEXT: sub x8, x8, x9 - ; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 -diff --git a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir -index a8a21ab330ba..9a173be5857e 100644 ---- a/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir -+++ b/llvm/test/CodeGen/AArch64/stack-probing-last-in-block.mir -@@ -141,6 +141,4 @@ body: | - B %bb.2 - - bb.2.exit: -- RET_ReallyLR -- --... -\ No newline at end of file -+ RET_ReallyLR -\ No newline at end of file -diff --git a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll -index e86c04ee22db..8f665924577f 100644 ---- a/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll -+++ b/llvm/test/CodeGen/X86/GlobalISel/stacksave-stackrestore.ll -@@ -13,21 +13,25 @@ define void @test_scoped_alloca(i64 %n) { - ; CHECK-NEXT: .cfi_offset %rbp, -16 - ; CHECK-NEXT: movq %rsp, %rbp - ; CHECK-NEXT: .cfi_def_cfa_register %rbp -+; CHECK-NEXT: pushq %r14 - ; CHECK-NEXT: pushq %rbx --; CHECK-NEXT: pushq %rax --; CHECK-NEXT: .cfi_offset %rbx, -24 --; CHECK-NEXT: movq %rsp, %rbx --; CHECK-NEXT: movq %rsp, %rax --; CHECK-NEXT: imulq $1, %rdi, %rcx --; CHECK-NEXT: addq $15, %rcx --; CHECK-NEXT: andq $-16, %rcx --; CHECK-NEXT: subq %rcx, %rax --; CHECK-NEXT: movq %rax, %rsp --; CHECK-NEXT: movq %rax, %rdi -+; CHECK-NEXT: .cfi_offset %rbx, -32 -+; CHECK-NEXT: .cfi_offset %r14, -24 -+; CHECK-NEXT: movq %rdi, %rbx -+; CHECK-NEXT: callq llvm.stacksave.p0 -+; CHECK-NEXT: movq %rax, %r14 -+; CHECK-NEXT: movq %rsp, %rdi -+; CHECK-NEXT: imulq $1, %rbx, %rax -+; CHECK-NEXT: addq $15, %rax -+; CHECK-NEXT: andq $-16, %rax -+; CHECK-NEXT: subq %rax, %rdi -+; CHECK-NEXT: 
movq %rdi, %rsp - ; CHECK-NEXT: callq use_addr --; CHECK-NEXT: movq %rbx, %rsp --; CHECK-NEXT: leaq -8(%rbp), %rsp -+; CHECK-NEXT: movq %r14, %rdi -+; CHECK-NEXT: callq llvm.stackrestore.p0 -+; CHECK-NEXT: leaq -16(%rbp), %rsp - ; CHECK-NEXT: popq %rbx -+; CHECK-NEXT: popq %r14 - ; CHECK-NEXT: popq %rbp - ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 - ; CHECK-NEXT: retq --- -2.42.0.windows.2 -
View file
_service
Changed
@@ -2,7 +2,7 @@ <service name="tar_scm"> <param name="scm">git</param> <param name="url">git@gitee.com:src-openeuler/llvm.git</param> - <param name="revision">openEuler-24.03-LTS-Next</param> + <param name="revision">openEuler-24.03-LTS-SP1</param> <param name="exclude">*</param> <param name="extract">*</param> </service>
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2