Projects
openEuler:24.03:SP1:Everything
llvm
_service:tar_scm:0021-Add-BiSheng-Autotuner-sup...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch of Package llvm
From a9863e2b6e6783aa9be0b9d1d187084fd4b32a3a Mon Sep 17 00:00:00 2001 From: Muhammad Asif Manzoor <muhammad.asif.manzoor1@huawei.com> Date: Thu, 21 Mar 2024 12:50:38 -0400 Subject: Add BiSheng Autotuner support for LLVM compiler Automatic tuning is an automatic iterative process that optimizes a given program by manipulating compilation options for optimal performance. BiSheng Autotuner provides a resumable interface for tuning process. BiSheng Autotuner can tune 1) individual code segments/blocks (fine grain turning) like loops, callsites, instructions, etc. and 2) entire modules/programs (coarse grain tuning) for compiler flags, pass ordering, etc. This patch enables LLVM compiler to extract tuneable code regions and then apply suggested configuration (by Autotuner) to find out the optimal configurations. --- llvm/cmake/modules/CrossCompile.cmake | 1 + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 + llvm/include/llvm/Analysis/AutotuningDump.h | 75 ++ llvm/include/llvm/Analysis/LoopInfo.h | 13 + llvm/include/llvm/Analysis/Passes.h | 10 + llvm/include/llvm/AutoTuner/AutoTuning.h | 486 ++++++++++++ .../llvm/AutoTuner/AutoTuningRemarkManager.h | 43 ++ .../llvm/AutoTuner/AutoTuningRemarkStreamer.h | 47 ++ llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 + llvm/include/llvm/IR/Function.h | 37 + llvm/include/llvm/IR/InstrTypes.h | 24 + llvm/include/llvm/IR/Instructions.h | 24 + llvm/include/llvm/IR/Module.h | 3 + llvm/include/llvm/IR/StructuralHash.h | 14 + llvm/include/llvm/InitializePasses.h | 5 + llvm/include/llvm/LinkAllPasses.h | 8 + llvm/include/llvm/Remarks/Remark.h | 32 + llvm/include/llvm/Support/CommandLine.h | 17 + llvm/include/llvm/Transforms/Scalar.h | 17 + .../Transforms/Scalar/AutoTuningCompile.h | 170 +++++ .../llvm/Transforms/Utils/UnrollLoop.h | 4 + llvm/lib/Analysis/AutotuningDump.cpp | 265 +++++++ llvm/lib/Analysis/CMakeLists.txt | 2 + llvm/lib/Analysis/InlineAdvisor.cpp | 18 + llvm/lib/Analysis/InlineCost.cpp | 29 + 
llvm/lib/Analysis/LoopInfo.cpp | 52 ++ llvm/lib/AutoTuner/AutoTuning.cpp | 705 ++++++++++++++++++ .../lib/AutoTuner/AutoTuningRemarkManager.cpp | 299 ++++++++ .../AutoTuner/AutoTuningRemarkStreamer.cpp | 55 ++ llvm/lib/AutoTuner/CMakeLists.txt | 11 + llvm/lib/CMakeLists.txt | 1 + llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CalcSpillWeights.cpp | 30 + llvm/lib/CodeGen/MachineBasicBlock.cpp | 36 + llvm/lib/CodeGen/MachineScheduler.cpp | 44 ++ llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 19 + llvm/lib/IR/AsmWriter.cpp | 151 ++++ llvm/lib/IR/CMakeLists.txt | 1 + llvm/lib/IR/Function.cpp | 34 + llvm/lib/IR/Instructions.cpp | 86 +++ llvm/lib/IR/StructuralHash.cpp | 114 +++ llvm/lib/Passes/PassBuilder.cpp | 5 + llvm/lib/Passes/PassBuilderPipelines.cpp | 46 ++ llvm/lib/Passes/PassRegistry.def | 13 + llvm/lib/Passes/StandardInstrumentations.cpp | 23 + .../lib/Remarks/BitstreamRemarkSerializer.cpp | 8 + llvm/lib/Remarks/RemarkStreamer.cpp | 4 + llvm/lib/Remarks/YAMLRemarkParser.cpp | 122 +++ llvm/lib/Remarks/YAMLRemarkParser.h | 6 + llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 84 +++ llvm/lib/Support/CommandLine.cpp | 41 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/Inliner.cpp | 36 + llvm/lib/Transforms/IPO/SampleProfile.cpp | 14 + .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/PGOInstrumentation.cpp | 8 + .../Transforms/Scalar/AutoTuningCompile.cpp | 334 +++++++++ llvm/lib/Transforms/Scalar/CMakeLists.txt | 2 + llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 187 +++++ llvm/lib/Transforms/Scalar/Scalar.cpp | 4 + llvm/lib/Transforms/Scalar/Sink.cpp | 5 + llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + llvm/lib/Transforms/Utils/LCSSA.cpp | 5 + llvm/lib/Transforms/Utils/LoopSimplify.cpp | 8 + llvm/lib/Transforms/Utils/LoopUnroll.cpp | 3 + llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + .../Vectorize/LoopVectorizationLegality.cpp | 12 + .../Transforms/Vectorize/LoopVectorize.cpp | 34 + .../Inputs/unroll_template.yaml | 
8 + .../AutotuningDump/create-data-dir.ll | 65 ++ llvm/test/AutoTuning/AutotuningDump/unroll.ll | 35 + .../autotune_datadir/baseline_config.yaml | 9 + .../autotune_datadir/random_config.yaml | 9 + .../AutoTuning/BaselineConfig/Inputs/test.ll | 117 +++ .../BaselineConfig/apply_baseline_config.ll | 11 + llvm/test/AutoTuning/BaselineConfig/opp.ll | 67 ++ .../CodeRegionFilter/function-filtering.ll | 62 ++ .../Error/Inputs/invalid-format.yaml | 3 + .../AutoTuning/Error/Inputs/template.yaml | 10 + .../AutoTuning/Error/file-not-found-error.ll | 29 + .../AutoTuning/Error/invalid-yaml-error.ll | 27 + .../AutoTuning/Error/malformed-input-error.ll | 136 ++++ llvm/test/AutoTuning/Error/output-error.ll | 28 + llvm/test/AutoTuning/Error/valid-input.ll | 27 + .../Inputs/template.yaml | 9 + .../inc-compile-parse-input.ll | 103 +++ .../AutoTuning/Inline/Inputs/template.yaml | 9 + .../Inline/Inputs/template_no_metadata.yaml | 7 + .../test/AutoTuning/Inline/duplicate-calls.ll | 96 +++ llvm/test/AutoTuning/Inline/force-inline.ll | 84 +++ .../AutoTuning/Inline/inline-attribute.ll | 85 +++ llvm/test/AutoTuning/Inline/opp.ll | 64 ++ .../LoopUnroll/Inputs/debug_loc_template.yaml | 10 + .../LoopUnroll/Inputs/loop_nest.yaml | 10 + .../LoopUnroll/Inputs/loop_peel.yaml | 9 + .../Inputs/unroll_raw_template.yaml | 10 + .../LoopUnroll/Inputs/unroll_template.yaml | 10 + .../Inputs/unroll_template_no_metadata.yaml | 8 + llvm/test/AutoTuning/LoopUnroll/debug_loc.ll | 161 ++++ .../AutoTuning/LoopUnroll/dynamic_config.ll | 56 ++ llvm/test/AutoTuning/LoopUnroll/loop_nest.ll | 136 ++++ llvm/test/AutoTuning/LoopUnroll/loop_peel.ll | 53 ++ .../AutoTuning/LoopUnroll/unroll-pragma.ll | 129 ++++ llvm/test/AutoTuning/LoopUnroll/unroll.ll | 101 +++ llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll | 113 +++ .../Inputs/vectorize_template.yaml | 9 + .../vectorize_template_no_metadata.yaml | 7 + .../LoopVectorize/force-vector-interleave.ll | 88 +++ .../Inputs/misched_x86_template.yaml | 10 + 
.../misched_x86_bidirectional.ll | 73 ++ .../MachineScheduler/misched_x86_bottomup.ll | 72 ++ .../MachineScheduler/misched_x86_topdown.ll | 72 ++ .../AutoTuning/MetaData/structural_hash.ll | 234 ++++++ .../AutoTuning/MetaData/write_no_metadata.ll | 191 +++++ .../MetaData/write_with_metadata.ll | 204 +++++ .../AutoTuning/PGO/Inputs/pgo-instr.proftext | 17 + .../PGO/Inputs/pgo-sample-cold.prof | 7 + .../AutoTuning/PGO/Inputs/pgo-sample-hot.prof | 7 + llvm/test/AutoTuning/PGO/pgo-instr-filters.ll | 61 ++ .../test/AutoTuning/PGO/pgo-sample-filters.ll | 138 ++++ .../Inputs/pass_invocation.yaml | 10 + .../PassInvocation/pass_invocation_read.ll | 64 ++ .../PassInvocation/pass_invocation_write.ll | 67 ++ .../PhaseOrdering/Inputs/template.yaml | 8 + .../AutoTuning/PhaseOrdering/pass-order.ll | 65 ++ .../AutoTuning/SwitchLowering/switch-opp.ll | 47 ++ llvm/test/AutoTuning/lit.local.cfg | 2 + llvm/test/AutoTuning/opt-opp.ll | 315 ++++++++ llvm/test/lit.site.cfg.py.in | 1 + llvm/tools/llc/llc.cpp | 19 + llvm/tools/opt/NewPMDriver.cpp | 42 ++ llvm/tools/opt/opt.cpp | 53 ++ 132 files changed, 7801 insertions(+) create mode 100644 llvm/include/llvm/Analysis/AutotuningDump.h create mode 100644 llvm/include/llvm/AutoTuner/AutoTuning.h create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h create mode 100644 llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h create mode 100644 llvm/lib/Analysis/AutotuningDump.cpp create mode 100644 llvm/lib/AutoTuner/AutoTuning.cpp create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp create mode 100644 llvm/lib/AutoTuner/CMakeLists.txt create mode 100644 llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp create mode 100644 llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml create mode 100644 llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll create mode 100644 
llvm/test/AutoTuning/AutotuningDump/unroll.ll create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll create mode 100644 llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll create mode 100644 llvm/test/AutoTuning/BaselineConfig/opp.ll create mode 100644 llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll create mode 100644 llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml create mode 100644 llvm/test/AutoTuning/Error/Inputs/template.yaml create mode 100644 llvm/test/AutoTuning/Error/file-not-found-error.ll create mode 100644 llvm/test/AutoTuning/Error/invalid-yaml-error.ll create mode 100644 llvm/test/AutoTuning/Error/malformed-input-error.ll create mode 100644 llvm/test/AutoTuning/Error/output-error.ll create mode 100644 llvm/test/AutoTuning/Error/valid-input.ll create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template.yaml create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml create mode 100644 llvm/test/AutoTuning/Inline/duplicate-calls.ll create mode 100644 llvm/test/AutoTuning/Inline/force-inline.ll create mode 100644 llvm/test/AutoTuning/Inline/inline-attribute.ll create mode 100644 llvm/test/AutoTuning/Inline/opp.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml create mode 100644 
llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml create mode 100644 llvm/test/AutoTuning/LoopUnroll/debug_loc.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_nest.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_peel.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll.ll create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml create mode 100644 llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll create mode 100644 llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll create mode 100644 llvm/test/AutoTuning/MetaData/structural_hash.ll create mode 100644 llvm/test/AutoTuning/MetaData/write_no_metadata.ll create mode 100644 llvm/test/AutoTuning/MetaData/write_with_metadata.ll create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof create mode 100644 llvm/test/AutoTuning/PGO/pgo-instr-filters.ll create mode 100644 llvm/test/AutoTuning/PGO/pgo-sample-filters.ll create mode 100644 llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll create mode 100644 llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml 
create mode 100644 llvm/test/AutoTuning/PhaseOrdering/pass-order.ll create mode 100644 llvm/test/AutoTuning/SwitchLowering/switch-opp.ll create mode 100644 llvm/test/AutoTuning/lit.local.cfg create mode 100644 llvm/test/AutoTuning/opt-opp.ll diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake index 6af47b51d4c6..1a9fb4b2dddc 100644 --- a/llvm/cmake/modules/CrossCompile.cmake +++ b/llvm/cmake/modules/CrossCompile.cmake @@ -82,6 +82,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) -DLLVM_ENABLE_PROJECTS="${llvm_enable_projects_arg}" -DLLVM_EXTERNAL_PROJECTS="${llvm_external_projects_arg}" -DLLVM_ENABLE_RUNTIMES="${llvm_enable_runtimes_arg}" + -DLLVM_ENABLE_AUTOTUNER="${LLVM_ENABLE_AUTOTUNER}" ${external_project_source_dirs} -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}" -DLLVM_INCLUDE_BENCHMARKS=OFF diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 62a1a64d37d4..b8e9dbe29d88 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -112,6 +112,14 @@ else() set(BUILD_FOR_OPENEULER 0) endif() +option(LLVM_ENABLE_AUTOTUNER "Enable BiSheng Auto-Tuning features" OFF) +if (LLVM_ENABLE_AUTOTUNER) + set(LLVM_ENABLE_AUTOTUNER 1) + add_definitions( -DENABLE_AUTOTUNER ) +else() + set(LLVM_ENABLE_AUTOTUNER 0) +endif() + if(LLVM_ENABLE_EXPENSIVE_CHECKS) add_compile_definitions(EXPENSIVE_CHECKS) diff --git a/llvm/include/llvm/Analysis/AutotuningDump.h b/llvm/include/llvm/Analysis/AutotuningDump.h new file mode 100644 index 000000000000..fb973f05323e --- /dev/null +++ b/llvm/include/llvm/Analysis/AutotuningDump.h @@ -0,0 +1,75 @@ +#if defined(ENABLE_AUTOTUNER) +// ===-- AutotuningDump.h - Auto-Tuning-----------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +// ===--------------------------------------------------------------------===// +// +// This file contains pass collecting IR of tuned regions and storing them into +// predetermined locations, to be used later by autotuning ML guidance +// +// ===--------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include <string> + +namespace llvm { +class AutotuningDump { +public: + AutotuningDump(bool IncrementalCompilation = false); + bool run(Module &F, function_ref<LoopInfo &(Function &)> GetLI); + +private: + std::string AutoTuneDirPath; + std::unique_ptr<raw_ostream> createFile(const Twine &File); + int getConfigNumber(); + void dumpToStream(llvm::raw_ostream &os, const Loop &L) const; + void dumpToStream(llvm::raw_ostream &os, const Function &F) const; + void dumpFunctions(llvm::Module &M); + void dumpLoops(llvm::Module &M, function_ref<LoopInfo &(Function &)> GetLI); + void dumpModule(llvm::Module &M); + std::string getDirectoryName(const std::string File) const; + std::string getFileName(std::string FilePath); + + bool IsIncrementalCompilation; +}; + +class AutotuningDumpLegacy : public ModulePass { +public: + static char ID; + AutotuningDumpLegacy(bool IncrementalCompilation = false); + StringRef getPassName() const override; + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool IsIncrementalCompilation; +}; + +class AutotuningDumpAnalysis + : public AnalysisInfoMixin<AutotuningDumpAnalysis> { + friend AnalysisInfoMixin<AutotuningDumpAnalysis>; + static AnalysisKey Key; + +public: + AutotuningDumpAnalysis(bool IncrementalCompilation = false) { + IsIncrementalCompilation = IncrementalCompilation; + } + + // This pass only prints IRs of selected function or loops without doing any + // real 
analyses, thus the return value is meaningless. To avoid leaking data + // or memory, we typedef Result to Optional<bool> to avoid having to return an + // AutotuningDump object. + using Result = std::optional<bool>; + Result run(Module &M, ModuleAnalysisManager &AM); + +private: + bool IsIncrementalCompilation; +}; +} // namespace llvm +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 3434630c27cf..9be3e056cf76 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -26,6 +26,9 @@ #include <algorithm> #include <optional> #include <utility> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { @@ -44,7 +47,12 @@ extern template class LoopBase<BasicBlock, Loop>; /// Represents a single loop in the control flow graph. Note that not all SCCs /// in the CFG are necessarily loops. +#if defined(ENABLE_AUTOTUNER) +class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop>, + public autotuning::Container { +#else class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> { +#endif public: /// A range representing the start and end location of a loop. class LocRange { @@ -395,6 +403,11 @@ public: return "<unnamed loop>"; } +#if defined(ENABLE_AUTOTUNER) + void initCodeRegion() override; + uint64_t computeStructuralHash() override; +#endif + private: Loop() = default; diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h index ac1bc3549910..65f566cc75de 100644 --- a/llvm/include/llvm/Analysis/Passes.h +++ b/llvm/include/llvm/Analysis/Passes.h @@ -58,6 +58,16 @@ namespace llvm { // in a function and builds the region hierarchy. 
// FunctionPass *createRegionInfoPass(); + +#if defined(ENABLE_AUTOTUNER) + //===--------------------------------------------------------------------===// + // + // createAutotuningDumpPass - This pass collects IR of tuned regions + // and stores them into predetermined locations + // for the purpose of autotuning ML guidance. + // + ModulePass *createAutotuningDumpPass(); +#endif } #endif diff --git a/llvm/include/llvm/AutoTuner/AutoTuning.h b/llvm/include/llvm/AutoTuner/AutoTuning.h new file mode 100644 index 000000000000..0f1f276306ec --- /dev/null +++ b/llvm/include/llvm/AutoTuner/AutoTuning.h @@ -0,0 +1,486 @@ +#if defined(ENABLE_AUTOTUNER) +//===-- AutoTuning.h - Auto-Tuning-----------------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines Auto Tuning related functions, models and interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNER_AUTOTUNING_H_ +#define LLVM_AUTOTUNER_AUTOTUNING_H_ + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Casting.h" +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> + +// Options for AutoTuner incremental compilation. +enum AutoTuningCompileOpt { + Inactive, // Disabled incremental compilation. + CoarseGrain, // For tuning LLVMParam. + FineGrain, // For tuning default code regions (Loop, CallSite, Function). + Basic // Same as CoarseGrain but can be applied for any code region. + // Can be used with ImpactRanker. 
+}; + +namespace autotuning { +// Constant definitions for AutoTuner incremental compilation. +const std::string CompileOptionStart = "start"; +const std::string CompileOptionEnd = "end"; +const std::string CompileOptionUnknow = "unknown"; +const std::string CompileOptionUnroll = "loop-unroll"; +const std::string CompileOptionVectorize = "loop-vectorize"; +const std::string CompileOptionInline = "inline"; + +class ParameterBase { +public: + virtual ~ParameterBase() = default; + enum ParameterKind { + PK_PARAMETER, + }; + ParameterKind getKind() const { return Kind; } + + explicit ParameterBase(ParameterKind K) : Kind(K) {} + +private: + const ParameterKind Kind; +}; + +template <typename T> class Parameter : public ParameterBase { +public: + Parameter(const T &RHS) : ParameterBase(PK_PARAMETER), Value(RHS) {} + const T &getValue() const { return Value; } + void setValue(const T &RHS) { Value = RHS; } + + static bool classof(const ParameterBase *P) { + return P->getKind() == PK_PARAMETER; + } + +private: + T Value; +}; + +/// This class manages parameters of one codeRegion. +class ParameterManager { + +public: + // add a param into this ParameterManager + template <typename T> + void add(const std::string &ParamName, const T ParamValue) { + std::shared_ptr<ParameterBase> Param = + std::make_shared<Parameter<T>>(ParamValue); + this->Parameters[ParamName] = Param; + } + + // Look up the value of a parameter by name in this ParameterManager. + // The found value will be assigned to the reference variable "Value". + // Return true if the parameter exists in this ParameterManager, + // and false otherwise. 
+ template <typename T> + bool findByName(const std::string &ParamName, T &Value) const { + auto Iterator = Parameters.find(ParamName); + if (Iterator == Parameters.end()) { + return false; + } + + auto ParamPtr = llvm::dyn_cast<Parameter<T>>(Iterator->second.get()); + if (ParamPtr != nullptr) { + Value = ParamPtr->getValue(); + return true; + } else { + return false; + } + } + +private: + std::unordered_map<std::string, std::shared_ptr<ParameterBase>> Parameters; +}; + +/// The debug location used to track a CodeRegion back to the source file. +struct SourceLocation { + /// The source file corresponding to this CodeRegion. + std::string SourceFilePath; + unsigned SourceLine = 0; + unsigned SourceColumn = 0; + + bool operator==(const SourceLocation &CR) const { + return (this->SourceFilePath == CR.SourceFilePath) && + (this->SourceLine == CR.SourceLine) && + (this->SourceColumn == CR.SourceColumn); + }; + + explicit operator bool() const { + return !(SourceFilePath.empty() && SourceLine == 0 && SourceColumn == 0); + } +}; + +enum CodeRegionType { + CallSite, // Code region for function inlining. + Function, // Used in AutoTuningDump pass for IR writing. + LLVMParam, // Compilation flags. Tuned individually for each module. + Loop, // Code region for loops. + MachineBasicBlock, // Instruction scheduling code region. + Other, // Pass ordering code region. + ProgramParam, // Compilation flags. Tuned collectively for program. + Switch, // Tuning MinJumpTableEntries parameter for switch inst. + Empty, // Empty CodeRegion. + Invalid // Invalid CodeRegion. +}; + +enum HotnessType { + Unknown, + Cold, + Hot, +}; + +/// DynamicOptions represent a map: Arg -> DynamicConfigs. +/// Where Arg is a tuning parameter on the associated CodeRegion. +/// And DynamicConfigs is the possible tuning values associated with Arg. 
+typedef std::map<std::string, std::vector<unsigned int>> DynamicOptions; + +/// This class represents a region in source code including +/// its name, function name, type, debug location, and associated pass name. +class CodeRegion { + +public: + // Default constructor + CodeRegion(const CodeRegionType Type = CodeRegionType::Other); + ~CodeRegion() = default; + // Concrete constructors + CodeRegion(const std::string &Name, const std::string &FuncName, + const CodeRegionType &Type, const llvm::DebugLoc &DL, + const DynamicOptions DO = {}); + CodeRegion(const std::string &Name, const std::string &FuncName, + const CodeRegionType &Type, + const SourceLocation &Location = SourceLocation(), + const DynamicOptions DO = {}); + CodeRegion(const std::string &Name, const std::string &FuncName, + const std::string &PassName, const CodeRegionType &Type, + const SourceLocation &Location = SourceLocation(), + const unsigned int Invocation = 0); + + bool operator==(const CodeRegion &CR) const; + inline bool operator!=(const CodeRegion &CR) const { return !(*this == CR); }; + + explicit operator bool() const { + return !(Name.empty() && FuncName.empty() && PassName.empty()); + } + + static std::string getTypeAsString(CodeRegionType CRType); + static std::string getHotnessAsString(HotnessType Hotness); + const std::string &getName() const { return Name; } + const std::string &getFuncName() const { return FuncName; } + const CodeRegionType &getType() const { return Type; } + const std::string &getFileName() const { return Location.SourceFilePath; } + const std::string &getTypeAsString() const { return StringType; } + const SourceLocation &getSourceLoc() const { return Location; } + const std::string &getPassName() const { return PassName; } + unsigned getSize() const { return Size; }; + void setPassName(const std::string &NewPassName); + void setSize(unsigned Size) { this->Size = Size; }; + void setHotness(HotnessType NewHotness) const { this->Hotness = NewHotness; } + HotnessType 
getHotness() const { return this->Hotness; } + std::string getHotnessAsString() const { return getHotnessAsString(Hotness); } + bool isCold() const { return this->Hotness == Cold; } + bool isHot() const { return this->Hotness == Hot; } + std::uint64_t getHash() const { return this->Hash; } + void setHash(std::uint64_t Hash) { this->Hash = Hash; } + DynamicOptions getAutoTunerOptions() const { return this->AutoTunerOptions; } + void setInvocation(unsigned int Invocation) { this->Invocation = Invocation; } + unsigned int getInvocation() const { return this->Invocation; } + + /// Add dynamic config options with Code Region for AutoTuner to tune instead + /// of using static config options. + void addAutoTunerOptions(const std::string ParamName, + std::vector<unsigned int> Options) const { + this->AutoTunerOptions.insert( + std::pair<std::string, std::vector<unsigned int>>(ParamName, Options)); + } + static CodeRegion getInvalidInstance(); + static CodeRegion getEmptyInstance(); + void setBaselineConfig(std::map<std::string, std::string> Value) const { + this->BaselineConfig = Value; + }; + std::map<std::string, std::string> getBaselineConfig() const { + return this->BaselineConfig; + } + +private: + /// Name of the code region. + /// For most of cases it's set to the name of a header basic block. + std::string Name; + /// Function name of this code region if any. + std::string FuncName; + /// Name of the pass which this code region is associated. + std::string PassName; + /// Type of this code region. Options are other, function, loop, + /// and machine basic block. + CodeRegionType Type; + /// Source Location. + SourceLocation Location; + std::string StringType; + /// Structural hash for the CodeRegion. + std::uint64_t Hash = 0; + /// Configs values passed to AutoTuner for dynamic setting of search space + /// for code regions. 
+ mutable DynamicOptions AutoTunerOptions; + /// Configuration values passed to AutoTuner for generating the same binary + /// as the baseline. + mutable std::map<std::string, std::string> BaselineConfig; + + /// Record the order of invocation of an optimization pass during the whole + /// compilation pipeline. It is used to differentiate multiple invocations of + /// a same optimization pass. + /// Currently, Loop Unroll pass is invoked twice during the compilation + /// pipeline. 'Invocation' helps to relate a code region with the invocation + /// of Loop Unroll pass where the code region is generated. + mutable unsigned int Invocation; + + /// Size of this code region. Usually it refers to the number of instructions + /// but could be different based on implementations. + unsigned Size = 0; + mutable HotnessType Hotness = Unknown; + + /// A boolean flag to record if a CR is initialized or not. + /// It should only be set to true by initContainer(). + /// We only add initialized CR to TuningOpps. + bool Initialized = false; + + friend class AutoTuningEngine; +}; + +/// This class is an interface for classes representing code regions in LLVM +/// (eg. Loop, Function and MachineBasicBlock) to inherit +/// so that auto-tuning can be enabled on them. +/// A Container must contain a CodeRegion. +class Container { + +public: + Container() {} + virtual ~Container(){}; + + /// Abstract method for derived classes to overwrite + virtual void initCodeRegion() = 0; + virtual uint64_t computeStructuralHash() = 0; + + /// Get the Container's CodeRegion. + const CodeRegion &getCodeRegion() const; + /// Set the Container's CodeRegion. + void setCodeRegion(const CodeRegion &NewCR); + /// This method is to look up the value of a parameter that corresponds to an + /// Container. The parameter being looked up is stored in a ParameterManager. 
+ template <typename T> + bool lookUpParams(const std::string &ParamsName, T &Value) const; + + /// Check if the code region is being tuned by config file. + bool requiresIRDump(bool IsFunctionIR = false) const; + +private: + CodeRegion CR; + friend class AutoTuningEngine; +}; +} // end namespace autotuning + +namespace std { +template <> +// Implement hash for CodeRegion data type in std namespace. Only using common +// attributes (with and without using 'OmitAutotuningMetadata' flag) of +// CodeRegion. Remaining attributes are compared in overloaded == function. +struct hash<autotuning::CodeRegion> { + std::size_t operator()(const autotuning::CodeRegion &CR) const { + return llvm::hash_combine(CR.getPassName(), CR.getType()); + } +}; +} // namespace std + +namespace llvm { +// Forward Declaration. +class CallBase; + +typedef autotuning::CodeRegion CodeRegion; +template <> struct DenseMapInfo<CodeRegion> { + static bool isEqual(const CodeRegion &LHS, const CodeRegion &RHS) { + return LHS == RHS; + } + static inline CodeRegion getEmptyKey() { + return autotuning::CodeRegion::getEmptyInstance(); + } + static inline CodeRegion getTombstoneKey() { + return autotuning::CodeRegion::getInvalidInstance(); + } + // Implement hash for CodeRegion data type in llvm namespace. Only using + // common attributes (with and without using 'OmitAutotuningMetadata' flag) + // of CodeRegion. Remaining attributes are compared in overloaded == + // function. 
+ static unsigned getHashValue(const CodeRegion &CR) { + return llvm::hash_combine(CR.getPassName(), CR.getType()); + } +}; +} // namespace llvm + +namespace autotuning { +using namespace llvm; +typedef std::unordered_map<CodeRegion, ParameterManager> LookUpTable; +typedef llvm::SetVector<CodeRegion> CodeRegions; + +/// Structure to store information of CallSite code regions which is used to +/// get a different SourceLocation for multiple callsites (same callee) in a +/// function when these callsites have same SourceLocation due to inlining. +struct CallSiteLocation { + llvm::CallBase *CB; + llvm::Function *Caller; + llvm::Function *Callee; + SourceLocation SrcLoc; +}; + +class AutoTuningEngine { +public: + AutoTuningEngine() { Enabled = false; } + ~AutoTuningEngine() {} + + /// Initialize the Container for auto-tuning. + void initContainer(Container *Container, const std::string &PassName, + const StringRef FuncName = "", bool AddOpportunity = true, + unsigned int Invocation = 0); + + /// Initialize auto-tuning. This method should only be called in the main + /// function. + /// \return Error::success() on success or the related Error otherwise. + llvm::Error init(const std::string &ModuleID); + + /// Finalize auto-tuning. This method should only be called in the main + /// function. + /// \return Error::success() on success or the related Error otherwise. + llvm::Error finalize(); + + /// Return the number of tuning configuration used for this compilation. 
+ llvm::Expected<int> getConfigNumber(); + + void enable() { Enabled = true; } + void disable() { Enabled = false; } + bool isEnabled() const { return Enabled; } + bool isMLEnabled() const { return MLEnabled; } + bool isDumpEnabled() const { return DumpEnabled; } + bool isGenerateOutput() const { return GenerateOutput; } + bool isParseInput() const { return ParseInput; } + bool isTuningAllowedForType(CodeRegionType CRType) const { + return (CodeRegionFilterTypes.count(CRType) > 0); + } + bool isThinLTOTuning() const; + + /// Convert a pass-name to CodeRegionType. + CodeRegionType convertPassToType(std::string Pass); + + /// First sets BaselineConfig value for the CR then + /// adds a tuning opportunity into the TuningOpps list. + void addOpportunity(const CodeRegion &OppCR, + std::map<std::string, std::string> BaselineConfig = {}); + bool hasOpportunities() const { return TuningOpps.empty(); } + + bool shouldRunOptPass(std::string FileName, std::string Pass); + + /// Insert all of the callsites of a function in CallSiteLocs vector. + void insertCallSiteLoc(CallSiteLocation Loc); + + /// Update CallSiteLocs vector with new callsites (if any) which get available + /// due to inlining. + void updateCallSiteLocs(llvm::CallBase *CB, llvm::CallBase *Ptr, + llvm::Function *F, unsigned int Line); + + /// Clean up the CallSiteLocs vector by keeping the callsite if there are + /// multiple calls to same callee. This cleaning will be performed before + /// inlining any callsite. + void cleanCallSiteLoc(); + + /// Clear the CallSiteLocs vector. + void clearCallSiteLocs(); + + /// Return the SourceLocation::SourceLine (if available). + std::optional<unsigned int> getCallSiteLoc(llvm::CallBase *CB); + + template <typename T> + bool lookUpGlobalParams(const std::string &ParamsName, T &Value) const; + /// A map storing llvm parameters. + std::unordered_map<std::string, std::string> LLVMParams; + /// A map storing program parameters. 
+ std::unordered_map<std::string, std::string> ProgramParams; + +private: + std::string ModuleID; + /// This boolean indicates if the auto-tuning mode is enabled. + /// It will be set to true if any of the following command line options + /// (auto-tuning-input, auto-tuning-result and auto-tuning-opp) is specified. + bool Enabled; + /// This boolean indicates if the ML guidance feature is enabled in + /// Autotuner. It will be set to true if -fautotune-rank is specified. + bool MLEnabled; + /// This boolean indicates if the IR dumping is enabled or not. IR dumping + /// is enabled for ML guidance feature. It can also be enabled with command + /// line compiler flag 'enable-autotuning-dump'. + bool DumpEnabled = false; + /// This boolean indicates if compiler is parsing/using 'config.yaml' file + /// generated by AutoTuner and uses the configuration values instead of + /// determining with compiler heuristic. + bool ParseInput; + /// This boolean indicates if compiler is creating/generating opportunity + /// file(s) which will be consumed by AutoTuner to create the search space. + bool GenerateOutput; + /// A map of filename and set of optimization passes; an optimization pass + /// will be added to this set if a CodeRegion belongs to the optimization + /// pass. + std::unordered_map<std::string, std::unordered_set<std::string>> OppPassList; + + /// Vector to store all of the duplicate calls in a function and the calls + /// which get available due to inlining. + SmallVector<CallSiteLocation, 10> CallSiteLocs; + + /// A set to store the code region types that will be tuned in current + /// autotuning flow. This will be populated with code region types based on + /// 'auto-tuning-type-filter' for -fautotune-generate and the types will be + /// extracted from config.yaml in case of -fautotune. + /// This set is used to apply type-based filtering prior to creating/ + /// initializing a code region. 
+ std::unordered_set<CodeRegionType> CodeRegionFilterTypes; + + // A statically initialized map used to convert 'pass-name' to + // 'CodeRegionType'. + std::unordered_map<std::string, CodeRegionType> PTTMap; + + /// A map of CodeRegion and ParameterManager to keep track of all the + /// parameters of code regions loaded from input config file. + LookUpTable ParamTable; + /// A list of CodeRegions as tuning opportunities + CodeRegions TuningOpps; + /// A ParameterManager for global parameters. + ParameterManager GlobalParams; + + /// Apply filters for CodeRegions. + void applyOppFilters(CodeRegions &CRs); + + /// Apply function name filter for CodeRegions. + bool applyFunctionFilter(std::string FuncName); + + friend class Container; + friend class CodeRegion; + friend class AutoTuningRemarkManager; +}; + +extern class AutoTuningEngine Engine; // AutoTuning Engine + +} // end namespace autotuning + +#endif /* LLVM_AUTOTUNER_AUTOTUNING_H_ */ +#endif diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h new file mode 100644 index 000000000000..153a2c6246ad --- /dev/null +++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h @@ -0,0 +1,43 @@ +#if defined(ENABLE_AUTOTUNER) +//===- llvm/AutoTuner/AutoTuningRemarkManager.h - Remark Manager ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the main interface for inputting and outputting +// remarks for AutoTuning. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNINGREMARKMANAGER_H +#define LLVM_AUTOTUNINGREMARKMANAGER_H + +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Remarks/RemarkStreamer.h" +#include "llvm/Support/Error.h" +#include <string> +#include <unordered_map> +#include <vector> + +namespace autotuning { +class AutoTuningRemarkManager { +public: + /// Read a list of parameters from input file. + /// \return Error::success() on success or the related Error otherwise. + static llvm::Error read(autotuning::AutoTuningEngine &E, + const std::string &InputName, + const std::string &RemarksFormat); + + /// Dump a list of CodeRegions as tuning opportunities into a file. + /// \return Error::success() on success or the related Error otherwise. + static llvm::Error dump(const autotuning::AutoTuningEngine &E, + const std::string &DirPath, + const std::string &RemarksFormat, + const std::string &RemarksPasses); +}; +} // namespace autotuning +#endif // LLVM_AUTOTUNINGREMARKMANAGER_H +#endif diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h new file mode 100644 index 000000000000..0096139b12e9 --- /dev/null +++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h @@ -0,0 +1,47 @@ +#if defined(ENABLE_AUTOTUNER) +// ===------------ llvm/AutoTuner/AutoTuningRemarkStreamer.h --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +// ===---------------------------------------------------------------------===// +// +// This file contains the implementation of the conversion between AutoTuner +// CodeRegions and serializable remarks::Remark objects. 
+// +// ===---------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H +#define LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H + +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkStreamer.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ToolOutputFile.h" +#include <memory> +#include <string> + +namespace llvm { +/// Streamer for AutoTuner remarks which has logic for dealing with CodeRegions. +class AutoTuningRemarkStreamer { + remarks::RemarkStreamer &RS; + /// Convert CodeRegion into remark objects. + remarks::Remark toRemark(const autotuning::CodeRegion &CR); + +public: + AutoTuningRemarkStreamer(remarks::RemarkStreamer &RS) : RS(RS) {} + /// Emit a CodeRegion through the streamer. + void emit(const autotuning::CodeRegion &CR); + /// Set a pass filter based on a regex \p Filter. + /// Returns an error if the regex is invalid. + Error setFilter(StringRef Filter); +}; +} // end namespace llvm + +#endif // LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H +#endif diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 52388692c196..95ac9acf4e5e 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -27,6 +27,9 @@ #include <iterator> #include <string> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { @@ -91,9 +94,19 @@ public: void deleteNode(MachineInstr *MI); }; +#if defined(ENABLE_AUTOTUNER) +class MachineBasicBlock + : public ilist_node_with_parent<MachineBasicBlock, MachineFunction>, + public autotuning::Container { +#else class MachineBasicBlock : public ilist_node_with_parent<MachineBasicBlock, MachineFunction> { +#endif public: +#if defined(ENABLE_AUTOTUNER) + void initCodeRegion() override; + uint64_t computeStructuralHash() override; +#endif /// Pair of physical 
register and lane mask. /// This is not simply a std::pair typedef because the members should be named /// clearly as they both have an integer type. diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 93cf0d27e9a7..c0db48ae1789 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -37,6 +37,9 @@ #include <cstdint> #include <memory> #include <string> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { @@ -56,6 +59,24 @@ class User; class BranchProbabilityInfo; class BlockFrequencyInfo; +#if defined(ENABLE_AUTOTUNER) +class AutoTuningEnabledFunction : public autotuning::Container { +public: + AutoTuningEnabledFunction() = delete; + void initCodeRegion() override; + void setHot() { this->Hotness = autotuning::Hot; } + void setCold() { this->Hotness = autotuning::Cold; } + autotuning::HotnessType getHotness() const { return this->Hotness; } + uint64_t computeStructuralHash() override; + +private: + AutoTuningEnabledFunction(Function *F) { Func = F; }; + Function *Func; + autotuning::HotnessType Hotness = autotuning::Unknown; + friend class Function; +}; +#endif + class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, public ilist_node<Function> { public: @@ -68,6 +89,13 @@ public: using arg_iterator = Argument *; using const_arg_iterator = const Argument *; +#if defined(ENABLE_AUTOTUNER) + // There is one-to-one correspondence between ATEFunction and the current + // Function object to avoid messing up the LLVM User and owned Use classes' + // memory layout. + AutoTuningEnabledFunction ATEFunction = AutoTuningEnabledFunction(this); +#endif + private: // Important things that make up a function! BasicBlockListType BasicBlocks; ///< The basic blocks @@ -128,6 +156,11 @@ public: void operator=(const Function&) = delete; ~Function(); +#if defined(ENABLE_AUTOTUNER) + // Return the auto-tuning enabled version of this Function object. 
+ AutoTuningEnabledFunction &getATEFunction() { return ATEFunction; } +#endif + // This is here to help easily convert from FunctionT * (Function * or // MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling // FunctionT->getFunction(). @@ -840,7 +873,11 @@ public: /// AssemblyAnnotationWriter. void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr, bool ShouldPreserveUseListOrder = false, +#if defined(ENABLE_AUTOTUNER) + bool IsForDebug = false, bool PrintCompleteIR = false) const; +#else bool IsForDebug = false) const; +#endif /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 6095b0a1be69..dcc9bbee30fa 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1169,6 +1169,23 @@ public: using OperandBundleDef = OperandBundleDefT<Value *>; using ConstOperandBundleDef = OperandBundleDefT<const Value *>; +#if defined(ENABLE_AUTOTUNER) +//===----------------------------------------------------------------------===// +// AutoTuningEnabledCallSite Class +//===----------------------------------------------------------------------===// +class CallBase; +class AutoTuningEnabledCallSite : public autotuning::Container { +public: + AutoTuningEnabledCallSite() = delete; + void initCodeRegion() override; + uint64_t computeStructuralHash() override; + AutoTuningEnabledCallSite(CallBase *CallBase) { CB = CallBase; } + +private: + CallBase *CB; +}; +#endif + //===----------------------------------------------------------------------===// // CallBase Class //===----------------------------------------------------------------------===// @@ -1229,6 +1246,13 @@ protected: unsigned getNumSubclassExtraOperandsDynamic() const; public: +#if defined(ENABLE_AUTOTUNER) + // There is one-to-one correspondence between ATECallSite and CallBase 
class + // to enable auto-tuning. + std::unique_ptr<AutoTuningEnabledCallSite> ATECallSite = + std::make_unique<AutoTuningEnabledCallSite>(this); +#endif + using Instruction::getContext; /// Create a clone of \p CB with a different set of operand bundles and diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 8d60384e1a32..9d638af6eeef 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -3287,6 +3287,23 @@ struct OperandTraits<BranchInst> : public VariadicOperandTraits<BranchInst, 1> { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) +#if defined(ENABLE_AUTOTUNER) +//===----------------------------------------------------------------------===// +// AutoTuningEnabledSwitchInst Class +//===----------------------------------------------------------------------===// +class SwitchInst; + +class AutoTuningEnabledSwitchInst : public autotuning::Container { +public: + AutoTuningEnabledSwitchInst() = delete; + void initCodeRegion() override; + uint64_t computeStructuralHash() override; + AutoTuningEnabledSwitchInst(SwitchInst *SwitchInst) { SI = SwitchInst; } + +private: + SwitchInst *SI; +}; +#endif //===----------------------------------------------------------------------===// // SwitchInst Class //===----------------------------------------------------------------------===// @@ -3332,6 +3349,13 @@ protected: public: void operator delete(void *Ptr) { User::operator delete(Ptr); } +#if defined(ENABLE_AUTOTUNER) + // There is one-to-one correspondence between ATESwitchInst and + // SwitchInst class to enable AutoTuner. 
+ std::unique_ptr<AutoTuningEnabledSwitchInst> ATESwitchInst = + std::make_unique<AutoTuningEnabledSwitchInst>(this); +#endif + // -2 static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1); diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 670a40b28eab..904a450a1888 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -38,6 +38,9 @@ #include <optional> #include <string> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 1bdeb85afa3c..c0bcc8153eb8 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -15,6 +15,9 @@ #define LLVM_IR_STRUCTURALHASH_H #include <cstdint> +#if defined(ENABLE_AUTOTUNER) +#include <vector> +#endif namespace llvm { @@ -24,6 +27,17 @@ class Module; uint64_t StructuralHash(const Function &F); uint64_t StructuralHash(const Module &M); +#if defined(ENABLE_AUTOTUNER) +class MachineBasicBlock; +class BasicBlock; +class CallBase; +class SwitchInst; + +uint64_t StructuralHash(const std::vector<BasicBlock *> BBs); +uint64_t StructuralHash(const MachineBasicBlock &MBB); +uint64_t StructuralHash(const CallBase &CB); +uint64_t StructuralHash(const SwitchInst &SI); +#endif } // end namespace llvm #endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index c6fee47b464b..80bec2d82e24 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -340,6 +340,11 @@ void initializeWasmEHPreparePass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); void initializeXRayInstrumentationPass(PassRegistry&); +#if defined(ENABLE_AUTOTUNER) +void initializeAutotuningDumpLegacyPass(PassRegistry &); +void 
initializeAutoTuningCompileFunctionLegacyPass(PassRegistry &); +void initializeAutoTuningCompileModuleLegacyPass(PassRegistry &); +#endif } // end namespace llvm diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 7420ea64e954..3a8ecb1399f1 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -54,6 +54,9 @@ #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Vectorize.h" #include <cstdlib> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif namespace { struct ForcePassLinking { @@ -93,6 +96,11 @@ namespace { (void) llvm::createInstSimplifyLegacyPass(); (void) llvm::createInstructionCombiningPass(); (void) llvm::createJMCInstrumenterPass(); +#if defined(ENABLE_AUTOTUNER) + (void) llvm::createAutotuningDumpPass(); + (void) llvm::createAutoTuningCompileFunctionLegacyPass(); + (void) llvm::createAutoTuningCompileModuleLegacyPass(); +#endif (void) llvm::createKCFIPass(); (void) llvm::createLCSSAPass(); (void) llvm::createLICMPass(); diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h index a66f7ed73f2f..3bcc0c710498 100644 --- a/llvm/include/llvm/Remarks/Remark.h +++ b/llvm/include/llvm/Remarks/Remark.h @@ -20,6 +20,10 @@ #include "llvm/Support/raw_ostream.h" #include <optional> #include <string> +#if defined(ENABLE_AUTOTUNER) +#include <map> +#include <vector> +#endif namespace llvm { namespace remarks { @@ -47,6 +51,9 @@ struct Argument { StringRef Key; // FIXME: We might want to be able to store other types than strings here. StringRef Val; +#if defined(ENABLE_AUTOTUNER) + std::optional<std::vector<StringRef>> VectorVal; +#endif // If set, the debug location corresponding to the value. 
std::optional<RemarkLocation> Loc; @@ -65,6 +72,9 @@ enum class Type { Analysis, AnalysisFPCommute, AnalysisAliasing, +#if defined(ENABLE_AUTOTUNER) + AutoTuning, +#endif Failure, First = Unknown, Last = Failure @@ -105,6 +115,28 @@ struct Remark { /// Mangled name of the function that triggers the emssion of this remark. StringRef FunctionName; +#if defined(ENABLE_AUTOTUNER) + /// Type of the code region that the remark is associated with. + std::optional<StringRef> CodeRegionType; + + /// Configuration value for generating the same baseline binary associated + /// with this remark. + std::optional<std::map<std::string, std::string>> BaselineConfig; + + /// Hash of the code region that the remark is associated with. + std::optional<uint64_t> CodeRegionHash; + + /// Configs values passed to AutoTuner for dynamic setting of search space + /// for code regions. + std::optional<std::map<std::string, std::vector<unsigned int>>> + AutoTunerOptions; + + /// Invocation/Registering of Optimization Pass in the compilation pipeline. + /// It is used to differentiate between different invocations of same + /// optimization pass. + std::optional<unsigned int> Invocation; +#endif + /// The location in the source file of the remark. std::optional<RemarkLocation> Loc; diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index d2079fead668..c59dba2749f0 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -40,6 +40,9 @@ #include <type_traits> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include <unordered_map> +#endif namespace llvm { namespace vfs { @@ -72,6 +75,20 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false); +#if defined(ENABLE_AUTOTUNER) +// It will parse AutoTuner options (LLVMParams & ProgramParams) and add them as +// command line flags for the compilation process. 
These options are suggested +// by AutoTuner during tuning flow. This function will always be called after +// AutoTuner initialization. +// Returns true on success. Otherwise, this will print the error message to +// stderr and exit. +bool ParseAutoTunerOptions( + std::unordered_map<std::string, std::string> LLVMParams, + std::unordered_map<std::string, std::string> ProgramParams, + StringRef Overview = "", raw_ostream *Errs = nullptr, + const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false); +#endif + // Function pointer type for printing version information. using VersionPrinterTy = std::function<void(raw_ostream &)>; diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index aaba710cfde6..e69beeade947 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -16,6 +16,10 @@ #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include <functional> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Pass.h" +#include <string> +#endif namespace llvm { @@ -299,6 +303,19 @@ Pass *createLoopSimplifyCFGPass(); // FunctionPass *createInstSimplifyLegacyPass(); +#if defined(ENABLE_AUTOTUNER) +//===--------------------------------------------------------------------===// +// +// createAutotuningCompilePass - It writes IR files with -fautotune-generate +// for autotuning flow. It also enables/disables the execution of optimization +// passes in subsequent compilations (with -fautotune) based on autotuning +// methodology and available opportunities. 
+// +FunctionPass * +createAutoTuningCompileFunctionLegacyPass(std::string Pass = "unknown"); +ModulePass * +createAutoTuningCompileModuleLegacyPass(std::string Pass = "unknown"); +#endif //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h new file mode 100644 index 000000000000..2cbb48f336ef --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h @@ -0,0 +1,170 @@ +#if defined(ENABLE_AUTOTUNER) +//===---------------- AutoTuningCompile.h - Auto-Tuning -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the interface for AutoTuning Incremental Compilation. +/// Incremental compilation requires two passes 1) Module Pass and 2) Function +/// Pass for legacy pass manager. It requires an additional Loop Pass for new +/// pass manager. +/// AutoTuningOptPassGate class is also defined here which is used to enable/ +/// disable the execution of optimization passes for the compilation pipeline. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ +#define LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/OptBisect.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class Pass; + +// Skips or runs optimization passes. 
+class AutoTuningOptPassGate : public OptPassGate { +public: + explicit AutoTuningOptPassGate(bool Skip = false) : Skip(Skip) {} + + bool shouldRunPass(const StringRef PassName, + StringRef IRDescription) override; + bool isEnabled() const override { return true; } + bool checkPass(const StringRef PassName, const StringRef TargetDesc); + void setSkip(bool Skip) { this->Skip = Skip; } + bool getSkip() const { return Skip; } + +private: + bool Skip; +}; + +// Returns a static AutoTuningOptPassGate object which will be used to register +// CallBack for OptBisect instrumentation. +// It will also be used by AutoTuningCompile passes to enable/disable +// optimization passes. +AutoTuningOptPassGate &getAutoTuningOptPassGate(); + +class AutoTuningCompileModule { +public: + explicit AutoTuningCompileModule(std::string Pass = "unknown"); + bool run(Module &M); + // Write IR files for each module to be re-used in subsequent compilations + // for autotuning cycles. It only works with -fautotune-generate. + void writeIRFiles(Module &M) const; + // Enable/Disable execution of optimization passes in subsequent compilations + // based on autotuning methodology and available opportunities. 
It Only works + // with -fautotune + bool modifyCompilationPipeline(Module &M) const; + + static void setSkipCompilation(bool Option) { SkipCompilation = Option; } + static bool getSkipCompilation() { return SkipCompilation; } + +private: + static bool SkipCompilation; + std::string Pass = ""; +}; + +class AutoTuningCompileModuleLegacy : public ModulePass { +public: + static char ID; + explicit AutoTuningCompileModuleLegacy(std::string Pass = "unknown"); + bool runOnModule(Module &M) override; + StringRef getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileModulePass + : public PassInfoMixin<AutoTuningCompileModulePass> { +public: + explicit AutoTuningCompileModulePass(std::string Pass = "unknown") + : Pass(Pass){}; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileFunction { +public: + explicit AutoTuningCompileFunction(std::string Pass = "unknown"); + bool run(Function &F); + // Write IR files for each module to be re-used in subsequent compilations + // for autotuning cycles. It only works with -fautotune-generate. + void writeIRFiles(Module &M); + // Enable/Disable execution of optimization passes in subsequent compilations + // based on autotuning methodology and available opportunities. It Only works + // with -fautotune + bool modifyCompilationPipeline(Function &F); + +private: + // A module may have multiple functions; decision to enable/disable + // execution of an optimization pass will be made for the first function and + // will be used for all of the functions in the module. + // 'SkipDecision' will be set once the decision is made for a specific 'Pass'. + bool SkipDecision = false; + + // A module may have multiple functions; IR file will be written once for the + // entire module for a specific 'Pass'. 
+ bool IsModuleWritten = false; + std::string Pass = ""; +}; + +class AutoTuningCompileFunctionLegacy : public FunctionPass { +public: + static char ID; + explicit AutoTuningCompileFunctionLegacy(std::string Pass = "unknown"); + bool runOnFunction(Function &F) override; + StringRef getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileFunctionPass + : public PassInfoMixin<AutoTuningCompileFunctionPass> { +public: + explicit AutoTuningCompileFunctionPass(std::string Pass = "unknown") + : Pass(Pass){}; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileLoopPass + : public PassInfoMixin<AutoTuningCompileLoopPass> { +public: + explicit AutoTuningCompileLoopPass(std::string Pass = "unknown") + : Pass(Pass){}; + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + +private: + std::string Pass = ""; +}; + +} // end namespace llvm + +#endif /* LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ */ +#endif diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 4f3010965b59..e1cccf417898 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -108,7 +108,11 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, +#if defined(ENABLE_AUTOTUNER) + bool &UseUpperBound, unsigned int Invocation = 0); +#else bool &UseUpperBound); +#endif void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, diff --git a/llvm/lib/Analysis/AutotuningDump.cpp b/llvm/lib/Analysis/AutotuningDump.cpp new file mode 100644 index 
000000000000..81b2bbead70e --- /dev/null +++ b/llvm/lib/Analysis/AutotuningDump.cpp @@ -0,0 +1,265 @@ +#if defined(ENABLE_AUTOTUNER) +// ===-- AutotuningDump.cpp - Auto-Tuning---------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// ===--------------------------------------------------------------------===// +// +// This file contains a pass collecting IR of tuned regions and storing them +// into predetermined locations, to be used later by autotuning ML guidance. +// +// ===--------------------------------------------------------------------===// +#include "llvm/Analysis/AutotuningDump.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" +#include <sys/stat.h> + +using namespace llvm; + +#define DEBUG_TYPE "autotuning-dump" + +enum AutotuningDumpOpt { whole_modules, functions, loops }; + +// Enable Debug Options to be specified on the command line +cl::opt<AutotuningDumpOpt> AutotuningDumpMode( + "autotuning-dump-mode", cl::desc("Choose autotuning dump mode:"), + cl::init(whole_modules), + cl::values(clEnumVal(whole_modules, "dump each module in its own file"), + clEnumVal(functions, "dump each function in its own file"), + clEnumVal(loops, "dump each loop in its own file"))); + +AutotuningDump::AutotuningDump(bool IncrementalCompilation) { + // Check if the environment variable AUTOTUNE_DATADIR is set. 
+ IsIncrementalCompilation = IncrementalCompilation; + AutoTuneDirPath = "autotune_datadir"; + if (std::optional<std::string> MaybePath = + llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR")) + AutoTuneDirPath = *MaybePath; +} + +int AutotuningDump::getConfigNumber() { + auto ConfigNumOrErr = autotuning::Engine.getConfigNumber(); + if (ConfigNumOrErr) + return *ConfigNumOrErr; + else { + report_fatal_error("Invalid/missing Autotuner configuration ID"); + return -1; + } +} + +void AutotuningDump::dumpToStream(llvm::raw_ostream &os, const Loop &L) const { + L.print(os); +} + +void AutotuningDump::dumpToStream(llvm::raw_ostream &os, + const Function &F) const { + F.print(os, /*AAW*/ nullptr, /*ShouldPreserveUseListOrder*/ false, + /*IsForDebug*/ false, /*PrintCompleteIR*/ true); +} + +// Create appropriate file. File will contain AbsolutePath/FileName. +std::unique_ptr<raw_ostream> AutotuningDump::createFile(const Twine &File) { + std::error_code EC; + return std::make_unique<raw_fd_ostream>((File).str(), EC, + sys::fs::CD_CreateAlways, + sys::fs::FA_Write, sys::fs::OF_None); +} + +std::string AutotuningDump::getDirectoryName(const std::string File) const { + std::string DirectoryName = AutoTuneDirPath; + if (!autotuning::Engine.isMLEnabled()) + DirectoryName += "/IR_files"; + + DirectoryName = DirectoryName + "/" + File + "/"; + + // Create directory if not already present. 
+ if (std::error_code EC = sys::fs::create_directories(DirectoryName)) + errs() << "could not create directory: " << DirectoryName << ": " + << EC.message(); + + return DirectoryName; +} + +std::string AutotuningDump::getFileName(std::string FilePath) { + if (autotuning::Engine.isMLEnabled()) + return std::to_string(this->getConfigNumber()) + ".ll"; + std::replace(FilePath.begin(), FilePath.end(), '/', '_'); + return FilePath + ".ll"; +} + +void AutotuningDump::dumpModule(Module &M) { + std::unique_ptr<raw_ostream> fptr; + LLVM_DEBUG(dbgs() << "AutotuningDump: Dump module IR files.\n"); + if (IsIncrementalCompilation) { + std::string Filename = M.getSourceFileName(); + llvm::SmallString<128> FilenameVec = StringRef(Filename); + llvm::sys::fs::make_absolute(FilenameVec); + size_t Pos = FilenameVec.rfind("."); + if (Pos != std::string::npos) { + FilenameVec.pop_back_n(FilenameVec.size() - Pos); + FilenameVec.append(".ll"); + } + fptr = createFile(FilenameVec); + } else { + std::string File = llvm::sys::path::filename(M.getName()).str(); + std::string DirectoryName = getDirectoryName(File); + std::string FileName = getFileName(M.getName().str()); + fptr = createFile(DirectoryName + FileName); + } + + M.print(*fptr, nullptr, true, false); +} + +void AutotuningDump::dumpFunctions(Module &M) { + std::string FilePath = M.getName().str(); + std::replace(FilePath.begin(), FilePath.end(), '/', '_'); + std::string DirectoryName = getDirectoryName(FilePath); + for (Function &F : M.getFunctionList()) { // go through all functions + if (F.isDeclaration() || F.empty()) + continue; + + AutoTuningEnabledFunction *AutotuneFunc = &F.getATEFunction(); + assert(AutotuneFunc); + autotuning::Engine.initContainer(AutotuneFunc, "autotuning-dump", + F.getName(), false); + std::string FuncName = F.getName().str(); + // check the whole function + if (AutotuneFunc->requiresIRDump(true)) { + auto fptr = createFile(DirectoryName + Twine(FuncName) + ".ll"); + this->dumpToStream(*fptr, F); + } + } 
+} + +void AutotuningDump::dumpLoops(Module &M, + function_ref<LoopInfo &(Function &)> GetLI) { + for (Function &F : M) { + // Nothing to do for declarations. + if (F.isDeclaration() || F.empty()) + continue; + + LoopInfo &LI = GetLI(F); + for (auto &L : LI.getLoopsInPreorder()) { + Function *Func = nullptr; + StringRef FuncName = ""; + if (!L->isInvalid()) + Func = L->getHeader()->getParent(); + if (Func) + FuncName = Func->getName(); + + autotuning::Engine.initContainer(L, "autotuning-dump", FuncName, false); + if (L->requiresIRDump()) { + std::string FuncName = L->getCodeRegion().getFuncName(); + std::string FileName = std::to_string(this->getConfigNumber()) + ".ll"; + // getDirectoryName() also creates the directory if it is missing. + std::string DirectoryName = getDirectoryName( + llvm::sys::path::filename(FuncName).str() + "_loop_" + + std::to_string(L->getCodeRegion().getSourceLoc().SourceLine)); + auto fptr = createFile(DirectoryName + FileName); + this->dumpToStream(*fptr, *L); + } + } + } +} + +bool AutotuningDump::run(Module &M, + function_ref<LoopInfo &(Function &)> GetLI) { + // Change to absolute path. + SmallString<256> OutputPath = StringRef(AutoTuneDirPath); + sys::fs::make_absolute(OutputPath); + + // Creating new output directory, if it does not exists.
+ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { + llvm::errs() << (make_error<StringError>( + "could not create directory: " + Twine(OutputPath) + ": " + + EC.message(), + EC)); + return false; + } + + if (IsIncrementalCompilation) { + LLVM_DEBUG( + dbgs() + << "AutotuningDump: IR files writing for incremental compilation.\n"); + dumpModule(M); + return false; + } + + switch (AutotuningDumpMode) { + case whole_modules: + dumpModule(M); + break; + case functions: + dumpFunctions(M); + break; + case loops: + dumpLoops(M, GetLI); + } + + return false; +} + +AutotuningDumpLegacy::AutotuningDumpLegacy(bool IncrementalCompilation) + : ModulePass(AutotuningDumpLegacy::ID) { + IsIncrementalCompilation = IncrementalCompilation; + initializeAutotuningDumpLegacyPass(*PassRegistry::getPassRegistry()); +} + +bool AutotuningDumpLegacy::runOnModule(Module &M) { + if (!autotuning::Engine.isDumpEnabled()) + return false; + + auto GetLI = [this](Function &F) -> LoopInfo & { + return getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo(); + }; + + AutotuningDump Impl(IsIncrementalCompilation); + return Impl.run(M, GetLI); +} + +StringRef AutotuningDumpLegacy::getPassName() const { + return "Autotuning Dump"; +} + +void AutotuningDumpLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfoWrapperPass>(); +} + +char AutotuningDumpLegacy::ID = 0; +INITIALIZE_PASS_BEGIN(AutotuningDumpLegacy, "autotuning-dump", + "Dump IR for Autotuned Code Regions", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(AutotuningDumpLegacy, "autotuning-dump", + "Dump IR for Autotuned Code Regions", false, false) + +ModulePass *llvm::createAutotuningDumpPass() { + return new AutotuningDumpLegacy(); +} + +AnalysisKey AutotuningDumpAnalysis::Key; + +AutotuningDumpAnalysis::Result +AutotuningDumpAnalysis::run(Module &M, ModuleAnalysisManager &AM) { + if (!autotuning::Engine.isDumpEnabled()) + return false; + + auto 
&FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetLI = [&FAM](Function &F) -> LoopInfo & { + return FAM.getResult<LoopAnalysis>(F); + }; + + AutotuningDump Impl(IsIncrementalCompilation); + Impl.run(M, GetLI); + return false; +} +#endif diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 4a1797c42789..9c6a70f0221f 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_component_library(LLVMAnalysis Analysis.cpp AssumeBundleQueries.cpp AssumptionCache.cpp + AutotuningDump.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp BlockFrequencyInfoImpl.cpp @@ -153,6 +154,7 @@ add_llvm_component_library(LLVMAnalysis ${MLLinkDeps} LINK_COMPONENTS + AutoTuner BinaryFormat Core Object diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index e2480d51d372..f6b3c14a0345 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -383,15 +383,27 @@ llvm::shouldInline(CallBase &CB, Function *Callee = CB.getCalledFunction(); Function *Caller = CB.getCaller(); +#if defined(ENABLE_AUTOTUNER) + // Get the code Region to add BaselineConfig values for inline + const autotuning::CodeRegion &CR = CB.ATECallSite.get()->getCodeRegion(); + static const std::string ForceInlineParamStr = "ForceInline"; +#endif + if (IC.isAlways()) { LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB << "\n"); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); +#endif return IC; } if (!IC) { LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) << ", Call: " << CB << "\n"); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); +#endif if (IC.isNever()) { ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) @@ -417,6 +429,9 @@ llvm::shouldInline(CallBase &CB, 
LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); +#endif ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", Call) @@ -430,6 +445,9 @@ llvm::shouldInline(CallBase &CB, LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB << '\n'); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); +#endif return IC; } diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index a2f46edcf5ef..9f8f57865de2 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -162,6 +162,14 @@ static cl::opt<bool> DisableGEPConstOperand( "disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands")); +#if defined(ENABLE_AUTOTUNER) +static cl::opt<bool> + EnableLocalCallSiteTuning("auto-tuning-enable-local-callsite-tuning", + cl::init(false), cl::Hidden, + cl::desc("Enable AutoTuning for local callsites " + "as well.")); +#endif + namespace llvm { std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) { if (Attr.isValid()) { @@ -2990,6 +2998,27 @@ InlineCost llvm::getInlineCost( return llvm::InlineCost::getNever(UserDecision->getFailureReason()); } +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled() && Call.getCaller() && + (!Callee->hasLocalLinkage() || EnableLocalCallSiteTuning)) { + bool ForceInline = false; + bool Found = false; + + autotuning::Engine.initContainer(Call.ATECallSite.get(), "inline", + Call.getCaller()->getName(), + /* addOpportunity */ false); + + Found = Call.ATECallSite->lookUpParams<bool>("ForceInline", ForceInline); + + if (Found) { + if (ForceInline) + return llvm::InlineCost::getAlways("Force inlined by auto-tuning"); + else + return 
llvm::InlineCost::getNever("Force non-inlined by auto-tuning"); + } + } +#endif + LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Call.getCaller()->getName() << ")\n"); diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 60a72079e864..36aca73ee675 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -37,6 +37,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GenericLoopInfoImpl.h" #include "llvm/Support/raw_ostream.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/IR/StructuralHash.h" +#endif using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops. @@ -663,6 +667,54 @@ Loop::LocRange Loop::getLocRange() const { return LocRange(); } +#if defined(ENABLE_AUTOTUNER) +uint64_t Loop::computeStructuralHash() { + std::vector<BasicBlock *> BBs = getBlocks(); + return StructuralHash(BBs); +} + +void Loop::initCodeRegion() { + std::string LoopName; + // use the header's name as the loop name + if (BasicBlock *Header = getHeader()) { + if (Header->hasName()) { + LoopName = Header->getName().str(); + } + // if the header doesn't have a name, + // use the label of this header from AsmWriter + else { + std::string Str; + llvm::raw_string_ostream RSO(Str); + Header->printAsOperand(RSO); + LoopName = RSO.str(); + } + } else { + LoopName = "<unnamed loop>"; + } + + Function *F = this->getHeader()->getParent(); + StringRef FuncName = F->getName(); + + // init the CodeRegion + autotuning::CodeRegion CR = autotuning::CodeRegion( + LoopName, FuncName.data(), autotuning::CodeRegionType::Loop, + this->getStartLoc()); + // Compute the number of non-debug IR instructions in this loop. 
+ unsigned TotalNumInstrs = 0; + for (const BasicBlock *BB : this->getBlocks()) { + unsigned NumInstrs = std::distance(BB->instructionsWithoutDebug().begin(), + BB->instructionsWithoutDebug().end()); + TotalNumInstrs += NumInstrs; + } + CR.setSize(TotalNumInstrs); + // Compute hotness. + autotuning::HotnessType Hotness = F->ATEFunction.getHotness(); + CR.setHotness(Hotness); + + this->setCodeRegion(CR); +} +#endif + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } diff --git a/llvm/lib/AutoTuner/AutoTuning.cpp b/llvm/lib/AutoTuner/AutoTuning.cpp new file mode 100644 index 000000000000..1f09f06d84a2 --- /dev/null +++ b/llvm/lib/AutoTuner/AutoTuning.cpp @@ -0,0 +1,705 @@ +#if defined(ENABLE_AUTOTUNER) +//===-- AutoTuning.cpp - Auto-Tuning --------------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines Auto Tuning related functions, models and interfaces. +// +//===----------------------------------------------------------------------===// + +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/AutoTuner/AutoTuningRemarkManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" + +// Enable debug messages for AutoTuning. +#define DEBUG_TYPE "autotuning" + +using namespace llvm; + +// defined in 'lib/Remarks/YAMLRemarkParser.cpp'. +extern cl::opt<bool> OmitAutotuningMetadata; + +// -auto-tuning-input - Command line option to specify the input file. 
+static cl::opt<std::string> InputFile("auto-tuning-input", cl::Hidden, + cl::desc("Specify the input file")); + +// -auto-tuning-opp - Command line option to specify the output directory of +// tuning opportunities. +static cl::opt<std::string> OutputOppDir( + "auto-tuning-opp", cl::Hidden, + cl::desc("Specify the output directory of tuning opportunities")); + +static cl::opt<std::string> + RemarksPasses("auto-tuning-pass-filter", cl::Hidden, + cl::desc("Only dump auto-tuning remarks from passes whose " + "names match the given regular expression"), + cl::value_desc("regex")); + +static cl::opt<std::string> + ProjectDir("autotuning-project-dir", cl::Hidden, cl::init(""), + cl::desc("Specify project base dir to make code region name " + "relative to base dir. This operation will only be " + "applied for coarse-grain code regions.")); + +// -auto-tuning-config-id - Command line option to specify the config number +// being used for compilation. Required only for ML guidance feature. +static cl::opt<int> CFGNumber( + "auto-tuning-config-id", cl::Hidden, + cl::desc( + "Specify the auto-tuning configuration ID used in this compilation.")); + +static cl::opt<std::string> OutputFormat( + "auto-tuning-remark-format", cl::Hidden, + cl::desc("The format used for auto-tuning remarks (default: YAML)"), + cl::value_desc("format"), cl::init("yaml")); + +// AutoTuner incremental compilation options. 
+cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode( + "auto-tuning-compile-mode", cl::Hidden, cl::init(Inactive), + cl::desc("AutoTuner: Choose incremental compilation mode."), + cl::values(clEnumVal(Inactive, + "AutoTuner: Disable incremental compilation."), + clEnumVal(CoarseGrain, "AutoTuner: Enable incremental " + "compilation for coarse grain tuning."), + clEnumVal(FineGrain, "AutoTuner: Enable incremental compilation " + "for fine grain tuning."), + clEnumVal(Basic, "AutoTuner: Enable incremental compilation for " + "any kind of code region."))); + +static cl::opt<bool> + EnableAutoTuningDump("enable-autotuning-dump", cl::Hidden, cl::init(false), + cl::desc("Enable AutoTuningDump Pass")); + +static cl::opt<bool> + ThinLTOTuning("autotuning-thin-lto", cl::Hidden, cl::init(false), + cl::desc("AutoTuner enabled in ThinLTO mode.")); + +namespace autotuning { + +static cl::list<CodeRegionType> AutotuningOutputFilter( + "auto-tuning-type-filter", cl::Hidden, cl::CommaSeparated, + cl::desc( + "Select types of code regions to dump auto-tuning opportunities for:"), + cl::values(clEnumVal(LLVMParam, "LLVMParam code regions only"), + clEnumVal(ProgramParam, "ProgramParam code regions only"), + clEnumVal(CallSite, "CallSite code regions only"), + clEnumVal(Function, "Function code regions only"), + clEnumVal(Loop, "Loop code regions only"), + clEnumVal(MachineBasicBlock, + "Machine basic block code regions only"), + clEnumVal(Switch, "Switch code regions only"), + clEnumVal(Other, "All other types of code regions"))); + +static cl::list<std::string> AutotuningFunctionFilter( + "auto-tuning-function-filter", cl::Hidden, cl::CommaSeparated, + cl::desc("Apply code region filtering based on function names")); + +static const cl::opt<bool> ExcludeColdCodeRegion( + "auto-tuning-exclude-cold", cl::Hidden, cl::init(true), + cl::desc("Use profile data to prune cold code regions from auto-tuning")); + +static const cl::opt<bool> CodeRegionMatchingWithHash( + 
"auto-tuning-code-region-matching-hash", cl::Hidden, cl::init(true), + cl::desc("Use IR hashing to match the Code Regions")); + +static const cl::opt<bool> HotCodeRegionOnly( + "auto-tuning-hot-only", cl::Hidden, cl::init(false), + cl::desc( + "Use profile data to include hot code regions only from auto-tuning")); + +static const cl::opt<unsigned> + SizeThreshold("auto-tuning-size-threshold", cl::Hidden, cl::init(0), + cl::desc("Prune small code regions from auto-tuning with a " + "size smaller than the threshold")); + +static inline const std::string generateName(const std::string &Name) { + if (Name.empty()) + return "unnamed"; + else + return Name; +} + +//===----------------------------------------------------------------------===// +// CodeRegion implementation +CodeRegion::CodeRegion(const CodeRegionType Type) : Type(Type) {} + +CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName, + const CodeRegionType &Type, const DebugLoc &DL, + const DynamicOptions DO) { + this->Name = generateName(Name); + this->FuncName = generateName(FuncName); + this->Type = Type; + this->StringType = getTypeAsString(Type); + if (DL) { + StringRef File = DL->getFilename(); + unsigned Line = DL->getLine(); + unsigned Col = DL->getColumn(); + this->Location = SourceLocation{File.str(), Line, Col}; + } + this->AutoTunerOptions = DO; +} + +CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName, + const CodeRegionType &Type, + const SourceLocation &Location, + const DynamicOptions DO) { + this->Name = generateName(Name); + this->FuncName = generateName(FuncName); + this->Type = Type; + this->StringType = getTypeAsString(Type); + this->Location = Location; + this->AutoTunerOptions = DO; +} + +CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName, + const std::string &PassName, const CodeRegionType &Type, + const SourceLocation &Location, + const unsigned int Invocation) + : CodeRegion(Name, FuncName, Type, Location) { + 
this->PassName = generateName(PassName); + this->Invocation = Invocation; +} + +bool CodeRegion::operator==(const CodeRegion &CodeRegion) const { + bool IsEqual = false; + if (OmitAutotuningMetadata) + IsEqual = (this->getHash() == CodeRegion.getHash()) && + (this->Type == CodeRegion.getType()) && + (this->PassName == CodeRegion.getPassName()); + else { + IsEqual = (this->Type == CodeRegion.getType()) && + (this->Name == CodeRegion.getName()) && + (this->PassName == CodeRegion.getPassName()) && + (this->FuncName == CodeRegion.getFuncName()) && + (this->Location == CodeRegion.getSourceLoc()); + if (CodeRegionMatchingWithHash) + IsEqual = IsEqual && (this->getHash() == CodeRegion.getHash()); + } + + if (autotuning::Engine.ParseInput) + IsEqual = IsEqual && this->getInvocation() == CodeRegion.getInvocation(); + + if (autotuning::Engine.GenerateOutput) + IsEqual = + IsEqual && this->getBaselineConfig() == CodeRegion.getBaselineConfig(); + + return IsEqual; +} + +std::string CodeRegion::getTypeAsString(CodeRegionType CRType) { + switch (CRType) { + case autotuning::CodeRegionType::MachineBasicBlock: + return "machine_basic_block"; + case autotuning::CodeRegionType::Loop: + return "loop"; + case autotuning::CodeRegionType::Function: + return "function"; + case autotuning::CodeRegionType::CallSite: + return "callsite"; + case autotuning::CodeRegionType::LLVMParam: + return "llvm-param"; + case autotuning::CodeRegionType::ProgramParam: + return "program-param"; + case autotuning::CodeRegionType::Switch: + return "switch"; + default: + return "other"; + } +} + +std::string CodeRegion::getHotnessAsString(HotnessType Hotness) { + switch (Hotness) { + case autotuning::HotnessType::Cold: + return "cold"; + case autotuning::HotnessType::Hot: + return "hot"; + default: + return "unknown"; + } +} + +void CodeRegion::setPassName(const std::string &NewPassName) { + this->PassName = generateName(NewPassName); +} + +/* static */ +autotuning::CodeRegion CodeRegion::getInvalidInstance() 
{ + static autotuning::CodeRegion Invalid = + CodeRegion(autotuning::CodeRegionType::Invalid); + return Invalid; +} + +/* static */ +autotuning::CodeRegion CodeRegion::getEmptyInstance() { + static autotuning::CodeRegion Empty = + CodeRegion(autotuning::CodeRegionType::Empty); + return Empty; +} + +//===----------------------------------------------------------------------===// +// Container implementation +// + +const CodeRegion &Container::getCodeRegion() const { return CR; } + +void Container::setCodeRegion(const CodeRegion &NewCR) { this->CR = NewCR; } + +template <typename T> +bool Container::lookUpParams(const std::string &ParamsName, T &Value) const { + bool Found = false; + auto ConfigMapIterator = Engine.ParamTable.find(CR); + if (ConfigMapIterator != Engine.ParamTable.end()) { + ParameterManager InputParams = ConfigMapIterator->second; + Found = InputParams.findByName(ParamsName, Value); + if (Found) { + LLVM_DEBUG(dbgs() << ParamsName << " is set for the CodeRegion: \n" + << " Name: " << CR.getName() << "\n" + << " FuncName: " << CR.getFuncName() << "\n" + << " PassName: " << CR.getPassName() << "\n" + << " Type: " << CR.getTypeAsString() << "\n" + << " Hash: " << CR.getHash() << "\n" + << "\n"); + } + } + return Found; +} + +bool Container::requiresIRDump(bool IsFunctionIR) const { + auto findBaselineRegion = [&]() -> bool { + for (auto &entry : Engine.TuningOpps) + if (!IsFunctionIR) { + if (CR.getSourceLoc() == entry.getSourceLoc()) + return true; + } else { + if (CR.getFileName() == entry.getFileName() && + CR.getFuncName() == entry.getFuncName()) + return true; + } + return false; + }; + auto findNonBaselineRegion = [&]() { + for (auto &entry : Engine.ParamTable) + if (!IsFunctionIR) { + if (CR.getSourceLoc() == entry.first.getSourceLoc()) + return true; + } else { + if (CR.getFileName() == entry.first.getFileName() && + CR.getFuncName() == entry.first.getFuncName()) + return true; + } + return false; + }; + + if (CFGNumber == -1) + return 
findBaselineRegion(); + else + return findNonBaselineRegion(); +} + +template bool Container::lookUpParams<int>(const std::string &ParamsName, + int &Value) const; +template bool Container::lookUpParams<bool>(const std::string &ParamsName, + bool &Value) const; +template bool +Container::lookUpParams<std::string>(const std::string &ParamsName, + std::string &Value) const; +template bool Container::lookUpParams<std::vector<std::string>>( + const std::string &ParamsName, std::vector<std::string> &Value) const; + +// Counts the recorded call sites sharing Loc's caller/callee pair. +static unsigned int count(const SmallVector<CallSiteLocation, 10> &CallSiteLocs, + const CallSiteLocation &Loc) { + unsigned int Count = 0; + for (const CallSiteLocation &Other : CallSiteLocs) { + if (Loc.Caller == Other.Caller && Loc.Callee == Other.Callee) + Count++; + } + return Count; +} + +bool AutoTuningEngine::isThinLTOTuning() const { return ThinLTOTuning; } + +CodeRegionType AutoTuningEngine::convertPassToType(std::string PassName) { + auto Search = PTTMap.find(PassName); + if (Search == PTTMap.end()) + llvm_unreachable( + "AutoTuningEngine: Invalid/unsupported optimization pass provided.\n"); + return Search->second; +} + +void AutoTuningEngine::insertCallSiteLoc(CallSiteLocation Loc) { + CallSiteLocs.emplace_back(Loc); +} + +// If a function has multiple calls to same callee, then insert all the calls in +// the CallSiteLocs vector which get available due to inlining of such calls. +// It will use "Original Call Line No + New Call Line No" instead of using +// "DebugLoc Line No".
+void AutoTuningEngine::updateCallSiteLocs(llvm::CallBase *OldCB, + llvm::CallBase *NewCB, + llvm::Function *Callee, + unsigned int Line) { + for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { + if (OldCB == CallSiteLocs[Idx].CB) { + CallSiteLocation Loc = CallSiteLocs[Idx]; + Loc.CB = NewCB; + Loc.Callee = Callee; + Loc.SrcLoc.SourceLine = Loc.SrcLoc.SourceLine + Line; + CallSiteLocs.emplace_back(Loc); + break; + } + } +} + +void AutoTuningEngine::cleanCallSiteLoc() { + unsigned int Size = CallSiteLocs.size(); + unsigned int Idx = 0; + for (unsigned int I = 0; I < Size; ++I) { + CallSiteLocation Loc = CallSiteLocs[Idx]; + unsigned int Count = count(CallSiteLocs, Loc); + if (Count == 1) { + CallSiteLocs.erase(CallSiteLocs.begin() + Idx); + continue; + } + Idx++; + } +} + +void AutoTuningEngine::clearCallSiteLocs() { CallSiteLocs.clear(); } + +std::optional<unsigned int> +AutoTuningEngine::getCallSiteLoc(llvm::CallBase *CB) { + for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { + if (CB == CallSiteLocs[Idx].CB) + return CallSiteLocs[Idx].SrcLoc.SourceLine; + } + return std::nullopt; +} + +void AutoTuningEngine::addOpportunity( + const CodeRegion &OppCR, + std::map<std::string, std::string> BaselineConfig) { + if (!OppCR.Initialized) + return; + + OppCR.setBaselineConfig(BaselineConfig); + if (!TuningOpps.contains(OppCR)) + TuningOpps.insert(OppCR); + else if (OppCR.getHotness() != Unknown) { + // If OppCR already exists in TuningOpps with unknown hotness, + // then update it if the current hotness is hot/cold. 
+ auto OppI = find(TuningOpps, OppCR); + if (OppI->getHotness() == Unknown) + OppI->setHotness(OppCR.getHotness()); + } +} + +void AutoTuningEngine::applyOppFilters(CodeRegions &CRs) { + CodeRegions NewCRs; + for (CodeRegion CR : CRs) { + if (AutotuningOutputFilter.getNumOccurrences() > 0) { + bool IsMatched = false; + for (auto CRType : AutotuningOutputFilter) { + if (CRType == CR.getType()) { + IsMatched = true; + break; + } + } + // Filter out the CodeRegion if its type fails to match any types + // specified from the command line. + if (!IsMatched) + continue; + } + if (SizeThreshold.getNumOccurrences() > 0 && CR.getSize() < SizeThreshold) + continue; + if (ExcludeColdCodeRegion && CR.isCold()) { + LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function " + << CR.getFuncName() << "\n"); + continue; + } + if (HotCodeRegionOnly && !CR.isHot()) { + LLVM_DEBUG(dbgs() << "Skip CodeRegion with " << CR.getHotnessAsString() + << " function " << CR.getFuncName() << "\n"); + continue; + } + NewCRs.insert(CR); + LLVM_DEBUG(dbgs() << "CodeRegion added as an tuning opportunity: \n" + << " Name: " << CR.getName() << "\n" + << " FuncName: " << CR.getFuncName() << "\n" + << " PassName: " << CR.getPassName() << "\n" + << " Type: " << CR.getTypeAsString() << "\n" + << " Size: " << CR.getSize() << "\n" + << " Hotness: " << CR.getHotnessAsString() << "\n" + << " Hash: " << CR.getHash() << "\n" + << " Location: " << CR.getSourceLoc().SourceFilePath + << "; " << CR.getSourceLoc().SourceLine << "; " + << CR.getSourceLoc().SourceColumn << "\n\n"); + } + if (AutotuningOutputFilter.getNumOccurrences() == 0 || + std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), + Other) != AutotuningOutputFilter.end()) { + // Add an empty CodeRegion with ModuleID as an tuning opportunity. + // It could be used to represent a module level code region. 
+ autotuning::CodeRegion GlobalCR = + CodeRegion(ModuleID, "none", "all", Other); + GlobalCR.setHash(llvm::hash_combine(ModuleID, Other)); + NewCRs.insert(GlobalCR); + LLVM_DEBUG(dbgs() << "Module added as an tuning opportunity: \n" + << " Name: " << GlobalCR.getName() << "\n" + << " Hash: " << GlobalCR.getHash() << "\n" + << "\n"); + } + + // Include LLVMParam as an tuning opportunity only if it is specified with + // -auto-tuning-type-filter. + if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), + LLVMParam) != AutotuningOutputFilter.end()) + NewCRs.insert(CodeRegion(ModuleID, "none", "none", LLVMParam)); + + if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), + ProgramParam) != AutotuningOutputFilter.end()) + NewCRs.insert(CodeRegion(ModuleID, "none", "none", ProgramParam)); + + CRs = NewCRs; +} + +bool AutoTuningEngine::applyFunctionFilter(std::string FuncName) { + if (AutotuningFunctionFilter.getNumOccurrences() == 0) + return true; + + for (std::string FunctionFilter : AutotuningFunctionFilter) + if (FuncName == FunctionFilter) + return true; + + return false; +} + +void AutoTuningEngine::initContainer(Container *Container, + const std::string &PassName, + const StringRef FuncName, + bool AddOpportunity, + unsigned int Invocation) { + if (Enabled) { + if (!isTuningAllowedForType(convertPassToType(PassName)) && + !(isGenerateOutput() && + AutotuningOutputFilter.getNumOccurrences() == 0)) + return; + + if (!applyFunctionFilter(FuncName.str())) + return; + + // The attributes of a Container could potentially change overtime even with + // the same pass if the associated pass is invoked multiple times at + // different places in the pipeline. Therefore, we need to initCodeRegion + // every time when this function is called to ensure the CodeRegion with the + // latest information will be added as tuning opportunities. 
+ Container->initCodeRegion(); + if (Container->CR.getType() == autotuning::CodeRegionType::Invalid) + return; + + uint64_t hash = Container->computeStructuralHash(); + CodeRegion &OppCR = Container->CR; + if (GenerateOutput) { + if (OppCR.getSize() < SizeThreshold) + return; + if (ExcludeColdCodeRegion && OppCR.isCold()) { + LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function " + << OppCR.getFuncName() << "\n"); + return; + } + if (HotCodeRegionOnly && !OppCR.isHot()) { + LLVM_DEBUG(dbgs() << "Skip CodeRegion with " + << OppCR.getHotnessAsString() << " function " + << OppCR.getFuncName() << "\n"); + return; + } + } + OppCR.setPassName(PassName); + OppCR.setHash(hash); + OppCR.setInvocation(Invocation); + OppCR.Initialized = true; + if (AddOpportunity) + addOpportunity(OppCR); + } +} + +bool AutoTuningEngine::shouldRunOptPass(std::string Filename, + std::string Pass) { + return OppPassList.count(Filename) ? OppPassList[Filename].count(Pass) + : false; +} + +Error AutoTuningEngine::init(const std::string &Module) { + ParseInput = false; + if (std::optional<std::string> MaybePath = + llvm::sys::Process::GetEnv("AUTOTUNE_INPUT")) { + InputFile = *MaybePath; + ParseInput = true; + } else if (InputFile.getNumOccurrences() > 0) { + ParseInput = true; + } + + GenerateOutput = false; + if (OutputOppDir.getNumOccurrences() > 0) + GenerateOutput = true; + + // Invocation of any of the following command line options + // (auto-tuning-input and auto-tuning-opp) or env variable + // AUTOTUNE_INPUT can enable auto-tuning mode. + if (ParseInput || GenerateOutput) { + Enabled = true; + // Generate absolute path and remove the base directory (if available). + // A relative path will be used as (coarse-grain) code region name.
+ llvm::SmallString<128> ModuleVec = StringRef(Module); + llvm::sys::fs::make_absolute(ModuleVec); + if (ProjectDir.size() && ModuleVec.startswith(ProjectDir)) + ModuleID = ModuleVec.substr(ProjectDir.size()).str(); + else + ModuleID = std::string(ModuleVec); + } + + // Initialization of map to be used for pass-name to CodeRegionType + // conversion. + PTTMap = {{"loop-unroll", Loop}, + {"loop-vectorize", Loop}, + {"inline", CallSite}, + {"machine-scheduler", MachineBasicBlock}, + {"switch-lowering", Switch}, + {"autotuning-dump", Function}}; + + if (ParseInput) { + // Currently we only support yaml format for input. + if (Error E = AutoTuningRemarkManager::read(*this, InputFile, "yaml")) { + errs() << "Error parsing auto-tuning input.\n"; + return E; + } else { + LLVM_DEBUG(dbgs() << "AutoTuningEngine is initialized.\n" + << " Size of ParamTable: " << this->ParamTable.size() + << "\n"); + if (LLVMParams.size()) + LLVM_DEBUG(dbgs() << "AutoTuner: LLVMParams applied.\n"); + if (ProgramParams.size()) + LLVM_DEBUG(dbgs() << "AutoTuner: ProgramParams applied.\n"); + } + } + + for (auto CRType : AutotuningOutputFilter) + CodeRegionFilterTypes.insert(CRType); + + if (GenerateOutput) { + switch (AutoTuningCompileMode) { + case CoarseGrain: { + bool Valid = false; + if (AutotuningOutputFilter.getNumOccurrences() > 0) { + Valid = true; + for (auto CRType : AutotuningOutputFilter) + if (CRType != LLVMParam) { + Valid = false; + break; + } + } + if (!Valid) { + AutoTuningCompileMode = Inactive; + errs() << "AutoTunerCompile: Code region type filtering does not match" + " with incremental compilation option.\n" + "Disabling incremental compilation.\n"; + } + break; + } + case FineGrain: { + bool Valid = false; + if (AutotuningOutputFilter.getNumOccurrences() > 0) { + Valid = true; + for (auto CRType : AutotuningOutputFilter) { + if (CRType != Loop && CRType != CallSite && CRType != Function) { + Valid = false; + break; + } + } + } + if (!Valid) { + AutoTuningCompileMode =
Inactive; + errs() << "AutoTunerCompile: Code region type filtering does not match" + " with incremental compilation option.\n" + "Disabling incremental compilation.\n"; + } + break; + } + case Basic: + case Inactive: + break; + default: + llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental " + "Compilation mode.\n"); + } + } + + MLEnabled = (CFGNumber.getNumOccurrences() > 0); + if (EnableAutoTuningDump || MLEnabled) + DumpEnabled = true; + return Error::success(); +} + +llvm::Expected<int> AutoTuningEngine::getConfigNumber() { + if (!isMLEnabled()) { + std::string errorMsg = + "No Autotuner configuration specified; ML guidance is unavailable."; + return createStringError(inconvertibleErrorCode(), errorMsg); + } else + return CFGNumber; +} + +Error AutoTuningEngine::finalize() { + if (OutputOppDir.getNumOccurrences() > 0) { + // Apply filters. + applyOppFilters(TuningOpps); + if (!TuningOpps.empty()) { + if (Error E = AutoTuningRemarkManager::dump( + *this, OutputOppDir, OutputFormat, RemarksPasses)) { + errs() << "Error generating auto-tuning opportunities.\n"; + return E; + } + } + + // Clear the global opportunity list when ending the auto-tuning + // to avoid redundant information + TuningOpps.clear(); + } + return Error::success(); +} + +template <typename T> +bool AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, + T &Value) const { + bool Found = GlobalParams.findByName(ParamsName, Value); + if (Found) { + LLVM_DEBUG(dbgs() << "Global Variable " << ParamsName << " is set.\n"); + } + return Found; +} + +template bool +AutoTuningEngine::lookUpGlobalParams<int>(const std::string &ParamsName, + int &Value) const; +template bool +AutoTuningEngine::lookUpGlobalParams<bool>(const std::string &ParamsName, + bool &Value) const; +template bool +AutoTuningEngine::lookUpGlobalParams<std::string>(const std::string &ParamsName, + std::string &Value) const; +template bool AutoTuningEngine::lookUpGlobalParams<std::vector<std::string>>( + const
std::string &ParamsName, std::vector<std::string> &Value) const; + +class AutoTuningEngine Engine; + +} // namespace autotuning + +#endif diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp new file mode 100644 index 000000000000..3e0506e534c4 --- /dev/null +++ b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp @@ -0,0 +1,299 @@ +#if defined(ENABLE_AUTOTUNER) +//===- llvm/AutoTuner/AutoTuningRemarkManager.cpp - Remark Manager --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation for inputting and outputting remarks +// for AutoTuning. +// +//===----------------------------------------------------------------------===// + +#include "llvm/AutoTuner/AutoTuningRemarkManager.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/LLVMRemarkStreamer.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkParser.h" +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Remarks/RemarkStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ToolOutputFile.h" + +// Enable debug messages for AutoTuner. +#define DEBUG_TYPE "autotuning" + +using namespace llvm; +using namespace autotuning; + +// Helper functions. +namespace { +// Convert string into CodeRegionType. 
+Expected<CodeRegionType> StringToCodeRegionType(const std::string &CRType) { + if (CRType == "machine_basic_block") + return autotuning::CodeRegionType::MachineBasicBlock; + else if (CRType == "loop") + return autotuning::CodeRegionType::Loop; + else if (CRType == "function") + return autotuning::CodeRegionType::Function; + else if (CRType == "callsite") + return autotuning::CodeRegionType::CallSite; + else if (CRType == "llvm-param") + return autotuning::CodeRegionType::LLVMParam; + else if (CRType == "program-param") + return autotuning::CodeRegionType::ProgramParam; + else if (CRType == "switch") + return autotuning::CodeRegionType::Switch; + else if (CRType == "other") + return autotuning::CodeRegionType::Other; + else + return make_error<StringError>("Unsupported CodeRegionType:" + CRType, + inconvertibleErrorCode()); +} + +// Remark -> autotuning::ParameterManager +ParameterManager RemarkToParameterManager(const remarks::Remark &Remark) { + // Create Parameters from a remark. + ParameterManager ParamManager; + for (const remarks::Argument &Arg : Remark.Args) { + int Value = 0; + if (!Arg.Val.getAsInteger(10, Value)) + // If no errors + ParamManager.add(Arg.Key.str(), Value); + else if (Arg.Val == "true") + ParamManager.add(Arg.Key.str(), true); + else if (Arg.Val == "false") + ParamManager.add(Arg.Key.str(), false); + // If there is a value of vector type + else if (Arg.VectorVal) { + std::vector<std::string> Strings; + for (const StringRef &Val : *Arg.VectorVal) { + Strings.push_back(Val.str()); + } + ParamManager.add(Arg.Key.str(), Strings); + } else + // Add as String Value + ParamManager.add(Arg.Key.str(), Arg.Val); + } + + return ParamManager; +} + +// Remark -> std::unordered_map<std::string, std::string> +std::unordered_map<std::string, std::string> +RemarkToStringMap(const remarks::Remark &Remark) { + std::unordered_map<std::string, std::string> LLVMParams; + for (const remarks::Argument &Arg : Remark.Args) { + // Add as String Value + 
LLVMParams[Arg.Key.str()] = Arg.Val.str(); + } + return LLVMParams; +} + +// Remark -> autotuning::SourceLocation +SourceLocation RemarkToSourceLocation(const remarks::Remark &Remark) { + SourceLocation Location; + if (Remark.Loc) { + StringRef File = Remark.Loc->SourceFilePath; + unsigned Line = Remark.Loc->SourceLine; + unsigned Column = Remark.Loc->SourceColumn; + Location = {File.str(), Line, Column}; + } + return Location; +} + +// Remark -> autotuning::CodeRegion +CodeRegion RemarkToCodeRegion(const remarks::Remark &Remark, + Expected<CodeRegionType> &Type) { + // Create a SourceLocation from a remark. + SourceLocation Location = RemarkToSourceLocation(Remark); + // Create a CodeRegion from a remark. + CodeRegion CR = CodeRegion(Remark.RemarkName.str(), Remark.FunctionName.str(), + Remark.PassName.str(), Type.get(), Location); + if (Remark.CodeRegionHash) + CR.setHash(Remark.CodeRegionHash.value_or(0)); + if (Remark.Invocation) + CR.setInvocation(Remark.Invocation.value_or(0)); + + return CR; +} + +Expected<std::unique_ptr<ToolOutputFile>> emitAutoTuningRemarks( + const StringRef RemarksFilename, const StringRef RemarksFormat, + const StringRef RemarksPasses, const CodeRegions &CRList) { + if (RemarksFilename.empty()) + return nullptr; + // Parse remark format. Options are yaml, yaml-strtab and bitstream. + Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat); + if (Error E = Format.takeError()) + return make_error<LLVMRemarkSetupFormatError>(std::move(E)); + + std::error_code EC; + auto Flags = + *Format == remarks::Format::YAML ? sys::fs::OF_Text : sys::fs::OF_None; + auto RemarksFile = + std::make_unique<ToolOutputFile>(RemarksFilename, EC, Flags); + if (EC) + return make_error<LLVMRemarkSetupFormatError>(errorCodeToError(EC)); + // Create a remark serializer to emit code regions. 
+ Expected<std::unique_ptr<remarks::RemarkSerializer>> RemarkSerializer = + remarks::createRemarkSerializer( + *Format, remarks::SerializerMode::Separate, RemarksFile->os()); + + if (Error E = RemarkSerializer.takeError()) + return make_error<LLVMRemarkSetupFormatError>(std::move(E)); + // Create remark streamer based on the serializer. + remarks::RemarkStreamer RStreamer = + remarks::RemarkStreamer(std::move(*RemarkSerializer), RemarksFilename); + AutoTuningRemarkStreamer Streamer(RStreamer); + + if (!RemarksPasses.empty()) + if (Error E = Streamer.setFilter(RemarksPasses)) + return make_error<LLVMRemarkSetupFormatError>(std::move(E)); + // Emit CodeRegions in Remark format. + for (const CodeRegion &CR : CRList) { + Streamer.emit(CR); + } + return std::move(RemarksFile); +} +} // namespace + +llvm::Error AutoTuningRemarkManager::read(AutoTuningEngine &E, + const std::string &InputFileName, + const std::string &RemarksFormat) { + ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = + MemoryBuffer::getFile(InputFileName.c_str()); + if (std::error_code EC = Buf.getError()) + return make_error<StringError>( + "Can't open file " + InputFileName + ": " + EC.message(), EC); + // Parse remark format. Options are yaml, yaml-strtab and bitstream. + Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat); + if (!Format) + return Format.takeError(); + + Expected<std::unique_ptr<remarks::RemarkParser>> MaybeParser = + remarks::createRemarkParserFromMeta(*Format, (*Buf)->getBuffer()); + if (!MaybeParser) { + return MaybeParser.takeError(); + } + remarks::RemarkParser &Parser = **MaybeParser; + + while (true) { + Expected<std::unique_ptr<remarks::Remark>> MaybeRemark = Parser.next(); + if (!MaybeRemark) { + Error E = MaybeRemark.takeError(); + if (E.isA<remarks::EndOfFileError>()) { + // EOF. 
+ consumeError(std::move(E)); + break; + } + return E; + } + const remarks::Remark &Remark = **MaybeRemark; + + if (Remark.RemarkType != remarks::Type::AutoTuning) + continue; + + if (!Remark.CodeRegionType) + return make_error<StringError>("CodeRegionType field is missing.", + inconvertibleErrorCode()); + Expected<CodeRegionType> Type = + StringToCodeRegionType((*Remark.CodeRegionType).str()); + if (!Type) + return Type.takeError(); + CodeRegionType CRType = Type.get(); + // If CodeRegionType is Other, this remark corresponds to global + // parameters, and no need to create a CodeRegion object. Check if the + // Remark of global parameters is for the current Module. + if (CRType == autotuning::Other && Remark.RemarkName == Engine.ModuleID) { + Engine.GlobalParams = RemarkToParameterManager(Remark); + continue; + } + if (CRType == autotuning::LLVMParam && + Remark.RemarkName == Engine.ModuleID) { + Engine.LLVMParams = RemarkToStringMap(Remark); + continue; + } + if (CRType == autotuning::ProgramParam && + Remark.RemarkName == Engine.ModuleID) { + Engine.ProgramParams = RemarkToStringMap(Remark); + continue; + } + if (Engine.isThinLTOTuning() && + (CRType == autotuning::CallSite || CRType == autotuning::Loop || + CRType == autotuning::MachineBasicBlock || + CRType == autotuning::Function)) { + LLVM_DEBUG(dbgs() << "AutoTuner does not support tuning of " + << CodeRegion::getTypeAsString(CRType) + << " for thinLTO durning link-time optimization. " + "Ignoring current code region.\n"); + continue; + } + // Create a CodeRegion from a remark. + CodeRegion CR = RemarkToCodeRegion(Remark, Type); + ParameterManager ParamManager = RemarkToParameterManager(Remark); + // Add the CodeRegion-ParameterManager entry into the lookup table. 
+ Engine.ParamTable[CR] = ParamManager; + + std::string Filename = CR.getSourceLoc().SourceFilePath; + size_t Pos = Filename.rfind("."); + if (Pos != std::string::npos) + Filename.erase(Pos, Filename.size()); + Engine.OppPassList[Filename].insert(CR.getPassName()); + Engine.CodeRegionFilterTypes.insert(CR.getType()); + } + return Error::success(); +} + +Error AutoTuningRemarkManager::dump(const autotuning::AutoTuningEngine &E, + const std::string &DirName, + const std::string &RemarksFormat, + const std::string &RemarksPasses) { + // Change to absolute path. + SmallString<256> OutputPath = StringRef(DirName); + sys::fs::make_absolute(OutputPath); + + // Make sure the new output directory exists, creating it if necessary. + if (std::error_code EC = sys::fs::create_directories(OutputPath)) { + return make_error<StringError>("could not create directory: " + + Twine(OutputPath) + ": " + EC.message(), + EC); + } + if (!Engine.TuningOpps.empty()) { + StringRef ModelFileName = sys::path::filename(Engine.ModuleID); + sys::path::append(OutputPath, ModelFileName + "." + RemarksFormat); + + int i = 1; // Output file suffix starts from 1. + // Check all existing output files with suffixes 1...i and create a new + // file with suffix (i+1). + while (sys::fs::exists(OutputPath)) { + sys::path::remove_filename(OutputPath); + sys::path::append(OutputPath, + ModelFileName + "." + RemarksFormat + "." 
+ Twine(i)); + i += 1; + } + Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr = + emitAutoTuningRemarks(OutputPath, RemarksFormat, RemarksPasses, + Engine.TuningOpps); + if (Error E = RemarksFileOrErr.takeError()) { + return E; + } + + std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr); + if (RemarksFile) + RemarksFile->keep(); + } + return Error::success(); +} + +#endif diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp new file mode 100644 index 000000000000..0516c055a139 --- /dev/null +++ b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp @@ -0,0 +1,55 @@ +#if defined(ENABLE_AUTOTUNER) +// ===---------- llvm/AutoTuner/AutoTuningRemarkStreamer.cpp --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +// ===---------------------------------------------------------------------===// +// +// This file contains the implementation of the conversion between AutoTuner +// CodeRegions and serializable remarks::Remark objects. +// +// ===---------------------------------------------------------------------===// + +#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" + +using namespace llvm; + +// autotuning::CodeRegion -> Remark +remarks::Remark +AutoTuningRemarkStreamer::toRemark(const autotuning::CodeRegion &CR) { + remarks::Remark R; // The result. 
+ R.RemarkType = remarks::Type::AutoTuning; + R.PassName = CR.getPassName(); + R.RemarkName = CR.getName(); + R.FunctionName = CR.getFuncName(); + const autotuning::SourceLocation &Location = CR.getSourceLoc(); + if (Location) + R.Loc = remarks::RemarkLocation{Location.SourceFilePath, + Location.SourceLine, Location.SourceColumn}; + R.CodeRegionType = CR.getTypeAsString(); + R.CodeRegionHash = CR.getHash(); + R.AutoTunerOptions = CR.getAutoTunerOptions(); + R.Invocation = CR.getInvocation(); + R.BaselineConfig = CR.getBaselineConfig(); + return R; +} + +void AutoTuningRemarkStreamer::emit(const autotuning::CodeRegion &CR) { + if (!RS.matchesFilter(CR.getPassName())) + return; + + // First, convert the code region to a remark. + remarks::Remark R = toRemark(CR); + // Then, emit the remark through the serializer. + RS.getSerializer().emit(R); +} + +Error AutoTuningRemarkStreamer::setFilter(StringRef Filter) { + return RS.setFilter(Filter); +} +#endif diff --git a/llvm/lib/AutoTuner/CMakeLists.txt b/llvm/lib/AutoTuner/CMakeLists.txt new file mode 100644 index 000000000000..c618474fe5ae --- /dev/null +++ b/llvm/lib/AutoTuner/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_component_library(LLVMAutoTuner + AutoTuning.cpp + AutoTuningRemarkManager.cpp + AutoTuningRemarkStreamer.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/AutoTuner + + LINK_COMPONENTS + Remarks +) \ No newline at end of file diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt index 283baa6090eb..966137c0f71f 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -28,6 +28,7 @@ add_subdirectory(Object) add_subdirectory(ObjectYAML) add_subdirectory(Option) add_subdirectory(Remarks) +add_subdirectory(AutoTuner) add_subdirectory(Debuginfod) add_subdirectory(DebugInfo) add_subdirectory(DWP) diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 106571b9061b..9029dc7bb3d9 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ 
b/llvm/lib/CodeGen/CMakeLists.txt @@ -273,6 +273,7 @@ add_llvm_component_library(LLVMCodeGen LINK_COMPONENTS Analysis + AutoTuner BitReader BitWriter CodeGenTypes diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 5a005ba7b414..9dcb3833ab91 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -29,6 +29,24 @@ using namespace llvm; #define DEBUG_TYPE "calcspillweights" +#if defined(ENABLE_AUTOTUNER) +static cl::opt<float> LoopWeight( + "reg-spill-loop-weight", cl::Hidden, + cl::desc( + "Tunable extra weight to what looks like a loop induction variable"), + cl::init(3)); + +static cl::opt<float> RemaWeight( + "reg-spill-rematerialize-weight", cl::Hidden, + cl::desc("Tunable reduced weight giving re-materialize oppotunities"), + cl::init(0.5f)); + +static cl::opt<float> + HintWeight("reg-spill-hint-weight", cl::Hidden, + cl::desc("Tunable weakly boost weight of hinted registers"), + cl::init(1.01f)); +#endif + void VirtRegAuxInfo::calculateSpillWeightsAndHints() { LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); @@ -252,7 +270,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // Give extra weight to what looks like a loop induction variable update. if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB)) +#if defined(ENABLE_AUTOTUNER) + Weight *= LoopWeight; +#else Weight *= 3; +#endif TotalWeight += Weight; } @@ -288,7 +310,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, } // Weakly boost the spill weight of hinted registers. +#if defined(ENABLE_AUTOTUNER) + TotalWeight *= HintWeight; +#else TotalWeight *= 1.01F; +#endif } // If the live interval was already unspillable, leave it that way. 
@@ -315,7 +341,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // FIXME: this gets much more complicated once we support non-trivial // re-materialization. if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo())) +#if defined(ENABLE_AUTOTUNER) + TotalWeight *= RemaWeight; +#else TotalWeight *= 0.5F; +#endif if (IsLocalSplitArtifact) return normalize(TotalWeight, Start->distance(*End), NumInstr); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 231544494c32..327cd40f86a4 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -37,6 +37,9 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/StructuralHash.h" +#endif #include <cmath> using namespace llvm; @@ -1703,6 +1706,39 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { return LiveIns.begin(); } +#if defined(ENABLE_AUTOTUNER) +uint64_t MachineBasicBlock::computeStructuralHash() { + return StructuralHash(*this); +} + +void MachineBasicBlock::initCodeRegion() { + std::string BasicBlockName = + ("%bb." + Twine(this->getNumber()) + ":" + this->getName()).str(); + MachineFunction *MF = this->getParent(); + StringRef FuncName = MF->getName(); + + autotuning::CodeRegion CR; + if (!this->empty()) { + const DebugLoc &StartLoc = this->front().getDebugLoc(); + CR = autotuning::CodeRegion(BasicBlockName, FuncName.str(), + autotuning::CodeRegionType::MachineBasicBlock, + StartLoc); + } else { + CR = autotuning::CodeRegion(BasicBlockName, FuncName.str(), + autotuning::CodeRegionType::MachineBasicBlock); + } + // Compute the number of non-debug IR instructions in this MBB. + unsigned NumInstrs = std::distance(this->getFirstNonDebugInstr(), + this->getLastNonDebugInstr()); + CR.setSize(NumInstrs); + // Compute hotness. 
+ autotuning::HotnessType Hotness = MF->getFunction().ATEFunction.getHotness(); + CR.setHotness(Hotness); + + this->setCodeRegion(CR); +} +#endif + MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const { const MachineFunction &MF = *getParent(); assert(MF.getProperties().hasProperty( diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index ba5432459d12..caccc9e5fad4 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -569,6 +569,12 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { +#if defined(ENABLE_AUTOTUNER) + // before visiting this MBB + // if AutoTuning is enabled, initialize this MBB for auto-tuning + autotuning::Engine.initContainer(&*MBB, DEBUG_TYPE); +#endif + Scheduler.startBlock(&*MBB); #ifndef NDEBUG @@ -3244,6 +3250,44 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, RegionPolicy.ShouldTrackLaneMasks = false; } +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - Look up for MMB level scheduling direction if AutoTuning is + // enabled + if (autotuning::Engine.isEnabled()) { + MachineBasicBlock &MBB = *Begin->getParent(); + + bool NewForceBottomUp = false; + // Look up from xml file, and overwrite values + bool IsForceBottomUpSet = + MBB.lookUpParams<bool>("ForceBottomUp", NewForceBottomUp); + + bool NewForceForceTopDown = false; + bool IsForceTopDownSet = + MBB.lookUpParams<bool>("ForceTopDown", NewForceForceTopDown); + + assert((!NewForceBottomUp || !NewForceForceTopDown) && + "BottomUp and TopDown cannot both set to true"); + + if (IsForceBottomUpSet) { + RegionPolicy.OnlyBottomUp = NewForceBottomUp; + if (RegionPolicy.OnlyBottomUp) { + RegionPolicy.OnlyTopDown = false; + } + } + + if (IsForceTopDownSet) { + RegionPolicy.OnlyTopDown = NewForceForceTopDown; + if (RegionPolicy.OnlyTopDown) { + 
RegionPolicy.OnlyBottomUp = false; + } + } + + if (IsForceBottomUpSet || IsForceTopDownSet) { + return; + } + } +#endif + // Check -misched-topdown/bottomup can force or unforce scheduling direction. // e.g. -misched-bottomup=false allows scheduling in both directions. assert((!ForceTopDown || !ForceBottomUp) && diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 36a02d5beb4b..d4ac95d534ed 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -16,6 +16,9 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; using namespace SwitchCG; @@ -61,7 +64,23 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, if (!TLI->areJTsAllowed(SI->getParent()->getParent())) return; +#if defined(ENABLE_AUTOTUNER) + unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); + // Overwrite MinJumpTableEntries when it is set by Autotuner + if (autotuning::Engine.isEnabled()) { + autotuning::Engine.initContainer(SI->ATESwitchInst.get(), + "switch-lowering"); + + int NewValue = 0; // the int value is set by lookUpParams() + bool Changed = + SI->ATESwitchInst->lookUpParams<int>("MinJumpTableEntries", NewValue); + if (Changed) + MinJumpTableEntries = NewValue; + } +#else const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); +#endif + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; // Bail if not enough cases. 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index df753b91ff90..af77e6c2dc4d 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2602,11 +2602,21 @@ public: void writeAllAttributeGroups(); void printTypeIdentities(); +#if defined(ENABLE_AUTOTUNER) + void printGlobal(const GlobalVariable *GV, bool PrintDeclarationOnly = false); + void printAlias(const GlobalAlias *GA); + void printIFunc(const GlobalIFunc *GI); + void printComdat(const Comdat *C); + void printRequisiteDeclarations(const Function *F); + void printFunction(const Function *F, bool PrintCompleteIR = false, + bool PrintDeclarationOnly = false); +#else void printGlobal(const GlobalVariable *GV); void printAlias(const GlobalAlias *GA); void printIFunc(const GlobalIFunc *GI); void printComdat(const Comdat *C); void printFunction(const Function *F); +#endif void printArgument(const Argument *FA, AttributeSet Attrs); void printBasicBlock(const BasicBlock *BB); void printInstructionLine(const Instruction &I); @@ -3593,15 +3603,26 @@ static void maybePrintComdat(formatted_raw_ostream &Out, Out << ')'; } +#if defined(ENABLE_AUTOTUNER) +void AssemblyWriter::printGlobal(const GlobalVariable *GV, + bool PrintDeclarationOnly) { + if (GV->isMaterializable() && !PrintDeclarationOnly) +#else void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->isMaterializable()) +#endif Out << "; Materializable\n"; AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent()); WriteAsOperandInternal(Out, GV, WriterCtx); Out << " = "; +#if defined(ENABLE_AUTOTUNER) + if ((!GV->hasInitializer() || PrintDeclarationOnly) && + GV->hasExternalLinkage()) +#else if (!GV->hasInitializer() && GV->hasExternalLinkage()) +#endif Out << "external "; Out << getLinkageNameWithSpace(GV->getLinkage()); @@ -3619,7 +3640,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << (GV->isConstant() ? 
"constant " : "global "); TypePrinter.print(GV->getValueType(), Out); +#if defined(ENABLE_AUTOTUNER) + if (GV->hasInitializer() && !PrintDeclarationOnly) { +#else if (GV->hasInitializer()) { +#endif Out << ' '; writeOperand(GV->getInitializer(), false); } @@ -3769,12 +3794,102 @@ void AssemblyWriter::printTypeIdentities() { } } +#if defined(ENABLE_AUTOTUNER) +/// printRequisiteDeclarations - Print the declarations of type identities, +/// global variables, functions, and function attribute groups of a function. +void AssemblyWriter::printRequisiteDeclarations(const Function *F) { + // walk through instructions and collect global variables & functions + SmallPtrSet<GlobalVariable *, 8> GVs; + SmallPtrSet<Function *, 8> Functions; + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + // Check for function + if (const auto *CI = dyn_cast<CallInst>(&I)) { + Function *func = CI->getCalledFunction(); + if (func) + Functions.insert(func); + } + // Check for global variables + for (const Use &U : I.operands()) { + if (GlobalVariable *gv = dyn_cast<GlobalVariable>(U)) + GVs.insert(gv); + if (GEPOperator *gepo = dyn_cast<GEPOperator>(&U)) { + if (GlobalVariable *gv = + dyn_cast<GlobalVariable>(gepo->getPointerOperand())) + GVs.insert(gv); + for (auto it = gepo->idx_begin(), et = gepo->idx_end(); it != et; + ++it) { + if (GlobalVariable *gv = dyn_cast<GlobalVariable>(*it)) + GVs.insert(gv); + } + } + } + } + } + + // print type identities + printTypeIdentities(); + + // print global variables + if (!GVs.empty()) { + Out << '\n'; + for (auto GVit = GVs.begin(), et = GVs.end(); GVit != et; ++GVit) { + // Make backups of some properties. They may be modified for printing. 
+ GlobalValue::LinkageTypes SavedLinkage = (*GVit)->getLinkage(); + GlobalVariable::VisibilityTypes SavedVisibility = + (*GVit)->getVisibility(); + + // modify property if needed + if (!(*GVit)->hasAvailableExternallyLinkage() && + !((*GVit)->getName() == "llvm.global_ctors") && + (*GVit)->hasLocalLinkage()) { + (*GVit)->setLinkage(GlobalValue::ExternalLinkage); + (*GVit)->setVisibility(GlobalValue::HiddenVisibility); + } + + printGlobal(*GVit, true); + Out << '\n'; + + // restore backups + (*GVit)->setLinkage(SavedLinkage); + (*GVit)->setVisibility(SavedVisibility); + } + Out << '\n'; + } + + // print functions + for (auto FuncIt = Functions.begin(), et = Functions.end(); FuncIt != et; + ++FuncIt) { + Out << '\n'; + printFunction(*FuncIt, false, true); + } + + // Write attribute groups. + if (!Machine.as_empty()) { + Out << '\n'; + writeAllAttributeGroups(); + } + Out << '\n'; +} + /// printFunction - Print all aspects of a function. +void AssemblyWriter::printFunction(const Function *F, bool PrintCompleteIR, + bool PrintDeclarationOnly) { + if (PrintCompleteIR && !PrintDeclarationOnly) { + printRequisiteDeclarations(F); + } + if (AnnotationWriter && !PrintDeclarationOnly) + AnnotationWriter->emitFunctionAnnot(F, Out); + + if (F->isMaterializable() && !PrintDeclarationOnly) + Out << "; Materializable\n"; +#else void AssemblyWriter::printFunction(const Function *F) { if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); if (F->isMaterializable()) Out << "; Materializable\n"; +#endif const AttributeList &Attrs = F->getAttributes(); if (Attrs.hasFnAttrs()) { @@ -3792,6 +3907,18 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "; Function Attrs: " << AttrStr << '\n'; } +#if defined(ENABLE_AUTOTUNER) + if (!PrintDeclarationOnly) + Machine.incorporateFunction(F); + + if (F->isDeclaration() || PrintDeclarationOnly) { + Out << "declare"; + if (!PrintDeclarationOnly) { + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + 
F->getAllMetadata(MDs); + printMetadataAttachments(MDs, " "); + } +#else Machine.incorporateFunction(F); if (F->isDeclaration()) { @@ -3799,6 +3926,7 @@ void AssemblyWriter::printFunction(const Function *F) { SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; F->getAllMetadata(MDs); printMetadataAttachments(MDs, " "); +#endif Out << ' '; } else Out << "define "; @@ -3824,7 +3952,11 @@ void AssemblyWriter::printFunction(const Function *F) { Out << '('; // Loop over the arguments, printing them... +#if defined(ENABLE_AUTOTUNER) + if ((F->isDeclaration() && !IsForDebug) || PrintDeclarationOnly) { +#else if (F->isDeclaration() && !IsForDebug) { +#endif // We're only interested in the type here - don't print argument names. for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) { // Insert commas as we go... the first arg doesn't get a comma @@ -3895,7 +4027,11 @@ void AssemblyWriter::printFunction(const Function *F) { writeOperand(F->getPersonalityFn(), /*PrintType=*/true); } +#if defined(ENABLE_AUTOTUNER) + if (F->isDeclaration() || PrintDeclarationOnly) { +#else if (F->isDeclaration()) { +#endif Out << '\n'; } else { SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; @@ -3913,6 +4049,13 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "}\n"; } +#if defined(ENABLE_AUTOTUNER) + // Output metadata + if (!Machine.mdn_empty() && PrintCompleteIR && !PrintDeclarationOnly) { + Out << '\n'; + writeAllMDNodes(); + } +#endif Machine.purgeFunction(); } @@ -4591,13 +4734,21 @@ void AssemblyWriter::printUseLists(const Function *F) { void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, bool ShouldPreserveUseListOrder, +#if defined(ENABLE_AUTOTUNER) + bool IsForDebug, bool PrintCompleteIR) const { +#else bool IsForDebug) const { +#endif SlotTracker SlotTable(this->getParent()); formatted_raw_ostream OS(ROS); AssemblyWriter W(OS, SlotTable, this->getParent(), AAW, IsForDebug, ShouldPreserveUseListOrder); +#if defined(ENABLE_AUTOTUNER) + 
W.printFunction(this, PrintCompleteIR); +#else W.printFunction(this); +#endif } void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 217fe703dd4e..d44d1eea9f3e 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -78,6 +78,7 @@ add_llvm_component_library(LLVMCore intrinsics_gen LINK_COMPONENTS + AutoTuner BinaryFormat Demangle Remarks diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 435800d9e5f9..ec2620efac38 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -70,6 +70,10 @@ #include <cstring> #include <string> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/StructuralHash.h" +#endif + using namespace llvm; using ProfileCount = Function::ProfileCount; @@ -1977,6 +1981,36 @@ std::optional<StringRef> Function::getSectionPrefix() const { return std::nullopt; } +#if defined(ENABLE_AUTOTUNER) +uint64_t AutoTuningEnabledFunction::computeStructuralHash() { + return StructuralHash(*(this->Func)); +} + +void AutoTuningEnabledFunction::initCodeRegion() { + StringRef FuncName = Func->getName(); + StringRef EntryBBName; + autotuning::SourceLocation Loc; + + if (!Func->empty()) + EntryBBName = Func->front().getName(); + else + EntryBBName = StringRef("None"); + + DISubprogram *SubProgram = Func->getSubprogram(); + if (SubProgram) + // Set the column number to 0 because there is no information about + // column number for functions. 
+ Loc = {SubProgram->getFilename().str(), SubProgram->getLine(), 0}; + + autotuning::CodeRegion CR = + autotuning::CodeRegion(EntryBBName.data(), FuncName.data(), + autotuning::CodeRegionType::Function, Loc); + CR.setSize(Func->getInstructionCount()); + CR.setHotness(this->getHotness()); + this->setCodeRegion(CR); +} +#endif + bool Function::nullPointerIsDefined() const { return hasFnAttribute(Attribute::NullPointerIsValid); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index cb0ac0f8eae6..e614285df07a 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -45,6 +45,9 @@ #include <cstdint> #include <optional> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/StructuralHash.h" +#endif using namespace llvm; @@ -259,6 +262,89 @@ void LandingPadInst::addClause(Constant *Val) { getOperandList()[OpNo] = Val; } +#if defined(ENABLE_AUTOTUNER) +uint64_t AutoTuningEnabledSwitchInst::computeStructuralHash() { + return StructuralHash(*(this->SI)); +} + +void AutoTuningEnabledSwitchInst::initCodeRegion() { + std::string SwitchName; + if (this->SI->hasName()) { + SwitchName = this->SI->getName().str(); + } else { + std::string Str; + llvm::raw_string_ostream RSO(Str); + this->SI->getCondition()->printAsOperand(RSO); + SwitchName = RSO.str(); + } + + autotuning::CodeRegion CR = autotuning::CodeRegion( + SwitchName, this->SI->getFunction()->getName().str(), + autotuning::CodeRegionType::Switch, this->SI->getDebugLoc()); + + unsigned TotalNumInsts = 0; + for (auto Case : SI->cases()) { + const BasicBlock *BB = Case.getCaseSuccessor(); + unsigned NumInsts = std::distance(BB->instructionsWithoutDebug().begin(), + BB->instructionsWithoutDebug().end()); + TotalNumInsts += NumInsts; + } + + CR.setSize(TotalNumInsts); + // Compute hotness. 
+ autotuning::HotnessType Hotness = + this->SI->getFunction()->ATEFunction.getHotness(); + CR.setHotness(Hotness); + + this->setCodeRegion(CR); +} + +uint64_t AutoTuningEnabledCallSite::computeStructuralHash() { + return StructuralHash(*(this->CB)); +} + +void AutoTuningEnabledCallSite::initCodeRegion() { + // Use Caller's name as FuncName and Callee's name as Name of a CodeRegion. + Function *Caller = this->CB->getCaller(); + Function *Callee = this->CB->getCalledFunction(); + if (Caller == nullptr || Callee == nullptr) { + this->setCodeRegion(autotuning::CodeRegion::getInvalidInstance()); + return; + } + + autotuning::SourceLocation SrcLoc; + if (this->CB->getDebugLoc()) { + unsigned int SourceLine = this->CB->getDebugLoc()->getLine(); + // Get modified source line number for current callsite if there is another + // call instruction (to same callee) which has same source line number + // happened due to inlining. + std::optional<unsigned int> LineNum = autotuning::Engine.getCallSiteLoc(CB); + if (LineNum) + SourceLine = *LineNum; + SrcLoc = autotuning::SourceLocation{ + this->CB->getDebugLoc()->getFilename().str(), SourceLine, + this->CB->getDebugLoc()->getColumn()}; + } + + // We are using DebugLoc to distinguish between multiple calls to the same + // callee in a function. It may be possible that these multiple calls have + // same DebugLoc either 1) due to inlining of multiple calls (same callee) + // and callee having more calls, or 2) cloned calls added by previous + // optimizations. We are using 'callee name + it's parent (basic block) name' + // to solve these problems. Additionally we are using modified line number + // for the issue # 1; this will handle the cases where the multiple calls are + // in the same basic block. 
+ autotuning::CodeRegion CR = autotuning::CodeRegion( + Callee->getName().str() + "-" + this->CB->getParent()->getName().str(), + Caller->getName().data(), autotuning::CodeRegionType::CallSite, SrcLoc, + autotuning::DynamicOptions{{"ForceInline", {0, 1}}}); + + CR.setSize(Callee->getInstructionCount()); + CR.setHotness(Caller->ATEFunction.getHotness()); + this->setCodeRegion(CR); +} +#endif + //===----------------------------------------------------------------------===// // CallBase Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 6ea108d831a1..1583e1c82b3e 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -10,9 +10,23 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#endif using namespace llvm; +#if defined(ENABLE_AUTOTUNER) +// AutoTuner Flag to use callsite Debug Location for hash cacluation. +static cl::opt<bool> HashCallSite( + "hash-prior-to-callsite", cl::init(true), cl::Hidden, + cl::desc("Use function IR prior to a call site to compute the hashcode for" + " the call site")); +#endif + namespace { // Basic hashing mechanism to detect structural change to the IR, used to verify @@ -21,16 +35,81 @@ namespace { class StructuralHashImpl { hash_code Hash; +#if defined(ENABLE_AUTOTUNER) + const uint64_t BLOCK_HEADER_HASH = 45798; +#endif template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); } public: StructuralHashImpl() : Hash(4) {} +#if defined(ENABLE_AUTOTUNER) + void update(const MachineBasicBlock &MBB) { + // Update the structural hash when we encounter a new basic block. 
+ // Prevents CodeRegions with different structures, but many empty + // BasicBlocks to have the same structural hash. + if (const BasicBlock *Block = MBB.getBasicBlock()) { + hash(BLOCK_HEADER_HASH); // Block header + for (auto &Inst : *Block) + hash(Inst.getOpcode()); + } + } + + void update(const std::vector<BasicBlock *> BBs) { + // Update the structural hash when we encounter a new basic block. + // Prevents CodeRegions with different structures, but many empty + // BasicBlocks to have the same structural hash. + for (BasicBlock *BB : BBs) { + if (BB == nullptr) + continue; + + hash(BLOCK_HEADER_HASH); // Block header + for (auto &Inst : *BB) + hash(Inst.getOpcode()); + } + } + + void update(const llvm::CallBase &CB) { + StringRef Name = ""; + if (HashCallSite) { + update(*CB.getCaller(), std::addressof(CB)); + } else { + const Function &F = *CB.getCaller(); + Name = F.getName(); + std::string FileName = Name.str(); + for (uint64_t Idx = 0; Idx < Name.size(); Idx = Idx + sizeof(uint64_t)) { + uint64_t Value = 0; + FileName.copy((char *)&Value, sizeof(uint64_t), Idx); + hash(Value); + } + } + + update(*CB.getCalledFunction()); + } + + void update(const SwitchInst &SI) { + hash(SI.getNumCases()); + for (auto Case : SI.cases()) { + hash(BLOCK_HEADER_HASH); + const BasicBlock *BB = Case.getCaseSuccessor(); + for (auto &Inst : *BB) + hash(Inst.getOpcode()); + } + } + + void update(const Function &F, const CallBase *TargetCB = nullptr) { + if (F.isDeclaration()) + return; + + const Instruction *I = + TargetCB ? (dyn_cast<Instruction>(TargetCB)) : nullptr; +#else void update(const Function &F) { // Declarations don't affect analyses. 
if (F.isDeclaration()) return; +#endif hash(12345); // Function header @@ -44,9 +123,18 @@ public: VisitedBBs.insert(BBs[0]); while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); +#if defined(ENABLE_AUTOTUNER) + hash(BLOCK_HEADER_HASH); // Block header + for (auto &Inst : *BB) { + hash(Inst.getOpcode()); + if (I && Inst.isIdenticalTo(I)) + return; + } +#else hash(45798); // Block header for (auto &Inst : *BB) hash(Inst.getOpcode()); +#endif const Instruction *Term = BB->getTerminator(); for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { @@ -79,6 +167,32 @@ public: } // namespace +#if defined(ENABLE_AUTOTUNER) +uint64_t llvm::StructuralHash(const MachineBasicBlock &MBB) { + StructuralHashImpl H; + H.update(MBB); + return H.getHash(); +} + +uint64_t llvm::StructuralHash(const std::vector<BasicBlock *> BBs) { + StructuralHashImpl H; + H.update(BBs); + return H.getHash(); +} + +uint64_t llvm::StructuralHash(const CallBase &CB) { + StructuralHashImpl H; + H.update(CB); + return H.getHash(); +} + +uint64_t llvm::StructuralHash(const SwitchInst &SI) { + StructuralHashImpl H; + H.update(SI); + return H.getHash(); +} +#endif + uint64_t llvm::StructuralHash(const Function &F) { StructuralHashImpl H; H.update(F); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d0cbbcc0e310..a3ccbc6d258f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -262,6 +262,11 @@ #include "llvm/Transforms/Vectorize/VectorCombine.h" #include <optional> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Analysis/AutotuningDump.h" +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif + using namespace llvm; static const Regex DefaultAliasRegex( diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 660cb2e974d7..8009e011833c 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -133,6 +133,11 @@ #include 
"llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif + using namespace llvm; static cl::opt<InliningAdvisorMode> UseInlineAdvisor( @@ -289,6 +294,10 @@ PipelineTuningOptions::PipelineTuningOptions() { EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; } +#if defined(ENABLE_AUTOTUNER) +extern cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode; +#endif + namespace llvm { extern cl::opt<unsigned> MaxDevirtIterations; extern cl::opt<bool> EnableKnowledgeRetention; @@ -452,9 +461,17 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, // attention to it. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) +#if defined(ENABLE_AUTOTUNER) + { + if (AutoTuningCompileMode) + LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); +#endif LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); +#if defined(ENABLE_AUTOTUNER) + } +#endif invokeLoopOptimizerEndEPCallbacks(LPM2, Level); @@ -631,9 +648,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // attention to it. 
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) +#if defined(ENABLE_AUTOTUNER) + { + if (AutoTuningCompileMode) + LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); +#endif LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); +#if defined(ENABLE_AUTOTUNER) + } +#endif invokeLoopOptimizerEndEPCallbacks(LPM2, Level); @@ -1110,6 +1135,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode) + MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionInline)); +#endif + if (EnableModuleInliner) MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); else @@ -1131,6 +1161,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, /// TODO: Should LTO cause any differences to this set of passes? void PassBuilder::addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, bool IsFullLTO) { +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode && !IsFullLTO) + FPM.addPass( + AutoTuningCompileFunctionPass(autotuning::CompileOptionVectorize)); +#endif + FPM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); @@ -1444,6 +1480,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, return buildO0DefaultPipeline(Level, LTOPreLink); ModulePassManager MPM; +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode) + MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionStart)); +#endif // Convert @llvm.global.annotations to !annotation metadata. 
MPM.addPass(Annotation2MetadataPass()); @@ -1475,6 +1515,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); + +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode) + MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionEnd)); +#endif + return MPM; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index e10dc995c493..45a539f14b93 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -29,6 +29,10 @@ MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) +#if defined(ENABLE_AUTOTUNER) +MODULE_ANALYSIS("autotuning-dump", AutotuningDumpAnalysis()) +#endif + #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ MODULE_ANALYSIS(NAME, CREATE_PASS) @@ -127,6 +131,9 @@ MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) +#if defined(ENABLE_AUTOTUNER) +MODULE_PASS("autotuning-compile-module", AutoTuningCompileModulePass()) +#endif #undef MODULE_PASS #ifndef MODULE_PASS_WITH_PARAMS @@ -430,6 +437,9 @@ FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) FUNCTION_PASS("memprof", MemProfilerPass()) FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass()) +#if defined(ENABLE_AUTOTUNER) +FUNCTION_PASS("autotuning-compile-function", AutoTuningCompileFunctionPass()) +#endif #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS @@ -614,6 +624,9 @@ LOOP_PASS("guard-widening", GuardWideningPass()) LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) LOOP_PASS("loop-reroll", LoopRerollPass()) LOOP_PASS("loop-versioning-licm", 
LoopVersioningLICMPass()) +#if defined(ENABLE_AUTOTUNER) +LOOP_PASS("autotuning-compile-loop", AutoTuningCompileLoopPass()) +#endif #undef LOOP_PASS #ifndef LOOP_PASS_WITH_PARAMS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 7eef511928ec..8653027ceed2 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -41,6 +41,10 @@ #include <unordered_set> #include <utility> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif using namespace llvm; @@ -107,6 +111,10 @@ static cl::opt<bool> PrintOnCrash( cl::desc("Print the last form of the IR before crash (use -print-on-crash-path to dump to a file)"), cl::Hidden); +#if defined(ENABLE_AUTOTUNER) +extern cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode; +#endif + static cl::opt<std::string> OptBisectPrintIRPath( "opt-bisect-print-ir-path", cl::desc("Print IR to path when opt-bisect-limit is reached"), cl::Hidden); @@ -874,6 +882,21 @@ bool OptPassGateInstrumentation::shouldRun(StringRef PassName, Any IR) { void OptPassGateInstrumentation::registerCallbacks( PassInstrumentationCallbacks &PIC) { +#if defined(ENABLE_AUTOTUNER) + // Using AutoTuner OptBisect to change the behavior of compilation pipeline. + // Flag 'opt-bisect-limit' will be preferred if both 'opt-bisect-limit' and + // incremental compilation flags are used. 
+ if (autotuning::Engine.isParseInput() && AutoTuningCompileMode) { + if (!getAutoTuningOptPassGate().isEnabled()) + return; + + PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) { + return isIgnored(PassID) || + getAutoTuningOptPassGate().checkPass(PassID, getIRName(IR)); + }); + return; + } +#endif OptPassGate &PassGate = Context.getOptPassGate(); if (!PassGate.isEnabled()) return; diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp index b2627196bce6..b1dfa9d0f2cf 100644 --- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -277,6 +277,14 @@ void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark, R.push_back(StrTab.add(Remark.RemarkName).first); R.push_back(StrTab.add(Remark.PassName).first); R.push_back(StrTab.add(Remark.FunctionName).first); +#if defined(ENABLE_AUTOTUNER) + if (Remark.CodeRegionType) + R.push_back(StrTab.add(*Remark.CodeRegionType).first); + if (std::optional<uint64_t> hash = Remark.CodeRegionHash) + R.push_back(*hash); + if (std::optional<unsigned int> Invocation = Remark.Invocation) + R.push_back(*Invocation); +#endif Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R); if (const std::optional<RemarkLocation> &Loc = Remark.Loc) { diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp index 9f4676ce37ab..d1faf4f1553a 100644 --- a/llvm/lib/Remarks/RemarkStreamer.cpp +++ b/llvm/lib/Remarks/RemarkStreamer.cpp @@ -14,6 +14,10 @@ #include "llvm/Support/CommandLine.h" #include <optional> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/DebugInfoMetadata.h" +#endif + using namespace llvm; using namespace llvm::remarks; diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index f5123b0f64ce..baa393c6a619 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -17,10 +17,23 @@ 
#include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" #include <optional> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Support/CommandLine.h" +#endif using namespace llvm; using namespace llvm::remarks; +#if defined(ENABLE_AUTOTUNER) +// Creating code regions without meta data (e.g. debug Location, Function Name, +// etc.). +// This flag is added here instead of 'lib/AutoTuner/AutoTuning.cpp' to avoid +// making LLVMRemarks dependent on LLVMCore. +cl::opt<bool> OmitAutotuningMetadata( + "auto-tuning-omit-metadata", cl::Hidden, cl::init(false), + cl::desc("Include only code region hashes and types in opportunity files")); +#endif + char YAMLParseError::ID = 0; static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) { @@ -235,6 +248,23 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { TheRemark.FunctionName = *MaybeStr; else return MaybeStr.takeError(); +#if defined(ENABLE_AUTOTUNER) + } else if (KeyName == "CodeRegionType") { + if (Expected<StringRef> MaybeStr = parseStr(RemarkField)) + TheRemark.CodeRegionType = *MaybeStr; + else + return MaybeStr.takeError(); + } else if (KeyName == "CodeRegionHash") { + if (Expected<uint64_t> MaybeULL = parseUnsignedLL(RemarkField)) + TheRemark.CodeRegionHash = *MaybeULL; + else + return MaybeULL.takeError(); + } else if (KeyName == "Invocation") { + if (Expected<unsigned int> MaybeULL = parseUnsignedLL(RemarkField)) + TheRemark.Invocation = *MaybeULL; + else + return MaybeULL.takeError(); +#endif } else if (KeyName == "Hotness") { if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField)) TheRemark.Hotness = *MaybeU; @@ -261,11 +291,35 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { } } +#if defined(ENABLE_AUTOTUNER) + // Check if any of the mandatory fields are missing. + if (TheRemark.RemarkType == Type::AutoTuning) { + // We expect type, and pass to be present at least. 
+ if (!TheRemark.CodeRegionType || TheRemark.PassName.empty()) + return error("CodeRegionHash, CodeRegionType, or Pass missing.", + *RemarkEntry.getRoot()); + + // Sanity check for the correct command line option. + if (!OmitAutotuningMetadata && TheRemark.RemarkName.empty()) + return error("Remark Name expected; enable -autotuning-omit-metadata.", + *RemarkEntry.getRoot()); + + if (!OmitAutotuningMetadata && TheRemark.FunctionName.empty()) + return error( + "Remark Function Name expected; enable -autotuning-omit-metadata.", + *RemarkEntry.getRoot()); + } else if (TheRemark.RemarkType == Type::Unknown || + TheRemark.PassName.empty() || TheRemark.RemarkName.empty() || + TheRemark.FunctionName.empty()) + return error("Type, Pass, Name or Function missing.", + *RemarkEntry.getRoot()); +#else // Check if any of the mandatory fields are missing. if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() || TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty()) return error("Type, Pass, Name or Function missing.", *RemarkEntry.getRoot()); +#endif return std::move(Result); } @@ -277,6 +331,9 @@ Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) { .Case("!Analysis", remarks::Type::Analysis) .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute) .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing) +#if defined(ENABLE_AUTOTUNER) + .Case("!AutoTuning", remarks::Type::AutoTuning) +#endif .Case("!Failure", remarks::Type::Failure) .Default(remarks::Type::Unknown); if (Type == remarks::Type::Unknown) @@ -313,6 +370,31 @@ Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { return Result; } +#if defined(ENABLE_AUTOTUNER) +Expected<std::vector<StringRef>> +YAMLRemarkParser::parseStrVector(yaml::KeyValueNode &Node) { + std::vector<StringRef> Result; + auto *SequenceNode = dyn_cast<yaml::SequenceNode>(Node.getValue()); + if (!SequenceNode) + return error("expected a value of sequence type.", Node); + + for 
(yaml::Node &Element : *SequenceNode) { + auto *ScalarNode = dyn_cast<yaml::ScalarNode>(&Element); + if (!ScalarNode) + return error("expected a value of scalar type.", Element); + else { + StringRef Str = ScalarNode->getRawValue(); + if (Str.front() == '\'') + Str = Str.drop_front(); + if (Str.back() == '\'') + Str = Str.drop_back(); + Result.push_back(Str); + } + } + return Result; +} +#endif + Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { SmallVector<char, 4> Tmp; auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue()); @@ -324,6 +406,19 @@ Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { return UnsignedValue; } +#if defined(ENABLE_AUTOTUNER) +Expected<uint64_t> YAMLRemarkParser::parseUnsignedLL(yaml::KeyValueNode &Node) { + SmallVector<char, 4> Tmp; + if (auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue())) { + uint64_t UnsignedValue = 0; + if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue)) + return error("expected a value of integer type.", *Value); + return UnsignedValue; + } + return error("expected a value of scalar type.", Node); +} +#endif + Expected<RemarkLocation> YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) { auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue()); @@ -374,6 +469,9 @@ Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) { std::optional<StringRef> KeyStr; std::optional<StringRef> ValueStr; +#if defined(ENABLE_AUTOTUNER) + std::optional<std::vector<StringRef>> ValueStrVector; +#endif std::optional<RemarkLocation> Loc; for (yaml::KeyValueNode &ArgEntry : *ArgMap) { @@ -400,11 +498,27 @@ Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) { if (ValueStr) return error("only one string entry is allowed per argument.", ArgEntry); +#if defined(ENABLE_AUTOTUNER) + // Try to parse the value to a string vector. 
+ if (Expected<std::vector<StringRef>> MaybeStrVector = + parseStrVector(ArgEntry)) { + ValueStrVector = *MaybeStrVector; + ValueStr = ""; + } else { + consumeError(MaybeStrVector.takeError()); + // Try to parse the value. + if (Expected<StringRef> MaybeStr = parseStr(ArgEntry)) + ValueStr = *MaybeStr; + else + return MaybeStr.takeError(); + } +#else // Try to parse the value. if (Expected<StringRef> MaybeStr = parseStr(ArgEntry)) ValueStr = *MaybeStr; else return MaybeStr.takeError(); +#endif // Keep the key from the string. KeyStr = KeyName; @@ -412,10 +526,18 @@ Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) { if (!KeyStr) return error("argument key is missing.", *ArgMap); +#if defined(ENABLE_AUTOTUNER) + if (!ValueStr && !ValueStrVector) +#else if (!ValueStr) +#endif return error("argument value is missing.", *ArgMap); +#if defined(ENABLE_AUTOTUNER) + return Argument{*KeyStr, *ValueStr, ValueStrVector, Loc}; +#else return Argument{*KeyStr, *ValueStr, Loc}; +#endif } Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() { diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index 8ef72e16be74..141f10dd3900 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -91,6 +91,12 @@ protected: Expected<RemarkLocation> parseDebugLoc(yaml::KeyValueNode &Node); /// Parse an argument. Expected<Argument> parseArg(yaml::Node &Node); +#if defined(ENABLE_AUTOTUNER) + /// parse a vector of strings. + Expected<std::vector<StringRef>> parseStrVector(yaml::KeyValueNode &Node); + /// Parse one value to an unsigned long long. + Expected<uint64_t> parseUnsignedLL(yaml::KeyValueNode &Node); +#endif }; /// YAML with a string table to Remark parser. 
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp index 68285c3dde1b..1bc0f23f9221 100644 --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -15,10 +15,45 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Support/FileSystem.h" #include <optional> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Support/CommandLine.h" +#endif using namespace llvm; using namespace llvm::remarks; +#if defined(ENABLE_AUTOTUNER) +extern cl::opt<bool> OmitAutotuningMetadata; + +// Use the same keys whether we use a string table or not (respectively, T is an +// unsigned or a StringRef). +template <typename T> +static void mapRemarkHeader( + yaml::IO &io, T PassName, T RemarkName, std::optional<RemarkLocation> RL, + T FunctionName, std::optional<StringRef> CodeRegionType, + std::optional<uint64_t> CodeRegionHash, + std::optional<unsigned int> Invocation, + std::optional<std::map<std::string, std::string>> BaselineConfig, + std::optional<std::map<std::string, std::vector<unsigned int>>> + AutoTunerOptions, + std::optional<uint64_t> Hotness, ArrayRef<Argument> Args) { + io.mapRequired("Pass", PassName); + if (!OmitAutotuningMetadata) { + io.mapRequired("Name", RemarkName); + io.mapOptional("DebugLoc", RL); + io.mapRequired("Function", FunctionName); + } + io.mapOptional("CodeRegionType", CodeRegionType); + io.mapOptional("CodeRegionHash", CodeRegionHash); + io.mapOptional("DynamicConfigs", AutoTunerOptions); + io.mapOptional("BaselineConfig", BaselineConfig); + io.mapOptional("Invocation", Invocation); + if (!OmitAutotuningMetadata) { + io.mapOptional("Hotness", Hotness); + io.mapOptional("Args", Args); + } +} +#else // Use the same keys whether we use a string table or not (respectively, T is an // unsigned or a StringRef). 
template <typename T> @@ -33,6 +68,7 @@ static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName, io.mapOptional("Hotness", Hotness); io.mapOptional("Args", Args); } +#endif namespace llvm { namespace yaml { @@ -53,6 +89,10 @@ template <> struct MappingTraits<remarks::Remark *> { else if (io.mapTag("!AnalysisAliasing", (Remark->RemarkType == Type::AnalysisAliasing))) ; +#if defined(ENABLE_AUTOTUNER) + else if (io.mapTag("!AutoTuning", (Remark->RemarkType == Type::AutoTuning))) + ; +#endif else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure))) ; else @@ -66,14 +106,58 @@ template <> struct MappingTraits<remarks::Remark *> { unsigned NameID = StrTab.add(Remark->RemarkName).first; unsigned FunctionID = StrTab.add(Remark->FunctionName).first; mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, +#if defined(ENABLE_AUTOTUNER) + Remark->CodeRegionType, Remark->CodeRegionHash, + Remark->Invocation, Remark->BaselineConfig, + Remark->AutoTunerOptions, Remark->Hotness, Remark->Args); + +#else Remark->Hotness, Remark->Args); +#endif } else { mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, +#if defined(ENABLE_AUTOTUNER) + Remark->FunctionName, Remark->CodeRegionType, + Remark->CodeRegionHash, Remark->Invocation, + Remark->BaselineConfig, Remark->AutoTunerOptions, + Remark->Hotness, Remark->Args); +#else Remark->FunctionName, Remark->Hotness, Remark->Args); +#endif } } }; +#if defined(ENABLE_AUTOTUNER) +// YAML I/O to support dumping 'Values: { key: [...], ... }' in opportunity +// files. 
+template <>
+struct MappingTraits<std::map<std::string, std::vector<unsigned int>>> {
+  static void mapping(IO &io,
+                      std::map<std::string, std::vector<unsigned int>> &OM) {
+    assert(io.outputting() && "input not yet implemented");
+
+    // Output only: print the map as an abbreviated dictionary.
+    llvm::yaml::StdMapStringCustomMappingTraitsImpl<
+        std::vector<unsigned int>>::output(io, OM);
+  }
+  // Selects flow style, i.e. beginFlowMapping and endFlowMapping are used.
+  static const bool flow = true;
+};
+
+template <> struct MappingTraits<std::map<std::string, std::string>> {
+  static void mapping(IO &io, std::map<std::string, std::string> &OM) {
+    assert(io.outputting() && "input not yet implemented");
+
+    // Output only: print the map as an abbreviated dictionary.
+    llvm::yaml::StdMapStringCustomMappingTraitsImpl<std::string>::output(io,
+                                                                         OM);
+  }
+  // Selects flow style, i.e. beginFlowMapping and endFlowMapping are used.
+  static const bool flow = true;
+};
+#endif
+
 template <> struct MappingTraits<RemarkLocation> {
   static void mapping(IO &io, RemarkLocation &RL) {
     assert(io.outputting() && "input not yet implemented");
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index d3efb8b67be5..b66415c0e9a9 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -127,6 +127,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) {
          O->getFormattingFlag() == cl::AlwaysPrefix;
 }
 
+#if defined(ENABLE_AUTOTUNER)
+#include <map>
+#endif
 
 namespace {
 
@@ -1470,6 +1473,44 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
                                                Errs, LongOptionsUseDoubleDash);
 }
 
+#if defined(ENABLE_AUTOTUNER)
+bool cl::ParseAutoTunerOptions(
+    std::unordered_map<std::string, std::string> LLVMParams,
+    std::unordered_map<std::string, std::string> ProgramParams,
+    StringRef Overview, raw_ostream *Errs, const char *EnvVar,
+    bool LongOptionsUseDoubleDash) {
+  BumpPtrAllocator Alloc;
+  StringSaver Strings(Alloc);
+  // GlobalParser consumes a C-style (argc, argv) pair, so synthesize one;
+  // argv[0] is a placeholder program name, consistent with LLVM.
+  SmallVector<const char *, 20> Argv;
+  Argv.push_back("AutoTuner (LLVM option parsing)");
+
+  // Render each pair as "name=value"; the saver keeps the text alive.
+  auto AppendParams =
+      [&](const std::unordered_map<std::string, std::string> &Params) {
+        for (const auto &Entry : Params) {
+          const std::string Option = Entry.first + "=" + Entry.second;
+          Argv.push_back(Strings.save(Option).data());
+        }
+      };
+  AppendParams(LLVMParams);
+  AppendParams(ProgramParams);
+
+  // Options taken from the environment variable, if one is named, go last.
+  if (EnvVar) {
+    std::optional<std::string> EnvValue =
+        sys::Process::GetEnv(StringRef(EnvVar));
+    if (EnvValue)
+      TokenizeGNUCommandLine(*EnvValue, Strings, Argv);
+  }
+
+  return GlobalParser->ParseCommandLineOptions(
+      static_cast<int>(Argv.size()), Argv.data(), Overview, Errs,
+      LongOptionsUseDoubleDash);
+}
+#endif
+
 /// Reset all options at least once, so that we can parse different options.
 void CommandLineParser::ResetAllOptionOccurrences() {
   // Reset all option values to look like they have never been seen before.
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 034f1587ae8d..3507d357a4c6 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -57,6 +57,7 @@ add_llvm_component_library(LLVMipo LINK_COMPONENTS AggressiveInstCombine Analysis + AutoTuner BitReader BitWriter Core diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 3e00aebce372..802667819c44 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -64,6 +64,9 @@ #include <functional> #include <utility> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; @@ -298,6 +301,27 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // be deleted as a batch after inlining. SmallVector<Function *, 4> DeadFunctionsInComdats; +#if defined(ENABLE_AUTOTUNER) + bool IsAutoTunerEnabled = + autotuning::Engine.isEnabled() && + autotuning::Engine.isTuningAllowedForType(autotuning::CallSite); + if (IsAutoTunerEnabled) { + SmallVector<std::pair<CallBase *, int>, 16> CallsCopy = Calls; + for (int I = 0; I < (int)CallsCopy.size(); ++I) { + CallBase &CB = *CallsCopy[I].first; + DebugLoc DLoc = CB.getDebugLoc(); + if (!CB.getCaller() || !CB.getCalledFunction() || !DLoc) + continue; + autotuning::CallSiteLocation Loc = autotuning::CallSiteLocation{ + &CB, CB.getCaller(), CB.getCalledFunction(), + autotuning::SourceLocation{DLoc->getFilename().str(), DLoc->getLine(), + DLoc->getColumn()}}; + autotuning::Engine.insertCallSiteLoc(Loc); + } + autotuning::Engine.cleanCallSiteLoc(); + } +#endif + // Loop forward over all of the calls. Note that we cannot cache the size as // inlining can introduce new calls that need to be processed. 
for (int I = 0; I < (int)Calls.size(); ++I) { @@ -412,6 +436,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (NewCallee) { if (!NewCallee->isDeclaration()) { Calls.push_back({ICB, NewHistoryID}); +#if defined(ENABLE_AUTOTUNER) + if (IsAutoTunerEnabled) + if (ICB->getDebugLoc()) + autotuning::Engine.updateCallSiteLocs( + CB, ICB, ICB->getCalledFunction(), + ICB->getDebugLoc()->getLine()); +#endif // Continually inlining through an SCC can result in huge compile // times and bloated code since we arbitrarily stop at some point // when the inliner decides it's not profitable to inline anymore. @@ -527,6 +558,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, FAM.invalidate(F, PreservedAnalyses::none()); } +#if defined(ENABLE_AUTOTUNER) + if (IsAutoTunerEnabled) + autotuning::Engine.clearCallSiteLocs(); +#endif + // We must ensure that we only delete functions with comdats if every function // in the comdat is going to be deleted. if (!DeadFunctionsInComdats.empty()) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index a53baecd4776..9590cf625c64 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1212,6 +1212,20 @@ bool SampleProfileLoader::inlineHotFunctions( } } } +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) { + // If a callsite is hot/cold, mark its corresponding callee as + // hot/cold respectively so that auto-tuning engine will be able to + // selectively dump code regions as tuning opportunities. 
+ if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (Function *Callee = CI->getCalledFunction()) { + if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) + Callee->getATEFunction().setHot(); + else + Callee->getATEFunction().setCold(); + } + } +#endif } if (Hot || ExternalInlineAdvisor) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 424f1d433606..955353944b14 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_component_library(LLVMInstrumentation LINK_COMPONENTS Analysis + AutoTuner Core Demangle MC diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3c8f25d73c62..b9459b59e704 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -2132,6 +2132,10 @@ static bool annotateAllFunctions( F->addFnAttr(Attribute::InlineHint); LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() << "\n"); +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) + F->getATEFunction().setHot(); +#endif } for (auto &F : ColdFunctions) { // Only set when there is no Attribute::Hot set by the user. 
For Hot @@ -2148,6 +2152,10 @@ static bool annotateAllFunctions( F->addFnAttr(Attribute::Cold); LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) + F->getATEFunction().setCold(); +#endif } return true; } diff --git a/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp new file mode 100644 index 000000000000..c33cb7cfc256 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp @@ -0,0 +1,334 @@ +#if defined(ENABLE_AUTOTUNER) +//===--------------- AutoTuningCompile.cpp - Auto-Tuning ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass implements incremental compilation for AutoTuner to reduce the +/// compilation time for tuning process. +/// This pass performs 2 operations. +/// 1. Writing module level IR files which can be used in subsequent +/// compilations for AutoTuner flow. So clang frontend don't have to process +/// the source code from scratch. +/// 2. Add/Remove attributes for modules and functions to enable/disable +/// execution of optimization pass(es). It further reduces the compilation +/// time by skipping optimization pass(es) (If feasible). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#include "llvm/Analysis/AutotuningDump.h" +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include <string> + +// Enable debug messages for AutoTuning Compilation. 
+#define DEBUG_TYPE "autotuning-compile"
+
+using namespace llvm;
+
+extern cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode;
+
+AutoTuningOptPassGate SkipPasses = AutoTuningOptPassGate(true);
+AutoTuningOptPassGate RunPasses = AutoTuningOptPassGate(false);
+bool AutoTuningCompileModule::SkipCompilation = false;
+
+// Dump the module IR if output generation is on and the mode calls for it.
+static void writeFiles(Module &M, std::string Pass) {
+  if (!autotuning::Engine.isGenerateOutput())
+    return;
+  bool ShouldDump = false;
+  switch (AutoTuningCompileMode) {
+  case Basic:
+  case CoarseGrain:
+    ShouldDump = Pass == autotuning::CompileOptionStart; // Start pass only.
+    break;
+  case FineGrain:
+    ShouldDump = autotuning::Engine.hasOpportunities();
+    break;
+  default:
+    llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental "
+                     "Compilation mode.\n");
+  }
+  if (!ShouldDump)
+    return;
+  LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: "
+                    << Pass << ".\n");
+  // No pass manager takes ownership of this pass, so it lives on the stack;
+  // the former heap allocation was never released.
+  AutotuningDumpLegacy ATD(/* Incremental Compilation */ true);
+  ATD.runOnModule(M);
+}
+
+bool AutoTuningOptPassGate::shouldRunPass(const StringRef PassName,
+                                          StringRef IRDescription) {
+  LLVM_DEBUG(dbgs() << "Skip pass '" << PassName
+                    << "': " << (Skip ? "True" : "False") << '\n');
+  return !Skip;
+}
+
+bool AutoTuningOptPassGate::checkPass(const StringRef PassName,
+                                      const StringRef TargetDesc) {
+  if (PassName.startswith("AutoTuningCompile")) {
+    LLVM_DEBUG(dbgs() << "Running '" << PassName << "'pass.\n");
+    return true;
+  }
+
+  LLVM_DEBUG(dbgs() << "Skip pass '" << PassName
+                    << "': " << (Skip ? "True" : "False") << '\n');
+  return !Skip;
+}
+
+AutoTuningCompileModule::AutoTuningCompileModule(std::string Pass) {
+  this->Pass = Pass;
+}
+
+void AutoTuningCompileModule::writeIRFiles(Module &M) const {
+  writeFiles(M, Pass);
+}
+
+bool AutoTuningCompileModule::modifyCompilationPipeline(Module &M) const {
+  bool Changed = false;
+  LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable "
+                       "optimization of module/functions. Pass: "
+                    << Pass << '\n');
+
+  StringRef Filename = M.getName();
+  size_t Pos = Filename.rfind(".ll");
+  if (Pos == StringRef::npos) {
+    errs() << "AutoTuningCompile: Source file is not IR (.ll) file. "
+              "Disabling incremental compilation.\n";
+    AutoTuningCompileMode = Inactive;
+    return Changed;
+  }
+  Filename = Filename.substr(0, Pos); // Keep the text before ".ll".
+
+  switch (AutoTuningCompileMode) {
+  case Basic:
+  case CoarseGrain:
+    LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for "
+                         "Basic/CoarseGrain incremental compilation mode.\n");
+    break;
+  case FineGrain: {
+    if (Pass == autotuning::CompileOptionStart) {
+      M.getContext().setOptPassGate(SkipPasses);
+      getAutoTuningOptPassGate().setSkip(true);
+      setSkipCompilation(true);
+      LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n");
+    } else if (getSkipCompilation() &&
+               (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) ||
+                Pass == "end")) {
+      M.getContext().setOptPassGate(RunPasses);
+      getAutoTuningOptPassGate().setSkip(false);
+      setSkipCompilation(false);
+      LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n");
+    } else
+      LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = "
+                        << (getSkipCompilation() ? "True" : "False")
+                        << " ) continued.\n");
+
+    Changed = true;
+    break;
+  }
+  default:
+    llvm_unreachable(
+        "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n");
+  }
+
+  return Changed;
+}
+
+bool AutoTuningCompileModule::run(Module &M) {
+  bool Changed = false;
+  if (AutoTuningCompileMode == Inactive)
+    return Changed;
+
+  if (!autotuning::Engine.isEnabled()) {
+    LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n");
+    return Changed;
+  }
+
+  writeIRFiles(M);
+
+  if (autotuning::Engine.isParseInput())
+    Changed |= modifyCompilationPipeline(M);
+
+  return Changed;
+}
+
+AutoTuningCompileModuleLegacy::AutoTuningCompileModuleLegacy(std::string Pass)
+    : ModulePass(AutoTuningCompileModuleLegacy::ID) {
+  this->Pass = Pass;
+}
+
+bool AutoTuningCompileModuleLegacy::runOnModule(Module &M) {
+  AutoTuningCompileModule Impl(Pass);
+  return Impl.run(M);
+}
+
+char AutoTuningCompileModuleLegacy::ID = 0;
+
+StringRef AutoTuningCompileModuleLegacy::getPassName() const {
+  return "AutoTuner Incremental Compilation";
+}
+
+INITIALIZE_PASS(AutoTuningCompileModuleLegacy, "autotuning-compile-module",
+                "AutoTuner Incremental Compilation", false, false)
+
+// Public interface: creates the legacy module-level AutoTuningCompile pass.
+ModulePass *llvm::createAutoTuningCompileModuleLegacyPass(std::string Pass) {
+  return new AutoTuningCompileModuleLegacy(Pass);
+}
+
+PreservedAnalyses AutoTuningCompileModulePass::run(Module &M,
+                                                   ModuleAnalysisManager &) {
+  AutoTuningCompileModule Impl(Pass);
+  Impl.run(M);
+  return PreservedAnalyses::all();
+}
+
+AutoTuningCompileFunction::AutoTuningCompileFunction(std::string Pass) {
+  this->Pass = Pass;
+}
+
+void AutoTuningCompileFunction::writeIRFiles(Module &M) {
+  if (IsModuleWritten)
+    return;
+  IsModuleWritten = true; // Guard against repeated dumps.
+  writeFiles(M, Pass);
+}
+
+bool AutoTuningCompileFunction::modifyCompilationPipeline(Function &F) {
+  bool Changed = false;
+  LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable "
+                       "optimization of module/functions. Pass: "
+                    << Pass << '\n');
+  Module *M = F.getParent();
+  StringRef Filename = M->getName();
+  size_t Pos = Filename.rfind(".ll");
+  if (Pos == StringRef::npos) {
+    errs() << "AutoTuningCompile: Source file is not IR (.ll) file. "
+              "Disabling incremental compilation.\n";
+    AutoTuningCompileMode = Inactive;
+    return Changed;
+  }
+  Filename = Filename.substr(0, Pos); // Keep the text before ".ll".
+
+  switch (AutoTuningCompileMode) {
+  case Basic:
+  case CoarseGrain:
+    LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for "
+                         "Basic/CoarseGrain incremental compilation mode.\n");
+    break;
+  case FineGrain: {
+    if (!AutoTuningCompileModule::getSkipCompilation() &&
+        Pass == autotuning::CompileOptionStart) {
+      if (!SkipDecision) {
+        M->getContext().setOptPassGate(SkipPasses);
+        getAutoTuningOptPassGate().setSkip(true);
+        SkipDecision = true;
+      }
+      AutoTuningCompileModule::setSkipCompilation(true);
+      LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n");
+    } else if (AutoTuningCompileModule::getSkipCompilation() &&
+               Pass != autotuning::CompileOptionStart &&
+               (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) ||
+                Pass == autotuning::CompileOptionEnd)) {
+      M->getContext().setOptPassGate(RunPasses);
+      getAutoTuningOptPassGate().setSkip(false);
+      SkipDecision = false;
+      AutoTuningCompileModule::setSkipCompilation(false);
+      LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n");
+    } else
+      LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = "
+                        << (AutoTuningCompileModule::getSkipCompilation()
+                                ? "True"
+                                : "False")
+                        << " ) continued.\n");
+
+    Changed = true;
+    break;
+  }
+  default:
+    llvm_unreachable(
+        "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n");
+  }
+
+  return Changed;
+}
+
+bool AutoTuningCompileFunction::run(Function &F) {
+  bool Changed = false;
+  if (AutoTuningCompileMode == Inactive)
+    return Changed;
+
+  if (!autotuning::Engine.isEnabled()) {
+    LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n");
+    return Changed;
+  }
+
+  writeIRFiles(*F.getParent());
+
+  if (autotuning::Engine.isParseInput())
+    Changed |= modifyCompilationPipeline(F);
+
+  return Changed;
+}
+
+AutoTuningCompileFunctionLegacy::AutoTuningCompileFunctionLegacy(
+    std::string Pass)
+    : FunctionPass(AutoTuningCompileFunctionLegacy::ID) {
+  this->Pass = Pass;
+}
+
+bool AutoTuningCompileFunctionLegacy::runOnFunction(Function &F) {
+  AutoTuningCompileFunction Impl(Pass);
+  return Impl.run(F);
+}
+
+char AutoTuningCompileFunctionLegacy::ID = 0;
+
+StringRef AutoTuningCompileFunctionLegacy::getPassName() const {
+  return "AutoTuner Incremental Compilation";
+}
+
+INITIALIZE_PASS(AutoTuningCompileFunctionLegacy, "autotuning-compile-function",
+                "AutoTuner Incremental Compilation", false, false)
+
+// Public interface: creates the legacy function-level AutoTuningCompile pass.
+FunctionPass *
+llvm::createAutoTuningCompileFunctionLegacyPass(std::string Pass) {
+  return new AutoTuningCompileFunctionLegacy(Pass);
+}
+
+PreservedAnalyses
+AutoTuningCompileFunctionPass::run(Function &F, FunctionAnalysisManager &AM) {
+  AutoTuningCompileFunction Impl(Pass);
+  Impl.run(F);
+  return PreservedAnalyses::all();
+}
+
+PreservedAnalyses
+AutoTuningCompileLoopPass::run(Loop &L, LoopAnalysisManager &AM,
+                               LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+  AutoTuningCompileFunction Impl(Pass);
+  Function *F = L.getHeader()->getParent();
+  Impl.run(*F);
+  return PreservedAnalyses::all();
+}
+
+AutoTuningOptPassGate &llvm::getAutoTuningOptPassGate() {
+  static AutoTuningOptPassGate AutoTuningGate;
+  return AutoTuningGate;
+}
+
+#endif
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index eb008c15903a..e5a82ea8f923 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts
   ADCE.cpp
   AlignmentFromAssumptions.cpp
   AnnotationRemarks.cpp
+  AutoTuningCompile.cpp
   BDCE.cpp
   CallSiteSplitting.cpp
   ConstantHoisting.cpp
@@ -92,6 +93,7 @@ add_llvm_component_library(LLVMScalarOpts
   LINK_COMPONENTS
   AggressiveInstCombine
   Analysis
+  AutoTuner
   Core
   InstCombine
   Support
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 335b489d3cb2..feb8932eaae7 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -66,6 +66,9 @@
 #include <string>
 #include <tuple>
 #include <utility>
+#if defined(ENABLE_AUTOTUNER)
+#include "llvm/AutoTuner/AutoTuning.h"
+#endif
 
 using namespace llvm;
 
@@ -173,6 +176,10 @@ static cl::opt<unsigned>
     cl::desc("Default threshold (max size of unrolled "
              "loop), used in all but O3 optimizations"));
 
+#if defined(ENABLE_AUTOTUNER)
+static const std::string UnrollCountParamStr = "UnrollCount";
+#endif
+
 /// A magic value for use with the Threshold parameter to indicate
 /// that the loop unroll should be performed regardless of how much
 /// code expansion would result.
@@ -893,7 +900,12 @@ bool llvm::computeUnrollCount( OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, +#if defined(ENABLE_AUTOTUNER) + TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound, + unsigned int Invocation) { +#else TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { +#endif UnrollCostEstimator UCE(*L, LoopSize); @@ -942,6 +954,43 @@ bool llvm::computeUnrollCount( } } +#if defined(ENABLE_AUTOTUNER) + // Priority 2.5 is using Unroll Count set by AutoTuner (if enabled). + if (autotuning::Engine.isEnabled()) { + // Create a code region for current loop. This code region will be added to + // opportunity list once all the relevant information is gathered. + autotuning::Engine.initContainer(L, DEBUG_TYPE, + L->getHeader()->getParent()->getName(), + /* addOpportunity */ false, Invocation); + + int NewValue = 0; // the int value is set by lookUpParams() + bool UnrollCountChanged = L->lookUpParams<int>("UnrollCount", NewValue); + + if (UnrollCountChanged) { + // Setting the UP.Count with the value suggested by AutoTuner. + // AutoTuner will use UnrollCount = 0, 1, X, Y, Z in case of dynamic + // configuration and UnrollCount = 0, 1, 2, 4, 8 otherwise to find + // optimal configuration. Compiler will unroll the loop with suggested + // UnrollCount except when UnrollCount = 1 where AutoTuner is suggesting + // to try loop peeling. + UP.Count = NewValue; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + UP.Runtime = true; + if (!UP.AllowRemainder && UP.Count != 1) + UP.Count = 0; + + // Check for Loop Peeling + if (UP.Count == 1) { + computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold); + UP.Runtime = (PP.PeelCount) ? false : UP.Runtime; + } + + return true; + } + } +#endif + // 3rd priority is exact full unrolling. This will eliminate all copies // of some exit test. 
UP.Count = 0;
@@ -1119,6 +1168,59 @@ bool llvm::computeUnrollCount(
   return ExplicitUnroll;
 }
 
+#if defined(ENABLE_AUTOTUNER)
+// Compute the dynamic UnrollCount candidates for CodeRegion CR and add them
+// to CR: 0, 1 (1 requests loop peeling), the baseline count (UPCount, else
+// the trip count capped at 64, else 8) unless it is 1 and so already listed,
+// and up to two powers of two collected around the baseline.
+static void
+computeAutoTunerDynamicUnrollOptions(unsigned UPCount, unsigned TripCount,
+                                     const autotuning::CodeRegion &CR) {
+  const unsigned int MaxTuningCount = 64;
+  std::vector<unsigned int> DynamicTuningOptions = {0, 1};
+  // Two-slot ring: only the two most recent power-of-two candidates survive.
+  unsigned int PotentialTuningOptions[2];
+  unsigned int Idx = 0;
+  int Count = -1;
+  unsigned int CurrentOption = 2;
+  // Records every power of two that is still below Limit.
+  auto CollectBelow = [&](unsigned int Limit) {
+    for (; CurrentOption < Limit; CurrentOption *= 2) {
+      PotentialTuningOptions[Idx] = CurrentOption;
+      Idx = (Idx + 1) % 2;
+      ++Count;
+    }
+  };
+
+  if (!UPCount) {
+    const unsigned int Baseline =
+        TripCount ? std::min(TripCount, MaxTuningCount) : 8;
+    if (Baseline != 1) // Avoid listing 1 twice, as the other branch does.
+      DynamicTuningOptions.push_back(Baseline);
+    CollectBelow(Baseline);
+  } else {
+    CollectBelow(UPCount);
+    if (TripCount != UPCount) {
+      // Also offer the next power of two above UPCount when the trip count
+      // is unknown (0) or larger than that value.
+      if (CurrentOption == UPCount)
+        CurrentOption *= 2;
+      if (!TripCount || CurrentOption < TripCount) {
+        PotentialTuningOptions[Idx] = CurrentOption;
+        ++Count;
+      }
+    }
+    if (UPCount != 1)
+      DynamicTuningOptions.push_back(UPCount);
+  }
+
+  for (Count = std::min(1, Count); Count >= 0; --Count)
+    DynamicTuningOptions.push_back(PotentialTuningOptions[Count]);
+
+  CR.addAutoTunerOptions("UnrollCount", DynamicTuningOptions);
+}
+#endif
+
 static LoopUnrollResult
 tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                 const TargetTransformInfo &TTI, AssumptionCache &AC,
@@ -1132,7 +1234,12 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                 std::optional<bool> ProvidedUpperBound,
std::optional<bool> ProvidedAllowPeeling, std::optional<bool> ProvidedAllowProfileBasedPeeling, +#if defined(ENABLE_AUTOTUNER) + std::optional<unsigned> ProvidedFullUnrollMaxCount, + unsigned int Invocation = 0) { +#else std::optional<unsigned> ProvidedFullUnrollMaxCount) { +#endif LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" @@ -1276,11 +1383,28 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, // computeUnrollCount() decides whether it is beneficial to use upper bound to // fully unroll the loop. bool UseUpperBound = false; + +#if defined(ENABLE_AUTOTUNER) + bool IsCountSetExplicitly = computeUnrollCount( + L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, + MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound, Invocation); + const autotuning::CodeRegion CR = L->getCodeRegion(); + // computeAutoTunerDynamicUnrollOptions() adds the dynamic Unroll values to + // the CodeRegion. + computeAutoTunerDynamicUnrollOptions(UP.Count, TripCount, CR); + + if (!UP.Count) { + autotuning::Engine.addOpportunity( + CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); + return LoopUnrollResult::Unmodified; + } +#else bool IsCountSetExplicitly = computeUnrollCount( L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; +#endif if (PP.PeelCount) { assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step"); @@ -1300,8 +1424,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, // we had, so we don't want to unroll or peel again. 
if (PP.PeelProfiledIterations) L->setLoopAlreadyUnrolled(); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity( + CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); + return LoopUnrollResult::PartiallyUnrolled; + } + autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); +#else return LoopUnrollResult::PartiallyUnrolled; } +#endif return LoopUnrollResult::Unmodified; } @@ -1329,8 +1461,18 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); + +#if defined(ENABLE_AUTOTUNER) + if (UnrollResult == LoopUnrollResult::Unmodified) { + autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); + return LoopUnrollResult::Unmodified; + } + autotuning::Engine.addOpportunity( + CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); +#else if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; +#endif if (RemainderLoop) { std::optional<MDNode *> RemainderLoopID = @@ -1379,6 +1521,20 @@ public: /// Otherwise, forgetAllLoops and rebuild when needed next. bool ForgetAllSCEV; +#if defined(ENABLE_AUTOTUNER) +private: + // 'InvocationCounter' keeps track of Invocation of Loop Unroll Pass and + // assign it to 'Invocation'. So each LoopUnroll Object knows when it is + // being invoked during optimization pipeline. It is used to identify the + // Invocation of a pass if it is invoked multiple times. AutoTuner will use + // this information to generate the Code Regions and apply the suggested + // configuration during the correct invocation of the Loop Unroll Pass. 
+ static unsigned int InvocationCounter; + unsigned int Invocation; + +public: +#endif + std::optional<unsigned> ProvidedCount; std::optional<unsigned> ProvidedThreshold; std::optional<bool> ProvidedAllowPartial; @@ -1405,6 +1561,9 @@ public: ProvidedAllowPeeling(AllowPeeling), ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { +#if defined(ENABLE_AUTOTUNER) + Invocation = InvocationCounter++; +#endif initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1431,7 +1590,12 @@ public: /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling, +#if defined(ENABLE_AUTOTUNER) + ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount, + Invocation); +#else ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); +#endif if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1449,6 +1613,9 @@ public: getLoopAnalysisUsage(AU); } }; +#if defined(ENABLE_AUTOTUNER) +unsigned int LoopUnroll::InvocationCounter = 0; +#endif } // end anonymous namespace @@ -1496,6 +1663,11 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, std::string LoopName = std::string(L.getName()); +#if defined(ENABLE_AUTOTUNER) + // LoopFullUnrollPass will be invoked first during optimization pipeline. 
+ unsigned int Invocation = 0; +#endif + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, /*BFI*/ nullptr, /*PSI*/ nullptr, @@ -1505,7 +1677,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, /*Runtime*/ false, /*UpperBound*/ false, /*AllowPeeling*/ true, /*AllowProfileBasedPeeling*/ false, +#if defined(ENABLE_AUTOTUNER) + /*FullUnrollMaxCount*/ std::nullopt, + /*Invocation*/ Invocation) != +#else /*FullUnrollMaxCount*/ std::nullopt) != +#endif LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1588,6 +1765,11 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, bool Changed = false; +#if defined(ENABLE_AUTOTUNER) + // LoopUnrollPass will be invoked second during optimization pipeline. + unsigned int Invocation = 1; +#endif + // The unroller requires loops to be in simplified form, and also needs LCSSA. // Since simplification may add new inner loops, it has to run before the // legality and profitability checks. This means running the loop unroller @@ -1630,7 +1812,12 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, /*Count*/ std::nullopt, /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, +#if defined(ENABLE_AUTOTUNER) + UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount, + Invocation); +#else UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); +#endif Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! 
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 37b032e4d7c7..4b140e8d600b 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -64,4 +64,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeStraightLineStrengthReduceLegacyPassPass(Registry); initializePlaceBackedgeSafepointsLegacyPassPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); +#if defined(ENABLE_AUTOTUNER) + initializeAutoTuningCompileFunctionLegacyPass(Registry); + initializeAutoTuningCompileModuleLegacyPass(Registry); +#endif } diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp index 8b99f73b850b..b3c60686e252 100644 --- a/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/llvm/lib/Transforms/Scalar/Sink.cpp @@ -248,6 +248,11 @@ namespace { } bool runOnFunction(Function &F) override { +#if defined(ENABLE_AUTOTUNER) + if (skipFunction(F)) + return false; +#endif + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index a870071f3f64..8616e7b923c0 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -93,6 +93,7 @@ add_llvm_component_library(LLVMTransformUtils LINK_COMPONENTS Analysis + AutoTuner Core Support TargetParser diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index c36b0533580b..20a4edcb29db 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -491,6 +491,11 @@ char &llvm::LCSSAID = LCSSAWrapperPass::ID; /// Transform \p F into loop-closed SSA form. 
bool LCSSAWrapperPass::runOnFunction(Function &F) { +#if defined(ENABLE_AUTOTUNER) + if (skipFunction(F)) + return false; +#endif + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 3e604fdf2e11..2e42e7f1397f 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -69,6 +69,9 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; #define DEBUG_TYPE "loop-simplify" @@ -793,6 +796,11 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// it in any convenient order) inserting preheaders... /// bool LoopSimplify::runOnFunction(Function &F) { +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled() && skipFunction(F)) + return false; +#endif + bool Changed = false; LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 511dd61308f9..2d2c3e50514b 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -69,6 +69,9 @@ #include <numeric> #include <type_traits> #include <vector> +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { class DataLayout; diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 998dfd956575..f2c5c04abb13 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -21,6 
+21,7 @@ add_llvm_component_library(LLVMVectorize LINK_COMPONENTS Analysis + AutoTuner Core Support TransformUtils diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index f923f0be6621..f13ce6853666 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -113,6 +113,18 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, // Populate values with existing loop metadata. getHintsFromMetadata(); +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) { + int NewValue = 0; + bool VectorizationInterleaveChanged = + L->lookUpParams<int>("VectorizationInterleave", NewValue); + + if (VectorizationInterleaveChanged) { + Interleave.Value = NewValue; + } + } +#endif + // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) Interleave.Value = VectorizerParams::VectorizationInterleave; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b603bbe55dc9..46fab860f5a3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10178,6 +10178,22 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced || !EnableLoopVectorization) {} +#if defined(ENABLE_AUTOTUNER) +// Given the interleave count (IC) and CR, compute the dynamic values for +// interleave count. Then add it to CR.
+static void +computeAutoTunerDynamicInterleaveOptions(unsigned IC, + const autotuning::CodeRegion &CR) { + + std::vector<unsigned int> AutoTunerOptions{1, 2, 4}; + if (std::find(AutoTunerOptions.begin(), AutoTunerOptions.end(), IC) == + AutoTunerOptions.end()) + AutoTunerOptions[2] = IC; + + CR.addAutoTunerOptions("VectorizationInterleave", AutoTunerOptions); +} +#endif + bool LoopVectorizePass::processLoop(Loop *L) { assert((EnableVPlanNativePath || L->isInnermost()) && "VPlan-native path is not enabled. Only process inner loops."); @@ -10190,6 +10206,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { << L->getHeader()->getParent()->getName() << "' from " << DebugLocStr << "\n"); +#if defined(ENABLE_AUTOTUNER) + // Initialize the loop for auto-tuning but do not add it + // as a tuning opportunity yet. + autotuning::Engine.initContainer( + L, LV_NAME, L->getHeader()->getParent()->getName(), false); +#endif LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); LLVM_DEBUG( @@ -10422,6 +10444,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { InterleaveLoop = false; } +#if defined(ENABLE_AUTOTUNER) + if (!VectorizerParams::isInterleaveForced()) { + // Compute the dynamic values for VectorizationInterleave and add it to the + // CodeRegion. + computeAutoTunerDynamicInterleaveOptions(IC, L->getCodeRegion()); + + // Add the current loop as a tuning opportunity explicitly. + autotuning::Engine.addOpportunity( + L->getCodeRegion(), {{"VectorizationInterleave", std::to_string(IC)}}); + } +#endif + // Override IC if user provided an interleave count. IC = UserIC > 0 ?
UserIC : IC; diff --git a/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml new file mode 100644 index 000000000000..f483a269906a --- /dev/null +++ b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml @@ -0,0 +1,8 @@ +--- !AutoTuning +Pass: loop-unroll +Name: [name] +Function: foo +CodeRegionType: loop +Args: + - UnrollCount: [number] +... diff --git a/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll new file mode 100644 index 000000000000..ceb9b4fb2ca6 --- /dev/null +++ b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll @@ -0,0 +1,65 @@ +; UNSUPPORTED: windows +; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml +; RUN: opt --disable-output %s -S -passes='require<autotuning-dump>' \ +; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 +; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s +; RUN: rm -rf %T/../autotune_datadir/* + +; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml +; RUN: opt %s -S -passes='require<autotuning-dump>' -auto-tuning-config-id=1 +; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s +; RUN: rm -rf %T/../autotune_datadir/* + +; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml +; RUN: opt %s -S -passes='require<autotuning-dump>' -enable-autotuning-dump +; RUN: echo -n %T/../autotune_datadir/IR_files/ > %t.filename +; RUN: echo -n "create-data-dir.ll/" >> %t.filename +; RUN: echo -n %s | sed 's#/#_#g' >> %t.filename +; RUN: echo -n ".ll" >> %t.filename +; RUN: cat %t.filename | xargs cat | FileCheck %s +; RUN: rm -rf %T/../autotune_datadir + +; ModuleID = 'search.c' +source_filename = "search.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: argmemonly 
nofree norecurse nosync nounwind readonly uwtable +define dso_local i32 @search(ptr nocapture noundef readonly %Arr, i32 noundef %Value, i32 noundef %Size) { +entry: + %cmp5 = icmp sgt i32 %Size, 0 + br i1 %cmp5, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %Size to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.inc + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp eq i32 %0, %Value + br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.end, label %for.body + +for.end.loopexit.split.loop.exit: ; preds = %for.body + %1 = trunc i64 %indvars.iv to i32 + br label %for.end + +for.end: ; preds = %for.inc, %for.end.loopexit.split.loop.exit, %entry + %Idx.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit.split.loop.exit ], [ %Size, %for.inc ] + ret i32 %Idx.0.lcssa +} + +; Check that only loop body is inside the IR File. 
+; CHECK-LABEL: for.body: ; preds = +; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv +; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: %cmp1 = icmp eq i32 %0, %Value +; CHECK-NEXT: br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc diff --git a/llvm/test/AutoTuning/AutotuningDump/unroll.ll b/llvm/test/AutoTuning/AutotuningDump/unroll.ll new file mode 100644 index 000000000000..e8243da55fff --- /dev/null +++ b/llvm/test/AutoTuning/AutotuningDump/unroll.ll @@ -0,0 +1,35 @@ +; RUN: rm -rf %T.tmp/Output +; RUN: mkdir -p %T.tmp/Output +; RUN: rm %t.DEFAULT.yaml -rf +; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml +; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require<autotuning-dump>' \ +; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 +; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require<autotuning-dump>' \ +; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=2 +; RUN: cat %T.tmp/Output/unroll.ll/1.ll | FileCheck %s -check-prefix=DEFAULT +; RUN: cat %T.tmp/Output/unroll.ll/2.ll | FileCheck %s -check-prefix=DEFAULT +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body +for.end: ; preds = %for.body + ret void +} +; Check that only loop body is inside the IR File. 
+; DEFAULT-LABEL: for.body: ; preds = %for.body, %entry +; DEFAULT-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; DEFAULT-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv +; DEFAULT: %exitcond = icmp eq i64 %indvars.iv.next, 64 +; DEFAULT: br i1 %exitcond, label %for.end, label %for.body + +; RUN: rm -rf %T.tmp/Output diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml new file mode 100644 index 000000000000..a5e669c17a71 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml @@ -0,0 +1,9 @@ +!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421, + CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, + Pass: loop-unroll} +--- !AutoTuning {Args: [{VectorizationInterleave: 2}], + CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, + Invocation: 0, Name: for.body, Pass: loop-vectorize} +--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, + CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, + Pass: loop-unroll} diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml new file mode 100644 index 000000000000..738cf55ffe9a --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml @@ -0,0 +1,9 @@ +!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 12835463591102937421, + CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, + Pass: loop-unroll} +--- !AutoTuning {Args: [{VectorizationInterleave: 2}], + CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, + Invocation: 0, Name: for.body, Pass: loop-vectorize} +--- !AutoTuning {Args: [{UnrollCount: 0}], 
CodeRegionHash: 8430337282115614432, + CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, + Pass: loop-unroll} diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll new file mode 100644 index 000000000000..667a076b2d23 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll @@ -0,0 +1,117 @@ +; ModuleID = 'test.c' +source_filename = "test.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [12 x i8] c"tmp <= 10.0\00", align 1 +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 +@__PRETTY_FUNCTION__.test = private unnamed_addr constant [12 x i8] c"void test()\00", align 1 + +; Function Attrs: nounwind uwtable +define dso_local void @test() #0 { +entry: + %cs = alloca i32, align 4 + %flush = alloca ptr, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %cs) #5 + store i32 16431360, ptr %cs, align 4, !tbaa !6 + call void @llvm.lifetime.start.p0(i64 8, ptr %flush) #5 + %0 = load i32, ptr %cs, align 4, !tbaa !6 + %conv = sext i32 %0 to i64 + %call = call noalias ptr @calloc(i64 noundef %conv, i64 noundef 8) #6 + store ptr %call, ptr %flush, align 8, !tbaa !10 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #5 + call void @llvm.lifetime.start.p0(i64 8, ptr %tmp) #5 + store double 0.000000e+00, ptr %tmp, align 8, !tbaa !12 + store i32 0, ptr %i, align 4, !tbaa !6 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, ptr %i, align 4, !tbaa !6 + %2 = load i32, ptr %cs, align 4, !tbaa !6 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %flush, align 8, !tbaa !10 + %4 = load i32, ptr %i, align 4, !tbaa !6 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds double, 
ptr %3, i64 %idxprom + %5 = load double, ptr %arrayidx, align 8, !tbaa !12 + %6 = load double, ptr %tmp, align 8, !tbaa !12 + %add = fadd double %6, %5 + store double %add, ptr %tmp, align 8, !tbaa !12 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i32, ptr %i, align 4, !tbaa !6 + %inc = add nsw i32 %7, 1 + store i32 %inc, ptr %i, align 4, !tbaa !6 + br label %for.cond, !llvm.loop !14 + +for.end: ; preds = %for.cond + %8 = load double, ptr %tmp, align 8, !tbaa !12 + %cmp2 = fcmp ole double %8, 1.000000e+01 + br i1 %cmp2, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %if.end + +if.else: ; preds = %for.end + call void @__assert_fail(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 11, ptr noundef @__PRETTY_FUNCTION__.test) #7 + unreachable + +if.end: ; preds = %if.then + %9 = load ptr, ptr %flush, align 8, !tbaa !10 + call void @free(ptr noundef %9) #5 + call void @llvm.lifetime.end.p0(i64 8, ptr %tmp) #5 + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #5 + call void @llvm.lifetime.end.p0(i64 8, ptr %flush) #5 + call void @llvm.lifetime.end.p0(i64 4, ptr %cs) #5 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind allocsize(0,1) +declare noalias ptr @calloc(i64 noundef, i64 noundef) #2 + +; Function Attrs: noreturn nounwind +declare void @__assert_fail(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #3 + +; Function Attrs: nounwind +declare void @free(ptr noundef) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #1 = { nocallback nofree 
nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind allocsize(0,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #3 = { noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #4 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #5 = { nounwind } +attributes #6 = { nounwind allocsize(0,1) } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{!"Huawei BiSheng Compiler clang version 18.0.0 (ssh://git@codehub-dg-y.huawei.com:2222/CompilerKernel/BiShengKernel/BiSheng.git 026024071a7fb66b26b65fb81da702cc5f0cf405)"} +!6 = !{!7, !7, i64 0} +!7 = !{!"int", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !11, i64 0} +!11 = !{!"any pointer", !8, i64 0} +!12 = !{!13, !13, i64 0} +!13 = !{!"double", !8, i64 0} +!14 = distinct !{!14, !15} +!15 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll new file mode 100644 index 000000000000..f905208a2f3b --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll @@ -0,0 +1,11 @@ +; The purpose is to test the baseline IR is the same as the 1st iteration of +; autotuning process with --use-baseline-config enabled. 
+; RUN: rm %t.baseline %t.firstIt -f +; RUN: opt -O3 %S/Inputs/test.ll -o %t.baseline +; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_baseline \ +; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/baseline_config.yaml +; RUN: cmp %t.firstIt_baseline %t.baseline + +; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_random \ +; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/random_config.yaml +; RUN: not cmp %t.firstIt_random %t.baseline diff --git a/llvm/test/AutoTuning/BaselineConfig/opp.ll b/llvm/test/AutoTuning/BaselineConfig/opp.ll new file mode 100644 index 000000000000..b2897316fc22 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/opp.ll @@ -0,0 +1,67 @@ +; REQUIRES: asserts +; RUN: rm %t.callsite_opp -rf +; RUN: opt %s -O3 -debug-only=inline -disable-output -S 2>&1 | \ +; RUN: FileCheck %s -check-prefix=DEFAULT +; RUN: opt %s -O3 -auto-tuning-opp=%t.callsite_opp -disable-output -S 2>&1 +; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=AUTOTUNE + +@a = global i32 4 + +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #0 { +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +entry: + 
%0 = tail call i32 @simpleFunction(i32 6) + ret i32 %0 +} + +declare void @extern() + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } + + +; NOTE: Need to make sure the function inlining has the same behaviour as O3 and +; 'BaselineConfig' +; DEFAULT: Inlining calls in: bar +; DEFAULT: Inlining (cost=115, threshold=375), Call: %0 = tail call i32 @simpleFunction(i32 6) + +; AUTOTUNE: Pass: inline +; AUTOTUNE-NEXT: Name: simpleFunction +; AUTOTUNE-NEXT: Function: bar +; AUTOTUNE-NEXT: CodeRegionType: callsite +; AUTOTUNE-NEXT: CodeRegionHash: {{[0-9]+}} +; AUTOTUNE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; AUTOTUNE-NEXT: BaselineConfig: { ForceInline: '1' } +; AUTOTUNE-NEXT: Invocation: 0 diff --git a/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll new file mode 100644 index 000000000000..13acafae6fc4 --- /dev/null +++ b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll @@ -0,0 +1,62 @@ +; REQUIRES: asserts + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop --disable-output +; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=DEFAULT + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-function-filter=foo --disable-output +; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_FOO + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-function-filter=bar --disable-output +; RUN: FileCheck %s --input-file
%t.filter/function-filtering.ll.yaml -check-prefix=FILTER_BAR + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-function-filter=dummy -debug-only=autotuning | \ +; RUN: FileCheck %s -check-prefix=FILTER_DUMMY + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +define void @bar(i32* nocapture %a) { +entry: + call void @foo(i32* %a) + ret void +} + +; DEFAULT: --- !AutoTuning +; DEFAULT: --- !AutoTuning + +; FILTER_FOO: --- !AutoTuning +; FILTER_FOO: Function: foo +; FILTER_FOO-NOT: --- !AutoTuning + +; FILTER_BAR: --- !AutoTuning +; FILTER_BAR: Function: bar +; FILTER_BAR-NOT: --- !AutoTuning + +; FILTER_DUMMY-NOT: --- !AutoTuning +; FILTER_DUMMY-NOT: --- !AutoTuning diff --git a/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml new file mode 100644 index 000000000000..9c203e58f0ab --- /dev/null +++ b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml @@ -0,0 +1,3 @@ +<inpus> + <input>this is a xml file</input> +</input> diff --git a/llvm/test/AutoTuning/Error/Inputs/template.yaml b/llvm/test/AutoTuning/Error/Inputs/template.yaml new file mode 100644 index 000000000000..1f02b52ffb38 --- /dev/null +++ b/llvm/test/AutoTuning/Error/Inputs/template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: pass +Name: for.body +Function: foo +CodeRegionType: loop 
+CodeRegionHash: 0 +Args: + - UnrollCount: 2 + - PassOrder: [test, test2] +... diff --git a/llvm/test/AutoTuning/Error/file-not-found-error.ll b/llvm/test/AutoTuning/Error/file-not-found-error.ll new file mode 100644 index 000000000000..6a364239a271 --- /dev/null +++ b/llvm/test/AutoTuning/Error/file-not-found-error.ll @@ -0,0 +1,29 @@ +; RUN: rm %t.non-existing.yaml -rf +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.non-existing.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly when input yaml is not found +; +; ERROR: Error parsing auto-tuning input.
+; ERROR: No such file or directory diff --git a/llvm/test/AutoTuning/Error/invalid-yaml-error.ll b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll new file mode 100644 index 000000000000..bfc8784c4ea4 --- /dev/null +++ b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll @@ -0,0 +1,27 @@ +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%S/Inputs/invalid-format.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly when input yaml is in invalid format +; +; ERROR: error: YAML:1:1: error: document root is not of mapping type. diff --git a/llvm/test/AutoTuning/Error/malformed-input-error.ll b/llvm/test/AutoTuning/Error/malformed-input-error.ll new file mode 100644 index 000000000000..0b73c3195503 --- /dev/null +++ b/llvm/test/AutoTuning/Error/malformed-input-error.ll @@ -0,0 +1,136 @@ +; Check if error messages are shown properly for malformed YAML files.
+ +; Missing Pass Field +; RUN: rm %t.missing-pass.yaml -rf +; RUN: sed 's#Pass: pass##g' %S/Inputs/template.yaml > %t.missing-pass.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-pass.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-FIELD + +; Missing Pass Value +; RUN: rm %t.missing-value-pass.yaml -rf +; RUN: sed 's#pass##g' %S/Inputs/template.yaml > %t.missing-value-pass.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-pass.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-PASS-VALUE + +; Missing Name Field +; RUN: rm %t.missing-name.yaml -rf +; RUN: sed 's#Name: for.body##g' %S/Inputs/template.yaml > %t.missing-name.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-name.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-NAME-FIELD + +; Missing Name Value +; RUN: rm %t.missing-value-name.yaml -rf +; RUN: sed 's#for.body##g' %S/Inputs/template.yaml > %t.missing-value-name.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-name.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-NAME-VALUE + +; Missing Function Field +; RUN: rm %t.missing-function.yaml -rf +; RUN: sed 's#Function: foo##g' %S/Inputs/template.yaml > %t.missing-function.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' -auto-tuning-input=%t.missing-function.yaml 2>&1 | FileCheck %s -check-prefix=ERROR-FUNCTION-FIELD + +; Missing Function Value +; RUN: rm %t.missing-value-func.yaml -rf +; RUN: sed 's#foo##g' %S/Inputs/template.yaml > %t.missing-value-func.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-func.yaml 2>&1 | \ +; RUN: FileCheck %s 
-check-prefix=ERROR-FUNC-VALUE + +; Missing CodeRegionType Field +; RUN: rm %t.missing-type.yaml -rf +; RUN: sed 's#CodeRegionType: loop##g' %S/Inputs/template.yaml > %t.missing-type.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-type.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-FIELD + +; Missing CodeRegionType Value +; RUN: rm %t.missing-value-type.yaml -rf +; RUN: sed 's#loop##g' %S/Inputs/template.yaml > %t.missing-value-type.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-type.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-VALUE + +; Invalid CodeRegionType Value +; RUN: rm %t.invalid-value-type.yaml -rf +; RUN: sed 's#loop#error-type#g' %S/Inputs/template.yaml > %t.invalid-value-type.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.invalid-value-type.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-INVALID + +; Missing Param Name +; RUN: rm %t.missing-param-name.yaml -rf +; RUN: sed 's#UnrollCount##g' %S/Inputs/template.yaml > %t.missing-param-name.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-param-name.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-PARAM-NAME + +; Missing Param Value +; RUN: rm %t.missing-value-param.yaml -rf +; RUN: sed 's#2##g' %S/Inputs/template.yaml > %t.missing-value-param.yaml +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-param.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-PARAM-VALUE + +; Empty Param List +; RUN: rm %t.empty-value-param-list.yaml -rf +; RUN: sed 's#\[test, test2\]#\[\]#g' %S/Inputs/template.yaml > %t.empty-value-param-list.yaml +; RUN: opt %s -S 
-passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.empty-value-param-list.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=VALID + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly for malformed YAML input files. +; + +; ERROR-FIELD: error: CodeRegionHash, CodeRegionType, or Pass missing. + +; ERROR-NAME-FIELD: error: Remark Name expected; enable -autotuning-omit-metadata. + +; ERROR-FUNCTION-FIELD: error: Remark Function Name expected; enable -autotuning-omit-metadata. + +; ERROR-PASS-VALUE: error: YAML:2:1: error: expected a value of scalar type. +; ERROR-PASS-VALUE: Pass: + +; ERROR-NAME-VALUE: error: YAML:3:1: error: expected a value of scalar type. +; ERROR-NAME-VALUE: Name: + +; ERROR-FUNC-VALUE: error: YAML:4:1: error: expected a value of scalar type. +; ERROR-FUNC-VALUE: Function: + +; ERROR-CODE-REGION-TYPE-FIELD: CodeRegionHash, CodeRegionType, or Pass missing. + +; ERROR-CODE-REGION-TYPE-VALUE: error: YAML:5:1: error: expected a value of scalar type. +; ERROR-CODE-REGION-TYPE-VALUE: CodeRegionType: + +; ERROR-CODE-REGION-TYPE-INVALID: Unsupported CodeRegionType:error-type + +; ERROR-PARAM-NAME: error: YAML:8:5: error: argument key is missing. +; ERROR-PARAM-NAME: - : 2 + +; ERROR-PARAM-VALUE: error: YAML:8:5: error: expected a value of scalar type. +; ERROR-PARAM-VALUE: - UnrollCount: + +; VALID-NOT: -auto-tuning-input=(input file) option failed.
diff --git a/llvm/test/AutoTuning/Error/output-error.ll b/llvm/test/AutoTuning/Error/output-error.ll new file mode 100644 index 000000000000..61ffba50924b --- /dev/null +++ b/llvm/test/AutoTuning/Error/output-error.ll @@ -0,0 +1,28 @@ +; RUN: rm %t.opp -rf; touch %t.opp +; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.opp 2>&1 | FileCheck %s -check-prefix=ERROR-OPP + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly when output files cannot be created +; +; ERROR-OPP: Error generating auto-tuning opportunities. 
+; ERROR-OPP: error: Not a directory diff --git a/llvm/test/AutoTuning/Error/valid-input.ll b/llvm/test/AutoTuning/Error/valid-input.ll new file mode 100644 index 000000000000..dae90cdbe408 --- /dev/null +++ b/llvm/test/AutoTuning/Error/valid-input.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%S/Inputs/template.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=VALID +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check that no error message is shown when the input is valid +; + +; VALID-NOT: -auto-tuning-input=(input file) option failed. diff --git a/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml new file mode 100644 index 000000000000..a7d390be63e7 --- /dev/null +++ b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: [dummy-pass] +CodeRegionType: [dummy-type] +Name: foo +DebugLoc: { File: [dummy-file], Line: 0, Column: 0 } +Function: foo +CodeRegionHash: 0 +Invocation: 0 +... 
diff --git a/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll new file mode 100644 index 000000000000..b9dc81089d40 --- /dev/null +++ b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll @@ -0,0 +1,103 @@ +; REQUIRES: asserts +; RUN: rm %t.output -rf +; RUN: rm %t.inc_compile.yaml -rf +; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml +; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml +; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml +; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ +; RUN: -auto-tuning-compile-mode=CoarseGrain -print-after-all \ +; RUN: -debug-only=autotuning-compile \ +; RUN: -o %t.output 2>&1 | \ +; RUN: FileCheck %s -check-prefix=COARSEGRAIN + +; RUN: rm %t.output -rf +; RUN: rm %t.inc_compile.yaml -rf +; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml +; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml +; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml +; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ +; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ +; RUN: -debug-only=autotuning-compile \ +; RUN: -o %t.output 2>&1 | \ +; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-INLINE + +; RUN: rm %t.output -rf +; RUN: rm %t.inc_compile.yaml -rf +; RUN: sed 's#\[dummy-pass\]#loop-unroll#g' %S/Inputs/template.yaml > %t.temp.yaml +; RUN: sed 's#\[dummy-type\]#loop#g' %t.temp.yaml > %t.temp2.yaml +; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml +; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ +; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ +; RUN: -debug-only=autotuning-compile \ +; RUN: -o %t.output 2>&1 | \ +; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-2,FINEGRAIN-UNROLL + +; ModuleID = 'test.c' 
+source_filename = "test.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: argmemonly nofree norecurse nosync nounwind uwtable +define dso_local i32 @test(i32* nocapture noundef %a, i32* nocapture noundef readonly %b, i32 noundef %size) local_unnamed_addr #0 { +entry: + %cmp11 = icmp sgt i32 %size, 0 + br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %size to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret i32 undef + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, 
!"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)"} +!10 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 2, column: 5, scope: !10) + +; COARSEGRAIN: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start +; COARSEGRAIN-NEXT: AutoTuningCompile: No change in opt pipeline for Basic/CoarseGrain incremental compilation mode. +; COARSEGRAIN-NOT: Skip pass {{.*}}: True + +; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start +; FINEGRAIN-1-NEXT: AutoTuningCompile: SkipPasses enabled. +; FINEGRAIN-1-NOT: Skip pass {{.*}}: False +; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: inline +; FINEGRAIN-INLINE: AutoTuningCompile: SkipPasses disabled. +; FINEGRAIN-INLINE: Skip pass 'InlinerPass': False +; FINEGRAIN-INLINE-NEXT: *** IR Dump After InlinerPass +; FINEGRAIN-INLINE-NOT: Skip pass {{.*}}: True + +; FINEGRAIN-2: AutoTuningCompile: Old decision (SkipPasses = True ) continued. +; FINEGRAIN-2-NOT: Skip pass {{.*}}: False +; FINEGRAIN-2: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: loop-unroll +; FINEGRAIN-UNROLL: AutoTuningCompile: SkipPasses disabled. 
+; FINEGRAIN-UNROLL-NOT: Skip pass {{.*}}: True diff --git a/llvm/test/AutoTuning/Inline/Inputs/template.yaml b/llvm/test/AutoTuning/Inline/Inputs/template.yaml new file mode 100644 index 000000000000..e04612183d1f --- /dev/null +++ b/llvm/test/AutoTuning/Inline/Inputs/template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: inline +Name: simpleFunction-entry +Function: bar +CodeRegionType: callsite +CodeRegionHash: 5550568187071847048 +Args: + - ForceInline: [force-inline] +... diff --git a/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml new file mode 100644 index 000000000000..9fc88f56d6bc --- /dev/null +++ b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml @@ -0,0 +1,7 @@ +--- !AutoTuning +Pass: inline +CodeRegionType: callsite +CodeRegionHash: 5550568187071847048 +Args: + - ForceInline: [force-inline] +... diff --git a/llvm/test/AutoTuning/Inline/duplicate-calls.ll b/llvm/test/AutoTuning/Inline/duplicate-calls.ll new file mode 100644 index 000000000000..ad32262ad044 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/duplicate-calls.ll @@ -0,0 +1,96 @@ +; RUN: rm %t.duplicate_calls -rf +; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.duplicate_calls \ +; RUN: -auto-tuning-type-filter=CallSite --disable-output +; RUN: FileCheck %s --input-file %t.duplicate_calls/duplicate-calls.ll.yaml + +; ModuleID = 'duplicate-calls.c' +source_filename = "duplicate-calls.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @bar(i32* nocapture %result, i32* %cfb, i32 %bytes) local_unnamed_addr #0 !dbg !10 { +entry: + %call = tail call i32 @test(i32* %cfb, i32 %bytes) #1, !dbg !12 + store i32 %call, i32* %result, align 4, !dbg !13, !tbaa !14 + ret void, !dbg !18 +} + +declare dso_local i32 @test(i32*, i32) local_unnamed_addr #0 + +; Function Attrs: nounwind 
uwtable +define dso_local void @foo(i32* %cfb, i32* readnone %saved, i32* nocapture %result, i32 %bytes) local_unnamed_addr #0 !dbg !19 { +entry: + %tobool.not = icmp eq i32* %cfb, null, !dbg !20 + br i1 %tobool.not, label %if.else, label %if.then.split, !dbg !20 + +if.then.split: ; preds = %entry + tail call void @bar(i32* %result, i32* nonnull %cfb, i32 %bytes), !dbg !21 + br label %return, !dbg !22 + +if.else: ; preds = %entry + %tobool1.not = icmp eq i32* %saved, null, !dbg !23 + br i1 %tobool1.not, label %if.else.split, label %return, !dbg !23 + +if.else.split: ; preds = %if.else + tail call void @bar(i32* %result, i32* null, i32 %bytes), !dbg !21 + br label %return, !dbg !23 + +return: ; preds = %if.then.split, %if.else.split, %if.else + ret void, !dbg !24 +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "duplicate-calls.c", directory: "/home/m00629332/benchmarks/cBench/source/security_pgp_d/src") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, 
!"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)"} +!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !11, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 10, column: 16, scope: !10) +!13 = !DILocation(line: 10, column: 14, scope: !10) +!14 = !{!15, !15, i64 0} +!15 = !{!"int", !16, i64 0} +!16 = !{!"omnipotent char", !17, i64 0} +!17 = !{!"Simple C/C++ TBAA"} +!18 = !DILocation(line: 14, column: 1, scope: !10) +!19 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !11, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 22, column: 6, scope: !19) +!21 = !DILocation(line: 27, column: 2, scope: !19) +!22 = !DILocation(line: 23, column: 3, scope: !19) +!23 = !DILocation(line: 24, column: 11, scope: !19) +!24 = !DILocation(line: 28, column: 1, scope: !19) + +; CHECK: --- !AutoTuning +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: bar-if.then.split +; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } +; CHECK-NEXT: Function: foo +; CHECK-NEXT: CodeRegionType: callsite +; CHECK-NEXT: CodeRegionHash: +; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } +; CHECK-NEXT: Invocation: 0 +; CHECK-NEXT: ... 
+; CHECK-NEXT: --- !AutoTuning +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: bar-if.else.split +; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } +; CHECK-NEXT: Function: foo +; CHECK-NEXT: CodeRegionType: callsite +; CHECK-NEXT: CodeRegionHash: +; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } +; CHECK-NEXT: Invocation: 0 diff --git a/llvm/test/AutoTuning/Inline/force-inline.ll b/llvm/test/AutoTuning/Inline/force-inline.ll new file mode 100644 index 000000000000..cedfc8df3483 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/force-inline.ll @@ -0,0 +1,84 @@ +; REQUIRES: asserts +; RUN: opt < %s -passes=inline -debug-only=inline -disable-output -S 2>&1 | FileCheck %s -check-prefix=DEFAULT +; simpleFunction will be inlined with the default behavior. + +; RUN: rm %t.force-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template.yaml > %t.force-inline.yaml +; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ +; RUN: -auto-tuning-input=%t.force-inline.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=FORCE-INLINE +; Test with ForceInline=true; + +; RUN: rm %t.force-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template_no_metadata.yaml > %t.force-inline.yaml +; RUN: opt %s -passes=inline -S -auto-tuning-input=%t.force-inline.yaml \ +; RUN: -debug-only=inline -disable-output -auto-tuning-omit-metadata 2>&1 | \ +; RUN: FileCheck %s -check-prefix=FORCE-INLINE +; Test with ForceInline=true; + +; RUN: rm %t.no-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template.yaml > %t.no-inline.yaml +; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ +; RUN: -auto-tuning-input=%t.no-inline.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NO-INLINE +; Test with ForceInline=false; + +; RUN: rm %t.no-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template_no_metadata.yaml > %t.no-inline.yaml +; 
RUN: opt %s -passes='cgscc(inline)' -debug-only=inline -disable-output -S \ +; RUN: -auto-tuning-input=%t.no-inline.yaml -auto-tuning-omit-metadata 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NO-INLINE +; Test with ForceInline=false; + +@a = global i32 4 + +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #0 { +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +entry: + %0 = tail call i32 @simpleFunction(i32 6) + ret i32 %0 +} + +declare void @extern() + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } + +; DEFAULT: Inlining (cost=120, threshold=337) +; DEFAULT-SAME: simpleFunction +; FORCE-INLINE: Inlining (cost=always): Force inlined by auto-tuning +; FORCE-INLINE-SAME: simpleFunction +; NO-INLINE: NOT Inlining (cost=never): Force non-inlined by auto-tuning +; NO-INLINE-SAME: simpleFunction diff --git a/llvm/test/AutoTuning/Inline/inline-attribute.ll b/llvm/test/AutoTuning/Inline/inline-attribute.ll new file mode 100644 index 000000000000..50f583d0a51e --- /dev/null +++ b/llvm/test/AutoTuning/Inline/inline-attribute.ll @@ -0,0 +1,85 @@ +; RUN: rm %t.inline_opp -rf +; 
RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.inline_opp -auto-tuning-type-filter=CallSite --disable-output +; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-1 +; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-2 + +; ModuleID = 'inline.c' +source_filename = "inline.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: noinline norecurse nounwind readnone uwtable willreturn +define dso_local i32 @mul(i32 %a) local_unnamed_addr #0 !dbg !10 { +entry: + %mul = mul nsw i32 %a, %a, !dbg !12 + ret i32 %mul, !dbg !13 +} + +; Function Attrs: alwaysinline nounwind uwtable +define dso_local i32 @add(i32 %a) local_unnamed_addr #1 !dbg !14 { +entry: + %add = shl nsw i32 %a, 1, !dbg !15 + ret i32 %add, !dbg !16 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @inc(i32 %a) local_unnamed_addr #2 !dbg !17 { +entry: + %inc = add nsw i32 %a, 1, !dbg !18 + ret i32 %inc, !dbg !19 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @func(i32 %a) local_unnamed_addr #2 !dbg !20 { +entry: + %call = call i32 @add(i32 %a), !dbg !21 + %call1 = call i32 @mul(i32 %a), !dbg !22 + %add = add nsw i32 %call, %call1, !dbg !23 + %call2 = call i32 @inc(i32 %a), !dbg !24 + %add3 = add nsw i32 %add, %call2, !dbg !25 + ret i32 %add3, !dbg !26 +} + +attributes #0 = { noinline norecurse nounwind readnone uwtable willreturn "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" 
"frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner/ir-hashing") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)"} +!10 = distinct !DISubprogram(name: "mul", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 3, column: 13, scope: !10) +!13 = !DILocation(line: 3, column: 5, scope: !10) +!14 = distinct !DISubprogram(name: "add", scope: !1, file: !1, 
line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!15 = !DILocation(line: 8, column: 13, scope: !14) +!16 = !DILocation(line: 8, column: 5, scope: !14) +!17 = distinct !DISubprogram(name: "inc", scope: !1, file: !1, line: 11, type: !11, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!18 = !DILocation(line: 12, column: 12, scope: !17) +!19 = !DILocation(line: 12, column: 5, scope: !17) +!20 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 15, type: !11, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!21 = !DILocation(line: 16, column: 12, scope: !20) +!22 = !DILocation(line: 16, column: 19, scope: !20) +!23 = !DILocation(line: 16, column: 18, scope: !20) +!24 = !DILocation(line: 16, column: 26, scope: !20) +!25 = !DILocation(line: 16, column: 25, scope: !20) +!26 = !DILocation(line: 16, column: 5, scope: !20) + +; TEST-1: Pass: inline +; TEST-1-NOT: Pass: inline + +; TEST-2: Name: inc +; TEST-2-NEXT: DebugLoc: { File: test.c, Line: 16, Column: 26 } +; TEST-2-NEXT: Function: func +; TEST-2-NEXT: CodeRegionType: callsite diff --git a/llvm/test/AutoTuning/Inline/opp.ll b/llvm/test/AutoTuning/Inline/opp.ll new file mode 100644 index 000000000000..dfe1dac29476 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/opp.ll @@ -0,0 +1,64 @@ +; RUN: rm %t.callsite_opp -rf +; RUN: sed 's#\[number\]#25#g; s#\[func_name\]#ColdFunction#g' %S/Inputs/template.yaml > %t.template25.yaml +; RUN: opt %s -passes=inline -S -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite + +; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=CALLSITE + +@a = global i32 4 + +declare void @extern() +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #1 { +entry: + call void @extern() + %a1 = load 
volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +define i32 @bar(i32 %a) #0 { +entry: + %0 = tail call i32 @simpleFunction(i32 6) + ret i32 %0 +} + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } + +; Check if code regions are properly generated as tuning opportunities. +; CALLSITE: --- !AutoTuning +; CALLSITE-NEXT: Pass: inline +; CALLSITE-NEXT: Name: simpleFunction +; CALLSITE-NEXT: Function: bar +; CALLSITE-NEXT: CodeRegionType: callsite +; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} +; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } +; CALLSITE-NEXT: Invocation: 0 +; CALLSITE-NEXT: ... + +; Check if external functions are filtered out. +; EXTERNAL-NOT: Name: extern diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml new file mode 100644 index 000000000000..6dc49a1f7dc2 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: for.cond +DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } +Function: foo +CodeRegionType: loop +Args: + - UnrollCount: [number] +Invocation: 0 +... 
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml new file mode 100644 index 000000000000..4920329dbd4b --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml @@ -0,0 +1,10 @@ +# CodeRegionHash is correct for the first code region only. +!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, + DebugLoc: {Column: 8, File: loop-nest.c, Line: 10}, Function: loop_nest, Invocation: 0, + Name: for.body6.us, Pass: loop-unroll} +--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, + DebugLoc: {Column: 5, File: loop-nest.c, Line: 9}, Function: loop_nest, Invocation: 0, + Name: for.cond4.preheader.us, Pass: loop-unroll} +--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, + DebugLoc: {Column: 3, File: loop-nest.c, Line: 8}, Function: loop_nest, Invocation: 0, + Name: for.cond1.preheader, Pass: loop-unroll} diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml new file mode 100644 index 000000000000..a90cebbce88f --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: loop-unroll +Name: loop +Function: invariant_backedge_1 +CodeRegionType: loop +Args: + - UnrollCount: [number] +Invocation: 0 +... diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml new file mode 100644 index 000000000000..18681a0e2efe --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: label %5 +Function: main +CodeRegionType: loop +CodeRegionHash: [hash] +Args: +- UnrollCount: [number] +Invocation: 1 +... 
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml new file mode 100644 index 000000000000..166f877a232e --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: [name] +Function: foo +CodeRegionType: loop +CodeRegionHash: [hash] +Args: + - UnrollCount: [number] +Invocation: 1 +... diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml new file mode 100644 index 000000000000..b626473cf782 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml @@ -0,0 +1,8 @@ +--- !AutoTuning +Pass: loop-unroll +CodeRegionType: loop +CodeRegionHash: [hash] +Args: + - UnrollCount: [number] +Invocation: 1 +... diff --git a/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll new file mode 100644 index 000000000000..85dd690d01c5 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll @@ -0,0 +1,161 @@ +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' | \ +; RUN: FileCheck %s -check-prefix=DISABLE + +; RUN: rm %t.unroll_debug_loc0.yaml -rf +; RUN: sed 's#\[number\]#0#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc0.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.unroll_debug_loc0.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL0 + +; RUN: rm %t.unroll_debug_loc4.yaml -rf +; RUN: sed 's#\[number\]#4#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc4.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll_debug_loc4.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL4 + +; RUN: rm %t.unroll4.yaml -rf +; RUN: sed 
's#\[number\]#4#g; s#\[name\]#for.cond#g; s#\[hash\]#11552168367013316892#g;'\ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll4.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL4-MISMATCH + +; UNSUPPORTED: windows + +; ModuleID = 'loop-opp.c' +source_filename = "loop-opp.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @foo(i32* %n) #0 !dbg !6 { +entry: + %n.addr = alloca i32*, align 8 + %b = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %n, i32** %n.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 + call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15 + store i32 0, i32* %b, align 4, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 + store i32 0, i32* %i, align 4, !dbg !18 + br label %for.cond, !dbg !19 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !20 + %1 = load i32*, i32** %n.addr, align 8, !dbg !23 + %2 = load i32, i32* %1, align 4, !dbg !24 + %cmp = icmp slt i32 %0, %2, !dbg !25 + br i1 %cmp, label %for.body, label %for.end, !dbg !26 + +for.body: ; preds = %for.cond + %3 = load i32, i32* %b, align 4, !dbg !28 + %add = add nsw i32 %3, 1, !dbg !30 + store i32 %add, i32* %b, align 4, !dbg !31 + br label %for.inc, !dbg !32 + +for.inc: ; preds = %for.body + %4 = load i32, i32* %i, align 4, !dbg !33 + %inc = add nsw i32 %4, 1, !dbg !33 + store i32 %inc, i32* %i, align 4, !dbg !33 + br label %for.cond, !dbg !35, !llvm.loop !36 + +for.end: ; preds = %for.cond + %5 = load i32, i32* %b, align 4, !dbg !39 + ret i32 %5, !dbg !40 +} + +; Function Attrs: nounwind readnone +declare void 
@llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "loop-opp.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!""} +!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !10} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) +!12 = !DIExpression() +!13 = !DILocation(line: 1, column: 20, scope: !6) +!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9) +!15 = !DILocation(line: 3, column: 9, scope: !6) +!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) +!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 4, column: 14, scope: !17) +!19 = !DILocation(line: 4, column: 10, scope: !17) +!20 = !DILocation(line: 4, column: 20, scope: !21) +!21 = !DILexicalBlockFile(scope: !22, 
file: !1, discriminator: 1) +!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) +!23 = !DILocation(line: 4, column: 25, scope: !21) +!24 = !DILocation(line: 4, column: 24, scope: !21) +!25 = !DILocation(line: 4, column: 22, scope: !21) +!26 = !DILocation(line: 4, column: 5, scope: !27) +!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) +!28 = !DILocation(line: 6, column: 11, scope: !29) +!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) +!30 = !DILocation(line: 6, column: 12, scope: !29) +!31 = !DILocation(line: 6, column: 9, scope: !29) +!32 = !DILocation(line: 7, column: 5, scope: !29) +!33 = !DILocation(line: 4, column: 28, scope: !34) +!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) +!35 = !DILocation(line: 4, column: 5, scope: !34) +!36 = distinct !{!36, !37, !38} +!37 = !DILocation(line: 4, column: 5, scope: !17) +!38 = !DILocation(line: 7, column: 5, scope: !17) +!39 = !DILocation(line: 8, column: 12, scope: !6) +!40 = !DILocation(line: 8, column: 5, scope: !6) + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled when +; the input remark contains DebugLoc info. +; +; DISABLE-LABEL: @foo( +; DISABLE: for.cond +; DISABLE: for.body +; DISABLE-NOT: for.body.1 +; DISABLE: for.inc +; DISABLE-NOT: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled +; when unroll count explicitly set to be 0. +; +; UNROLL0-LABEL: @foo( +; UNROLL0: for.cond +; UNROLL0: for.body +; UNROLL0-NOT: for.body.1 +; UNROLL0: for.inc +; UNROLL0-NOT: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 +; when explicitly requested. 
+; +; UNROLL4-LABEL: @foo( +; UNROLL4: for.cond +; UNROLL4: for.body +; UNROLL4: for.body.1 +; UNROLL4: for.body.2 +; UNROLL4: for.body.3 +; UNROLL4: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled +; when DebugLoc is missing in the input remark. +; +; UNROLL4-MISMATCH-LABEL: @foo( +; UNROLL4-MISMATCH: for.cond +; UNROLL4-MISMATCH: for.body +; UNROLL4-MISMATCH-NOT: for.body.1 +; UNROLL4-MISMATCH: for.inc +; UNROLL4-MISMATCH-NOT: llvm.loop.unroll.disable diff --git a/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll new file mode 100644 index 000000000000..414c6ff2d1b0 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll @@ -0,0 +1,56 @@ +; RUN: rm %t.default_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-type-filter=Loop \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output +; RUN: FileCheck %s --input-file %t.default_opp/dynamic_config.ll.yaml + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local void @transform(i64* nocapture %W) local_unnamed_addr{ +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.037 = phi i32 [ 16, %entry ], [ %inc, %for.body ] + %sub = add nsw i32 %i.037, -3 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i64, i64* %W, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %sub1 = add nsw i32 %i.037, -6 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i64, i64* %W, i64 %idxprom2 + %1 = load i64, i64* %arrayidx3, align 8 + %xor = xor i64 %1, %0 + %idxprom4 = zext i32 %i.037 to i64 + %arrayidx5 = getelementptr inbounds i64, i64* %W, i64 %idxprom4 + store i64 %xor, i64* %arrayidx5, align 8 + %inc = add nuw nsw i32 %i.037, 1 + %cmp = icmp ult i32 %i.037, 79 + br i1 %cmp, label %for.body, label %for.body8.preheader + +for.body8.preheader: ; preds = %for.body + br label %for.body8 
+ +for.body8: ; preds = %for.body8.preheader, %for.body8 + %indvars.iv = phi i64 [ 80, %for.body8.preheader ], [ %indvars.iv.next, %for.body8 ] + %2 = add nsw i64 %indvars.iv, -4 + %arrayidx11 = getelementptr inbounds i64, i64* %W, i64 %2 + %3 = load i64, i64* %arrayidx11, align 8 + %4 = add nsw i64 %indvars.iv, -5 + %arrayidx14 = getelementptr inbounds i64, i64* %W, i64 %4 + %5 = load i64, i64* %arrayidx14, align 8 + %xor15 = xor i64 %5, %3 + %arrayidx17 = getelementptr inbounds i64, i64* %W, i64 %indvars.iv + store i64 %xor15, i64* %arrayidx17, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.body8, label %for.end20 + +for.end20: ; preds = %for.body8 + ret void +} + +; CHECK: --- !AutoTuning +; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] +; CHECK: ... +; CHECK-NEXT: --- !AutoTuning +; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] +; CHECK: ... diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll new file mode 100644 index 000000000000..7f3e27ca057a --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll @@ -0,0 +1,136 @@ +; REQUIRES: asserts +; CodeRegionHash matches for the first code region only. AutoTuner will find +; match for one code region when hash matching is enabled. AutoTuner will find +; match for all three code regions when hash matching is disabled. 
+ +; RUN: rm -rf %t.loop_nest.txt +; RUN: opt %s -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -debug-only=autotuning -auto-tuning-input=%S/Inputs/loop_nest.yaml \ +; RUN: --disable-output &> %t.loop_nest.txt +; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ +; RUN: FileCheck %s -check-prefix=HASH_MATCHING_ENABLED + +; RUN: rm -rf %t.loop_nest.txt +; RUN: opt %s -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%S/Inputs/loop_nest.yaml -debug-only=autotuning \ +; RUN: -auto-tuning-code-region-matching-hash=false --disable-output &> %t.loop_nest.txt +; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ +; RUN: FileCheck %s -check-prefix=HASH_MATCHING_DISABLED + +; ModuleID = 'loop-nest.c' +source_filename = "loop-nest.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local void @loop_nest(i32 %ni, i32 %nj, i32 %nk, i32 %alpha, i32 %beta, i32** nocapture readonly %A, i32** nocapture readonly %B, i32** nocapture readonly %C) local_unnamed_addr #0 !dbg !10 { +entry: + %cmp41 = icmp sgt i32 %ni, 0, !dbg !12 + br i1 %cmp41, label %for.cond1.preheader.lr.ph, label %for.end23, !dbg !13 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp238 = icmp slt i32 %nk, 1 + %cmp536 = icmp slt i32 %nj, 1 + %wide.trip.count51 = zext i32 %ni to i64, !dbg !12 + %wide.trip.count47 = zext i32 %nk to i64 + %wide.trip.count = zext i32 %nj to i64 + %brmerge = or i1 %cmp238, %cmp536 + br label %for.cond1.preheader, !dbg !13 + +for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc21 + %indvars.iv49 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next50, %for.inc21 ] + br i1 %brmerge, label %for.inc21, label %for.cond4.preheader.us.preheader, !dbg !14 + +for.cond4.preheader.us.preheader: ; preds = %for.cond1.preheader + %arrayidx15 = getelementptr 
inbounds i32*, i32** %C, i64 %indvars.iv49 + %arrayidx = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv49 + %.pre = load i32*, i32** %arrayidx, align 8, !tbaa !15 + %.pre53 = load i32*, i32** %arrayidx15, align 8, !tbaa !15 + br label %for.cond4.preheader.us, !dbg !14 + +for.cond4.preheader.us: ; preds = %for.cond4.preheader.us.preheader, %for.cond4.for.inc18_crit_edge.us + %indvars.iv45 = phi i64 [ 0, %for.cond4.preheader.us.preheader ], [ %indvars.iv.next46, %for.cond4.for.inc18_crit_edge.us ] + %arrayidx8.us = getelementptr inbounds i32, i32* %.pre, i64 %indvars.iv45 + %arrayidx10.us = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv45 + %0 = load i32*, i32** %arrayidx10.us, align 8, !tbaa !15 + br label %for.body6.us, !dbg !19 + +for.body6.us: ; preds = %for.cond4.preheader.us, %for.body6.us + %indvars.iv = phi i64 [ 0, %for.cond4.preheader.us ], [ %indvars.iv.next, %for.body6.us ] + %1 = load i32, i32* %arrayidx8.us, align 4, !dbg !20, !tbaa !21 + %mul.us = mul nsw i32 %1, %alpha, !dbg !23 + %arrayidx12.us = getelementptr inbounds i32, i32* %0, i64 %indvars.iv, !dbg !24 + %2 = load i32, i32* %arrayidx12.us, align 4, !dbg !24, !tbaa !21 + %mul13.us = mul nsw i32 %mul.us, %2, !dbg !25 + %arrayidx17.us = getelementptr inbounds i32, i32* %.pre53, i64 %indvars.iv, !dbg !26 + %3 = load i32, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 + %add.us = add nsw i32 %3, %mul13.us, !dbg !27 + store i32 %add.us, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !29 + br i1 %exitcond.not, label %for.cond4.for.inc18_crit_edge.us, label %for.body6.us, !dbg !19, !llvm.loop !30 + +for.cond4.for.inc18_crit_edge.us: ; preds = %for.body6.us + %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1, !dbg !33 + %exitcond48.not = icmp eq i64 %indvars.iv.next46, %wide.trip.count47, !dbg !34 + br i1 %exitcond48.not, label %for.inc21, label 
%for.cond4.preheader.us, !dbg !14, !llvm.loop !35 + +for.inc21: ; preds = %for.cond4.for.inc18_crit_edge.us, %for.cond1.preheader + %indvars.iv.next50 = add nuw nsw i64 %indvars.iv49, 1, !dbg !37 + %exitcond52.not = icmp eq i64 %indvars.iv.next50, %wide.trip.count51, !dbg !12 + br i1 %exitcond52.not, label %for.end23, label %for.cond1.preheader, !dbg !13, !llvm.loop !38 + +for.end23: ; preds = %for.inc21, %entry + ret void, !dbg !40 +} + +attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "loop-nest.c", directory: "/home/m00629332/code/autoTuner") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)"} +!10 = distinct !DISubprogram(name: "loop_nest", scope: !1, file: !1, line: 1, type: !11, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = 
!DILocation(line: 8, column: 17, scope: !10) +!13 = !DILocation(line: 8, column: 3, scope: !10) +!14 = !DILocation(line: 9, column: 5, scope: !10) +!15 = !{!16, !16, i64 0} +!16 = !{!"any pointer", !17, i64 0} +!17 = !{!"omnipotent char", !18, i64 0} +!18 = !{!"Simple C/C++ TBAA"} +!19 = !DILocation(line: 10, column: 8, scope: !10) +!20 = !DILocation(line: 11, column: 23, scope: !10) +!21 = !{!22, !22, i64 0} +!22 = !{!"int", !17, i64 0} +!23 = !DILocation(line: 11, column: 21, scope: !10) +!24 = !DILocation(line: 11, column: 33, scope: !10) +!25 = !DILocation(line: 11, column: 31, scope: !10) +!26 = !DILocation(line: 11, column: 4, scope: !10) +!27 = !DILocation(line: 11, column: 12, scope: !10) +!28 = !DILocation(line: 10, column: 29, scope: !10) +!29 = !DILocation(line: 10, column: 22, scope: !10) +!30 = distinct !{!30, !19, !31, !32} +!31 = !DILocation(line: 11, column: 39, scope: !10) +!32 = !{!"llvm.loop.mustprogress"} +!33 = !DILocation(line: 9, column: 26, scope: !10) +!34 = !DILocation(line: 9, column: 19, scope: !10) +!35 = distinct !{!35, !14, !36, !32} +!36 = !DILocation(line: 12, column: 5, scope: !10) +!37 = !DILocation(line: 8, column: 24, scope: !10) +!38 = distinct !{!38, !13, !39, !32} +!39 = !DILocation(line: 13, column: 3, scope: !10) +!40 = !DILocation(line: 15, column: 1, scope: !10) + +; HASH_MATCHING_ENABLED: 1 +; HASH_MATCHING_DISABLED: 3 diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll new file mode 100644 index 000000000000..f3839a49b20e --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll @@ -0,0 +1,53 @@ +; NOTE: This file is used to test when UnrollCount = 1 and when the compiler +; sees that Loop Peeling is beneficial and possible, then we do Loop Peeling. 
+; RUN: rm %t.unroll1.yaml -rf +; RUN: sed 's#\[number\]#1#g;' %S/Inputs/loop_peel.yaml > %t.unroll1.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll1.yaml | FileCheck %s + +; RUN: rm %t.unroll0.yaml -rf +; RUN: sed 's#\[number\]#0#g;' %S/Inputs/loop_peel.yaml > %t.unroll0.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll0.yaml | FileCheck %s --check-prefix=DISABLE + +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop --disable-output +; RUN: FileCheck %s --input-file %t.unroll_opp/loop_peel.ll.yaml -check-prefix=TEST-1 + +define i32 @invariant_backedge_1(i32 %a, i32 %b) { +; CHECK-LABEL: @invariant_backedge_1 +; CHECK-NOT: %plus = phi +; CHECK: loop.peel: +; CHECK: loop: +; CHECK: %i = phi +; CHECK: %sum = phi +; DISABLE-LABEL: @invariant_backedge_1 +; DISABLE-NOT: loop.peel: +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ] + %plus = phi i32 [ %a, %entry ], [ %b, %loop ] + + %incsum = add i32 %sum, %plus + %inc = add i32 %i, 1 + %cmp = icmp slt i32 %i, 1000 + + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %sum +} + +; Check for dynamic values when UnrollCount is set to 1: +; TEST-1: Pass: loop-unroll +; TEST-1-NEXT: Name: loop +; TEST-1-NEXT: Function: invariant_backedge_1 +; TEST-1-NEXT: CodeRegionType: loop +; TEST-1-NEXT: CodeRegionHash: {{[0-9]+}} +; TEST-1-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 2 ] } diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll new file mode 100644 index 000000000000..843b8e28f3d8 --- 
/dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll @@ -0,0 +1,129 @@ +; RUN: rm %t.unroll_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 + +; RUN: rm %t.unroll_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ +; RUN: -passes='require<opt-remark-emit>,function(loop-unroll)' --disable-output +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 + +; This function contains two loops. loop for.body is defined with a pragma +; unroll_count(4) and loop for.body9 is without a pragma. AutoTuner will only +; consider for.body9 as a tuning opportunity. 
+ +; ModuleID = 'loop-unroll.c' +source_filename = "loop-unroll.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local void @loop(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32* noalias nocapture %d, i32 %len) local_unnamed_addr #0 !dbg !10 { +entry: + %cmp34 = icmp slt i32 0, %len, !dbg !12 + br i1 %cmp34, label %for.body.lr.ph, label %for.cond6.preheader, !dbg !13 + +for.body.lr.ph: ; preds = %entry + br label %for.body, !dbg !13 + +for.cond.for.cond6.preheader_crit_edge: ; preds = %for.body + br label %for.cond6.preheader, !dbg !13 + +for.cond6.preheader: ; preds = %for.cond.for.cond6.preheader_crit_edge, %entry + %cmp732 = icmp slt i32 0, %len, !dbg !14 + br i1 %cmp732, label %for.body9.lr.ph, label %for.cond.cleanup8, !dbg !15 + +for.body9.lr.ph: ; preds = %for.cond6.preheader + br label %for.body9, !dbg !15 + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %idxprom = zext i32 %i.035 to i64, !dbg !16 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom, !dbg !16 + %0 = load i32, i32* %arrayidx, align 4, !dbg !16, !tbaa !17 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom, !dbg !21 + %1 = load i32, i32* %arrayidx2, align 4, !dbg !21, !tbaa !17 + %add = add nsw i32 %1, %0, !dbg !22 + %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %idxprom, !dbg !23 + store i32 %add, i32* %arrayidx4, align 4, !dbg !24, !tbaa !17 + %inc = add nuw nsw i32 %i.035, 1, !dbg !25 + %cmp = icmp slt i32 %inc, %len, !dbg !12 + br i1 %cmp, label %for.body, label %for.cond.for.cond6.preheader_crit_edge, !dbg !13, !llvm.loop !26 + +for.cond6.for.cond.cleanup8_crit_edge: ; preds = %for.body9 + br label %for.cond.cleanup8, !dbg !15 + +for.cond.cleanup8: ; preds = %for.cond6.for.cond.cleanup8_crit_edge, 
%for.cond6.preheader + ret void, !dbg !30 + +for.body9: ; preds = %for.body9.lr.ph, %for.body9 + %i5.033 = phi i32 [ 0, %for.body9.lr.ph ], [ %inc17, %for.body9 ] + %idxprom10 = zext i32 %i5.033 to i64, !dbg !31 + %arrayidx11 = getelementptr inbounds i32, i32* %a, i64 %idxprom10, !dbg !31 + %2 = load i32, i32* %arrayidx11, align 4, !dbg !31, !tbaa !17 + %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 %idxprom10, !dbg !32 + %3 = load i32, i32* %arrayidx13, align 4, !dbg !32, !tbaa !17 + %mul = mul nsw i32 %3, %2, !dbg !33 + %arrayidx15 = getelementptr inbounds i32, i32* %d, i64 %idxprom10, !dbg !34 + store i32 %mul, i32* %arrayidx15, align 4, !dbg !35, !tbaa !17 + %inc17 = add nuw nsw i32 %i5.033, 1, !dbg !36 + %cmp7 = icmp slt i32 %inc17, %len, !dbg !14 + br i1 %cmp7, label %for.body9, label %for.cond6.for.cond.cleanup8_crit_edge, !dbg !15, !llvm.loop !37 +} + +attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "loop-unroll.c", directory: "/home/AutoTuner/") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = 
!{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)"} +!10 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 3, column: 20, scope: !10) +!13 = !DILocation(line: 3, column: 5, scope: !10) +!14 = !DILocation(line: 7, column: 20, scope: !10) +!15 = !DILocation(line: 7, column: 5, scope: !10) +!16 = !DILocation(line: 4, column: 16, scope: !10) +!17 = !{!18, !18, i64 0} +!18 = !{!"int", !19, i64 0} +!19 = !{!"omnipotent char", !20, i64 0} +!20 = !{!"Simple C/C++ TBAA"} +!21 = !DILocation(line: 4, column: 23, scope: !10) +!22 = !DILocation(line: 4, column: 21, scope: !10) +!23 = !DILocation(line: 4, column: 9, scope: !10) +!24 = !DILocation(line: 4, column: 14, scope: !10) +!25 = !DILocation(line: 3, column: 28, scope: !10) +!26 = distinct !{!26, !13, !27, !28, !29} +!27 = !DILocation(line: 5, column: 5, scope: !10) +!28 = !{!"llvm.loop.mustprogress"} +!29 = !{!"llvm.loop.unroll.count", i32 4} +!30 = !DILocation(line: 10, column: 1, scope: !10) +!31 = !DILocation(line: 8, column: 16, scope: !10) +!32 = !DILocation(line: 8, column: 23, scope: !10) +!33 = !DILocation(line: 8, column: 21, scope: !10) +!34 = !DILocation(line: 8, column: 9, scope: !10) +!35 = !DILocation(line: 8, column: 14, scope: !10) +!36 = !DILocation(line: 7, column: 28, scope: !10) +!37 = distinct !{!37, !15, !38, !28} +!38 = !DILocation(line: 9, column: 5, scope: !10) + + +; TEST-1: Pass: loop-unroll +; TEST-1-NOT: Pass: loop-unroll + +; TEST-2: Name: for.body9 +; TEST-2-NEXT: DebugLoc: { File: loop-unroll.c, Line: 7, Column: 5 } +; TEST-2-NEXT: Function: loop +; TEST-2-NEXT: CodeRegionType: loop diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll.ll b/llvm/test/AutoTuning/LoopUnroll/unroll.ll new file mode 100644 index 
000000000000..ba5c89fffaff --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/unroll.ll @@ -0,0 +1,101 @@ +; RUN: opt %s -S -passes=loop-unroll | FileCheck %s -check-prefix=DISABLE + +; RUN: rm %t.unroll0.yaml -rf +; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll0.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ +; RUN: -auto-tuning-code-region-matching-hash=false | \ +; RUN: FileCheck %s -check-prefix=UNROLL0 + +; RUN: rm %t.unroll0.yaml -rf +; RUN: sed 's#\[number\]#0#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll0.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ +; RUN: -auto-tuning-omit-metadata | \ +; RUN: FileCheck %s -check-prefix=UNROLL0 + +; RUN: rm %t.result1 %t.unroll1.yaml -rf +; RUN: sed 's#\[number\]#1#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll1.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL1 + +; RUN: rm %t.result1 %t.unroll1.yaml -rf +; RUN: sed 's#\[number\]#1#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll1.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml \ +; RUN: -auto-tuning-omit-metadata | \ +; RUN: FileCheck %s -check-prefix=UNROLL1 + +; RUN: rm %t.result4 %t.unroll4.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL4 + +; RUN: rm %t.result4 %t.unroll4.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template_no_metadata.yaml > 
%t.unroll4.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml \ +; RUN: -auto-tuning-omit-metadata | \ +; RUN: FileCheck %s -check-prefix=UNROLL4 + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled +; +; DISABLE-LABEL: @foo( +; DISABLE: store i32 +; DISABLE-NOT: store i32 +; DISABLE: br i1 +; DISABLE-NOT: llvm.loop.unroll.disable + + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled +; when unroll count explicitly set to be 0. +; +; UNROLL0-LABEL: @foo( +; UNROLL0: store i32 +; UNROLL0-NOT: store i32 +; UNROLL0: br i1 +; UNROLL0-NOT: llvm.loop.unroll.disable + + +; Auto-tuning-enabled loop unrolling - Requesting UnrollCount = 1 will perform +; Loop Peeling, and if Loop Peeling isn't possible/beneficial then Unroll Count +; is unchanged. +; +; UNROLL1-LABEL: @foo( +; UNROLL1: store i32 +; UNROLL1-NOT: store i32 +; UNROLL1: br i1 +; UNROLL1: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 +; when explicitly requested. 
+; +; UNROLL4-LABEL: @foo( +; UNROLL4: store i32 +; UNROLL4: store i32 +; UNROLL4: store i32 +; UNROLL4: store i32 +; UNROLL4: br i1 +; UNROLL4: llvm.loop.unroll.disable diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll new file mode 100644 index 000000000000..480ccad640ae --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll @@ -0,0 +1,113 @@ +; Test loop unrolling using auto-tuning YAML api with IRs generated when ASSERTION=OFF +; The IRs generated when ASSERTION=OFF usually only use slot numbers as variable names. + +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' | \ +; RUN: FileCheck %s -check-prefix=DISABLE + +; RUN: rm %t.result1_raw %t.unroll1_raw.yaml -rf +; RUN: sed 's#\[number\]#1#g; s#\[hash\]#18159364858606519094#g' \ +; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll1_raw.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \ +; RUN: -auto-tuning-input=%t.unroll1_raw.yaml | FileCheck %s -check-prefix=UNROLL1 + +; RUN: rm %t.result2_raw %t.unroll2_raw.yaml -rf +; RUN: sed 's#\[number\]#2#g; s#\[hash\]#18159364858606519094#g' \ +; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll2_raw.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \ +; RUN: -auto-tuning-input=%t.unroll2_raw.yaml | FileCheck %s -check-prefix=UNROLL2 + +; RUN: rm %t.result4_raw %t.unroll4_raw.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[hash\]#18159364858606519094#g' \ +; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll4_raw.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \ +; RUN: -auto-tuning-input=%t.unroll4_raw.yaml | FileCheck %s -check-prefix=UNROLL4 + +; UNSUPPORTED: windows + +; ModuleID = 't.ll' +source_filename = "t.ll" + +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +define void @test(i32*) { + %2 = alloca i32*, align 8 + store i32* %0, i32** %2, align 8 + %3 = 
load i32*, i32** %2, align 8 + %4 = load i32, i32* %3, align 4 + %5 = add nsw i32 %4, 2 + %6 = load i32*, i32** %2, align 8 + store i32 %5, i32* %6, align 4 + ret void +} + +define i32 @main() { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i32 8, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = icmp sle i32 %3, 88 + br i1 %4, label %.lr.ph, label %13 + +.lr.ph: ; preds = %0 + br label %5 + +; <label>:5: ; preds = %.lr.ph, %8 + call void @test(i32* %2) + %6 = load i32, i32* %2, align 4 + %7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %6) + br label %8 + +; <label>:8: ; preds = %5 + %9 = load i32, i32* %2, align 4 + %10 = add nsw i32 %9, 8 + store i32 %10, i32* %2, align 4 + %11 = load i32, i32* %2, align 4 + %12 = icmp sle i32 %11, 88 + br i1 %12, label %5, label %._crit_edge + +._crit_edge: ; preds = %8 + br label %13 + +; <label>:13: ; preds = %._crit_edge, %0 + %14 = load i32, i32* %1, align 4 + ret i32 %14 +} + +declare i32 @printf(i8*, ...) + + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled +; +; DISABLE-LABEL: @main( +; DISABLE: call void @test(ptr %2) +; DISABLE-NOT: call void @test(ptr %2) +; DISABLE-NOT: llvm.loop.unroll.disable + + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 1 +; when explicitly requested. +; +; UNROLL1-LABEL: @main( +; UNROLL1: call void @test(ptr %2) +; UNROLL1-NOT: call void @test(ptr %2) + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 2 +; when explicitly requested. +; +; UNROLL2-LABEL: @main( +; UNROLL2: call void @test(ptr %2) +; UNROLL2: call void @test(ptr %2) +; UNROLL2-NOT: call void @test(ptr %2) +; UNROLL2: llvm.loop.unroll.disable + + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 +; when explicitly requested. 
+; +; UNROLL4-LABEL: @main( +; UNROLL4: call void @test(ptr %2) +; UNROLL4: call void @test(ptr %2) +; UNROLL4: call void @test(ptr %2) +; UNROLL4: call void @test(ptr %2) +; UNROLL4: llvm.loop.unroll.disable diff --git a/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml new file mode 100644 index 000000000000..b65fddf4e23f --- /dev/null +++ b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: loop-vectorize +Name: bb4 +Function: TestFoo +CodeRegionType: loop +CodeRegionHash: 14229620333597121971 +Args: +- VectorizationInterleave: [number] +... diff --git a/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml new file mode 100644 index 000000000000..87d2fc2587cb --- /dev/null +++ b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml @@ -0,0 +1,7 @@ +--- !AutoTuning +Pass: loop-vectorize +CodeRegionType: loop +CodeRegionHash: 14229620333597121971 +Args: +- VectorizationInterleave: [number] +... 
diff --git a/llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll b/llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll new file mode 100644 index 000000000000..a1652babd8f4 --- /dev/null +++ b/llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll @@ -0,0 +1,88 @@ +; RUN: rm %t.1 %t.2 %t.1.yaml -rf +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -S -o %t.1 +; RUN: sed 's#\[number\]#1#g' %S/Inputs/vectorize_template.yaml > %t.1.yaml +; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.1.yaml \ +; RUN: -S -o %t.2 -debug-only=autotuning 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NUMBER1 +; RUN: diff %t.1 %t.2 + +; RUN: rm %t.1 %t.2 %t.1.yaml -rf +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -S -o %t.1 +; RUN: sed 's#\[number\]#1#g' %S/Inputs/vectorize_template_no_metadata.yaml > %t.1.yaml +; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.1.yaml \ +; RUN: -auto-tuning-omit-metadata -S -o %t.2 -debug-only=autotuning 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NUMBER1 +; RUN: diff %t.1 %t.2 + +; RUN: rm %t.3 %t.4 %t.2.yaml -rf +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -S -o %t.3 +; RUN: sed 's#\[number\]#2#g' %S/Inputs/vectorize_template.yaml > %t.2.yaml +; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.2.yaml \ +; RUN: -S -o %t.4 -debug-only=autotuning 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NUMBER2 +; RUN: diff %t.3 %t.4 + +; RUN: rm %t.3 %t.4 %t.2.yaml -rf +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -S -o %t.3 +; RUN: sed 's#\[number\]#2#g' %S/Inputs/vectorize_template_no_metadata.yaml > %t.2.yaml +; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.2.yaml \ +; RUN: -auto-tuning-omit-metadata -S -o %t.4 -debug-only=autotuning 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NUMBER2 +; RUN: diff %t.3 %t.4 + +; Compiler should not generate tuning opportunities for AutoTuner if -force-vector-interleave is specified. 
+; RUN: rm %t.interleave_opp -rf +; RUN: opt %s -S -passes=loop-vectorize -auto-tuning-opp=%t.interleave_opp \ +; RUN: -force-vector-interleave=2 --disable-output +; RUN: FileCheck %s --input-file %t.interleave_opp/force-vector-interleave.ll.yaml \ +; RUN: -check-prefix=FORCE-INTERLEAVE + +; RUN: rm %t.interleave_opp -rf +; RUN: opt %s -S -passes=loop-vectorize -auto-tuning-opp=%t.interleave_opp \ +; RUN: -force-vector-interleave=0 --disable-output +; RUN: FileCheck %s --input-file %t.interleave_opp/force-vector-interleave.ll.yaml \ +; RUN: -check-prefix=FORCE-INTERLEAVE + +; RUN: rm %t.interleave_opp -rf +; RUN: opt %s -S -passes=loop-vectorize -auto-tuning-opp=%t.interleave_opp --disable-output +; RUN: FileCheck %s --input-file %t.interleave_opp/force-vector-interleave.ll.yaml \ +; RUN: -check-prefix=NO-FORCE-INTERLEAVE + +; REQUIRES: asserts +; UNSUPPORTED: windows +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define void @TestFoo(i1 %X, i1 %Y) { +bb: + br label %.loopexit5.outer + +.loopexit5.outer: + br label %.lr.ph12 + +.loopexit: + br i1 %X, label %.loopexit5.outer, label %.lr.ph12 + +.lr.ph12: + %f.110 = phi i32* [ %tmp1, %.loopexit ], [ null, %.loopexit5.outer ] + %tmp1 = getelementptr inbounds i32, i32* %f.110, i64 -2 + br i1 %Y, label %bb4, label %.loopexit + +bb4: + %j.27 = phi i32 [ 0, %.lr.ph12 ], [ %tmp7, %bb4 ] + %tmp5 = load i32, i32* %f.110, align 4 + %tmp7 = add nsw i32 %j.27, 1 + %exitcond = icmp eq i32 %tmp7, 0 + br i1 %exitcond, label %.loopexit, label %bb4 +} + +; NUMBER1: VectorizationInterleave is set for the CodeRegion: +; NUMBER1: Name: bb4 +; NUMBER1: FuncName: TestFoo +; NUMBER2: VectorizationInterleave is set for the CodeRegion: +; NUMBER2: Name: bb4 +; NUMBER2: FuncName: TestFoo + +; FORCE-INTERLEAVE-NOT: Pass: loop-vectorize +; NO-FORCE-INTERLEAVE: Pass: loop-vectorize +; NO-FORCE-INTERLEAVE: BaselineConfig: { VectorizationInterleave: diff --git 
a/llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml b/llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml new file mode 100644 index 000000000000..34ea66e45a0a --- /dev/null +++ b/llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: machine-scheduler +Name: '%bb.1:for.cond.preheader' +Function: _preextrapolate_helper +CodeRegionType: machine_basic_block +CodeRegionHash: 17389215691512956355 +Args: +- ForceBottomUp: [bool1] +- ForceTopDown: [bool2] +... diff --git a/llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll new file mode 100644 index 000000000000..aa4781dad204 --- /dev/null +++ b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll @@ -0,0 +1,73 @@ +; RUN: rm %t.bidirectional_result %t.misched_x86_bidirectional.yaml -rf +; RUN: sed ' s#\[bool1\]#false#g; s#\[bool2\]#false#g' %S/Inputs/misched_x86_template.yaml > %t.misched_x86_bidirectional.yaml +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\ +; RUN: -verify-machineinstrs -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\ +; RUN: -verify-machineinstrs -misched-topdown -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-TOPDOWN + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\ +; RUN: -verify-machineinstrs -misched-bottomup -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BOTTOMUP + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: 
-auto-tuning-input=%t.misched_x86_bidirectional.yaml\ +; RUN: -verify-machineinstrs -misched-bottomup=false -misched-topdown=false -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BIDIRECTIONAL + +; REQUIRES: asserts +; UNSUPPORTED: windows +; +; Interesting MachineScheduler cases. + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define fastcc void @_preextrapolate_helper() nounwind uwtable ssp { +entry: + br i1 undef, label %for.cond.preheader, label %if.end + +for.cond.preheader: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind + unreachable + +if.end: ; preds = %entry + ret void +} + +; check if the scheduling policy defined with YAML is applied +; +; CHECK: _preextrapolate_helper:%bb.1 for.cond.preheader +; CHECK: ScheduleDAGMILive::schedule starting +; CHECK-NEXT: OnlyTopDown=0 OnlyBottomUp=0 + + + +; check if the scheduling policies defined with YAML and '-misched-topdown' are applied +; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0 +; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=0 OnlyBottomUp=0 + +; check if the scheduling policies defined with YAML and '-misched-bottomup' are applied +; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1 +; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=0 + +; check if the scheduling policies defined with YAML and '-misched-topdown=false'
and '-misched-bottomup=false' +; are applied +; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=0 +; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=0 diff --git a/llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll new file mode 100644 index 000000000000..c1d6894c3fe2 --- /dev/null +++ b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll @@ -0,0 +1,72 @@ +; RUN: rm %t.bottomup_result %t.misched_x86_bottomup.yaml -rf +; RUN: sed ' s#\[bool1\]#true#g; s#\[bool2\]#false#g' %S/Inputs/misched_x86_template.yaml > %t.misched_x86_bottomup.yaml +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\ +; RUN: -verify-machineinstrs -debug-only=machine-scheduler 2>&1\ +; RUN: | FileCheck %s + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\ +; RUN: -verify-machineinstrs -misched-topdown -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-TOPDOWN + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\ +; RUN: -verify-machineinstrs -misched-bottomup -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BOTTOMUP + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\ +; RUN: -verify-machineinstrs -misched-bottomup=false -misched-topdown=false -debug-only=machine-scheduler 2>&1 \ +; RUN: | 
FileCheck %s -check-prefix=MIX-WITH-FLAG-BIDIRECTIONAL + +; REQUIRES: asserts +; UNSUPPORTED: windows +; +; Interesting MachineScheduler cases. + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define fastcc void @_preextrapolate_helper() nounwind uwtable ssp { +entry: + br i1 undef, label %for.cond.preheader, label %if.end + +for.cond.preheader: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind + unreachable + +if.end: ; preds = %entry + ret void +} + +; check if the scheduling policy defined with YAML is applied +; +; CHECK: _preextrapolate_helper:%bb.1 for.cond.preheader +; CHECK: ScheduleDAGMILive::schedule starting +; CHECK-NEXT: RegionPolicy: ShouldTrackPressure=0 OnlyTopDown=0 OnlyBottomUp=1 + + +; check if the scheduling policies defined with YAML and '-misched-topdown' are applied +; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0 +; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=0 OnlyBottomUp=1 + +; check if the scheduling policies defined with YAML and '-misched-bottomup' are applied +; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1 +; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1 + +; check if the scheduling policies defined with YAML and '-misched-topdown=false' and '-misched-bottomup=false' +; are applied +; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-BIDIRECTIONAL:
ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: RegionPolicy: ShouldTrackPressure=0 OnlyTopDown=0 OnlyBottomUp=0 +; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=1 diff --git a/llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll b/llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll new file mode 100644 index 000000000000..53c527e87e41 --- /dev/null +++ b/llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll @@ -0,0 +1,72 @@ +; RUN: rm %t.topdown_result %t.misched_x86_topdown.yaml -rf +; RUN: sed 's#\[bool1\]#false#g; s#\[bool2\]#true#g' %S/Inputs/misched_x86_template.yaml > %t.misched_x86_topdown.yaml +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\ +; RUN: -verify-machineinstrs -debug-only=machine-scheduler 2>&1\ +; RUN: | FileCheck %s + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\ +; RUN: -verify-machineinstrs -misched-topdown -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-TOPDOWN + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\ +; RUN: -verify-machineinstrs -misched-bottomup -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BOTTOMUP + +; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \ +; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\ +; RUN: -verify-machineinstrs -misched-bottomup=false -misched-topdown=false -debug-only=machine-scheduler 2>&1 \ +; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BIDIRECTIONAL + +; REQUIRES: asserts +; UNSUPPORTED: windows +; +; Interesting 
MachineScheduler cases. + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define fastcc void @_preextrapolate_helper() nounwind uwtable ssp { +entry: + br i1 undef, label %for.cond.preheader, label %if.end + +for.cond.preheader: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind + unreachable + +if.end: ; preds = %entry + ret void +} + +; check if the scheduling policy defined with YAML is applied +; +; CHECK: _preextrapolate_helper:%bb.1 for.cond.preheader +; CHECK: ScheduleDAGMILive::schedule starting +; CHECK-NEXT: OnlyTopDown=1 OnlyBottomUp=0 + + +; check if the scheduling policies defined with YAML and '-misched-topdown' are applied +; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0 +; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0 + +; check if the scheduling policies defined with YAML and '-misched-bottomup' are applied +; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1 +; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=1 OnlyBottomUp=0 + +; check if the scheduling policies defined with YAML and '-misched-topdown=false' and '-misched-bottomup=false' +; are applied +; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.0 entry +; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=0 +; MIX-WITH-FLAG-BIDIRECTIONAL:
_preextrapolate_helper:%bb.1 for.cond.preheader +; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting +; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=1 OnlyBottomUp=0 diff --git a/llvm/test/AutoTuning/MetaData/structural_hash.ll b/llvm/test/AutoTuning/MetaData/structural_hash.ll new file mode 100644 index 000000000000..2d8adca910bc --- /dev/null +++ b/llvm/test/AutoTuning/MetaData/structural_hash.ll @@ -0,0 +1,234 @@ +; RUN: rm %t.hash_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.hash_opp -auto-tuning-type-filter=CallSite --disable-output +; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=META-CALL1 +; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=META-CALL2 +; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=META-CALL3 + +; RUN: rm %t.hash_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-type-filter=CallSite -auto-tuning-opp=%t.hash_opp \ +; RUN: -auto-tuning-omit-metadata --disable-output +; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=NO-META-CALL + +; UNSUPPORTED: windows + +; ModuleID = 'loop_small.cpp' +source_filename = "loop_small.cpp" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@arr = dso_local global [1000000 x i32] zeroinitializer, align 4, !dbg !0 + +; Function Attrs: nounwind uwtable mustprogress +define dso_local void @_Z1fv() #0 !dbg !18 { +entry: + %i = alloca i32, align 4 + call void @llvm.dbg.declare(metadata i32* %i, metadata !21, metadata !DIExpression()), !dbg !23 + store i32 0, i32* %i, align 4, !dbg !23 + br label %for.cond, !dbg !24 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !25 + %cmp = icmp slt i32 %0, 2000, !dbg !27 + br i1 
%cmp, label %for.body, label %for.end, !dbg !28 + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4, !dbg !29 + %idxprom = sext i32 %1 to i64, !dbg !31 + %arrayidx = getelementptr inbounds [1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 %idxprom, !dbg !31 + %2 = load i32, i32* %arrayidx, align 4, !dbg !32 + %add = add nsw i32 %2, 2, !dbg !32 + store i32 %add, i32* %arrayidx, align 4, !dbg !32 + br label %for.inc, !dbg !33 + +for.inc: ; preds = %for.body + %3 = load i32, i32* %i, align 4, !dbg !34 + %inc = add nsw i32 %3, 1, !dbg !34 + store i32 %inc, i32* %i, align 4, !dbg !34 + br label %for.cond, !dbg !35, !llvm.loop !36 + +for.end: ; preds = %for.cond + ret void, !dbg !39 +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind uwtable mustprogress +define dso_local void @_Z1gv() #0 !dbg !40 { +entry: + %0 = load i32, i32* getelementptr inbounds ([1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 0), align 4, !dbg !41 + %inc = add nsw i32 %0, 1, !dbg !41 + store i32 %inc, i32* getelementptr inbounds ([1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 0), align 4, !dbg !41 + ret void, !dbg !42 +} + +; Function Attrs: norecurse nounwind uwtable mustprogress +define dso_local i32 @main() #2 !dbg !43 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + call void @llvm.dbg.declare(metadata i32* %i, metadata !46, metadata !DIExpression()), !dbg !48 + store i32 0, i32* %i, align 4, !dbg !48 + br label %for.cond, !dbg !49 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !50 + %cmp = icmp slt i32 %0, 1000000, !dbg !52 + br i1 %cmp, label %for.body, label %for.end, !dbg !53 + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4, !dbg !54 + %idxprom = sext i32 %1 to i64, !dbg !55 + %arrayidx = getelementptr inbounds [1000000 x i32], [1000000 x 
i32]* @arr, i64 0, i64 %idxprom, !dbg !55 + store i32 0, i32* %arrayidx, align 4, !dbg !56 + br label %for.inc, !dbg !55 + +for.inc: ; preds = %for.body + %2 = load i32, i32* %i, align 4, !dbg !57 + %inc = add nsw i32 %2, 1, !dbg !57 + store i32 %inc, i32* %i, align 4, !dbg !57 + br label %for.cond, !dbg !58, !llvm.loop !59 + +for.end: ; preds = %for.cond + call void @_Z1fv(), !dbg !61 + call void @_Z1gv(), !dbg !62 + call void @_Z1fv(), !dbg !63 + %3 = load i32, i32* %retval, align 4, !dbg !64 + ret i32 %3, !dbg !64 +} + +attributes #0 = { nounwind uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #2 = { norecurse nounwind uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10, !11, !12, !13, !14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "arr", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "Huawei Bisheng Compiler clang version 12.0.0 (clang-6d7704116510 flang-6d7704116510)", isOptimized: false, runtimeVersion: 0, 
emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "loop_small.cpp", directory: "/home/g84189222/boole3/llvm-project/tuneTest") +!4 = !{} +!5 = !{!0} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32000000, elements: !8) +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(count: 1000000) +!10 = !{i32 7, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 1, !"branch-target-enforcement", i32 0} +!14 = !{i32 1, !"sign-return-address", i32 0} +!15 = !{i32 1, !"sign-return-address-all", i32 0} +!16 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!17 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (clang-6d7704116510 flang-6d7704116510)"} +!18 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4) +!19 = !DISubroutineType(types: !20) +!20 = !{null} +!21 = !DILocalVariable(name: "i", scope: !22, file: !3, line: 4, type: !7) +!22 = distinct !DILexicalBlock(scope: !18, file: !3, line: 4, column: 2) +!23 = !DILocation(line: 4, column: 10, scope: !22) +!24 = !DILocation(line: 4, column: 6, scope: !22) +!25 = !DILocation(line: 4, column: 15, scope: !26) +!26 = distinct !DILexicalBlock(scope: !22, file: !3, line: 4, column: 2) +!27 = !DILocation(line: 4, column: 16, scope: !26) +!28 = !DILocation(line: 4, column: 2, scope: !22) +!29 = !DILocation(line: 5, column: 7, scope: !30) +!30 = distinct !DILexicalBlock(scope: !26, file: !3, line: 4, column: 27) +!31 = !DILocation(line: 5, column: 3, scope: !30) +!32 = !DILocation(line: 5, column: 10, scope: !30) +!33 = !DILocation(line: 6, column: 2, scope: !30) +!34 = !DILocation(line: 4, column: 24, scope: !26) +!35 = !DILocation(line: 4, column: 2, scope: !26) +!36 = distinct !{!36, 
!28, !37, !38} +!37 = !DILocation(line: 6, column: 2, scope: !22) +!38 = !{!"llvm.loop.mustprogress"} +!39 = !DILocation(line: 7, column: 1, scope: !18) +!40 = distinct !DISubprogram(name: "g", linkageName: "_Z1gv", scope: !3, file: !3, line: 8, type: !19, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4) +!41 = !DILocation(line: 9, column: 8, scope: !40) +!42 = !DILocation(line: 10, column: 1, scope: !40) +!43 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 12, type: !44, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4) +!44 = !DISubroutineType(types: !45) +!45 = !{!7} +!46 = !DILocalVariable(name: "i", scope: !47, file: !3, line: 13, type: !7) +!47 = distinct !DILexicalBlock(scope: !43, file: !3, line: 13, column: 2) +!48 = !DILocation(line: 13, column: 10, scope: !47) +!49 = !DILocation(line: 13, column: 6, scope: !47) +!50 = !DILocation(line: 13, column: 15, scope: !51) +!51 = distinct !DILexicalBlock(scope: !47, file: !3, line: 13, column: 2) +!52 = !DILocation(line: 13, column: 16, scope: !51) +!53 = !DILocation(line: 13, column: 2, scope: !47) +!54 = !DILocation(line: 13, column: 35, scope: !51) +!55 = !DILocation(line: 13, column: 31, scope: !51) +!56 = !DILocation(line: 13, column: 38, scope: !51) +!57 = !DILocation(line: 13, column: 27, scope: !51) +!58 = !DILocation(line: 13, column: 2, scope: !51) +!59 = distinct !{!59, !53, !60, !38} +!60 = !DILocation(line: 13, column: 40, scope: !47) +!61 = !DILocation(line: 14, column: 2, scope: !43) +!62 = !DILocation(line: 15, column: 2, scope: !43) +!63 = !DILocation(line: 16, column: 2, scope: !43) +!64 = !DILocation(line: 17, column: 1, scope: !43) + +; META-CALL1: --- !AutoTuning +; META-CALL1: Pass: inline +; META-CALL1: Name: _Z1fv +; META-CALL1: DebugLoc: { File: loop_small.cpp, Line: 14, Column: 2 } +; META-CALL1-NEXT: Function: main +; META-CALL1-NEXT: CodeRegionType: callsite +; 
META-CALL1-NEXT: CodeRegionHash: {{[0-9]+}} +; META-CALL1-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; META-CALL1-NEXT: BaselineConfig: { ForceInline: '1' } +; META-CALL1-NEXT: Invocation: 0 +; META-CALL1-NEXT: ... +; META-CALL2: --- !AutoTuning +; META-CALL2: Pass: inline +; META-CALL2: Name: _Z1fv +; META-CALL2: DebugLoc: { File: loop_small.cpp, Line: 16, Column: 2 } +; META-CALL2-NEXT: Function: main +; META-CALL2-NEXT: CodeRegionType: callsite +; META-CALL2-NEXT: CodeRegionHash: {{[0-9]+}} +; META-CALL2-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; META-CALL2-NEXT: BaselineConfig: { ForceInline: '1' } +; META-CALL2-NEXT: Invocation: 0 +; META-CALL2-NEXT: ... +; META-CALL3: --- !AutoTuning +; META-CALL3: Pass: inline +; META-CALL3: Name: _Z1gv +; META-CALL3: DebugLoc: { File: loop_small.cpp, Line: 15, Column: 2 } +; META-CALL3-NEXT: Function: main +; META-CALL3-NEXT: CodeRegionType: callsite +; META-CALL3-NEXT: CodeRegionHash: {{[0-9]+}} +; META-CALL3-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; META-CALL3-NEXT: BaselineConfig: { ForceInline: '1' } +; META-CALL3-NEXT: Invocation: 0 +; META-CALL3-NEXT: ... + +; NO-META-CALL: --- !AutoTuning +; NO-META-CALL-NEXT: Pass: inline +; NO-META-CALL-NEXT: CodeRegionType: callsite +; NO-META-CALL-NEXT: CodeRegionHash: {{[0-9]+}} +; NO-META-CALL-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; NO-META-CALL-NEXT: BaselineConfig: { ForceInline: '1' } +; NO-META-CALL-NEXT: Invocation: 0 +; NO-META-CALL-NEXT: ... +; NO-META-CALL-NEXT: --- !AutoTuning +; NO-META-CALL-NEXT: Pass: inline +; NO-META-CALL-NEXT: CodeRegionType: callsite +; NO-META-CALL-NEXT: CodeRegionHash: {{[0-9]+}} +; NO-META-CALL-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; NO-META-CALL-NEXT: BaselineConfig: { ForceInline: '1' } +; NO-META-CALL-NEXT: Invocation: 0 +; NO-META-CALL-NEXT: ... 
+; NO-META-CALL-NEXT: --- !AutoTuning +; NO-META-CALL-NEXT: Pass: inline +; NO-META-CALL-NEXT: CodeRegionType: callsite +; NO-META-CALL-NEXT: CodeRegionHash: {{[0-9]+}} +; NO-META-CALL-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; NO-META-CALL-NEXT: BaselineConfig: { ForceInline: '1' } +; NO-META-CALL-NEXT: Invocation: 0 +; NO-META-CALL-NEXT: ... diff --git a/llvm/test/AutoTuning/MetaData/write_no_metadata.ll b/llvm/test/AutoTuning/MetaData/write_no_metadata.ll new file mode 100644 index 000000000000..344a3548a74f --- /dev/null +++ b/llvm/test/AutoTuning/MetaData/write_no_metadata.ll @@ -0,0 +1,191 @@ +; REQUIRES: x86-registered-target +; RUN: rm %t.default_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-omit-metadata=1 \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output +; RUN: FileCheck %s --input-file %t.default_opp/write_no_metadata.ll.yaml -check-prefix=DEFAULT + +; RUN: rm %t.module_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.module_opp -auto-tuning-type-filter=Other \ +; RUN: -auto-tuning-omit-metadata=1 --disable-output +; RUN: FileCheck %s --input-file %t.module_opp/write_no_metadata.ll.yaml -check-prefix=OTHER + +; RUN: rm %t.loop_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.loop_opp -auto-tuning-type-filter=Loop \ +; RUN: -auto-tuning-omit-metadata=1 --disable-output +; RUN: FileCheck %s --input-file %t.loop_opp/write_no_metadata.ll.yaml -check-prefix=LOOP + +; RUN: rm %t.function_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.function_opp -auto-tuning-type-filter=CallSite \ +; RUN: -auto-tuning-omit-metadata=1 --disable-output +; RUN: FileCheck %s --input-file %t.function_opp/write_no_metadata.ll.yaml -check-prefix=CALLSITE + +; RUN: rm %t.function_loop_opp -rf +; RUN: opt %s -S 
-passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.function_loop_opp -auto-tuning-omit-metadata=1 \ +; RUN: -auto-tuning-type-filter=CallSite,Loop --disable-output +; RUN: FileCheck %s --input-file %t.function_loop_opp/write_no_metadata.ll.yaml -check-prefix=CALLSITE-LOOP1 +; RUN: FileCheck %s --input-file %t.function_loop_opp/write_no_metadata.ll.yaml -check-prefix=CALLSITE-LOOP2 + +; UNSUPPORTED: windows + +; ModuleID = 'loop-opp.c' +source_filename = "loop-opp.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @test(i32* %n) #0 !dbg !6 { +entry: + call void @callee(i32 6), !dbg !18 + %n.addr = alloca i32*, align 8 + %b = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %n, i32** %n.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 + call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15 + store i32 0, i32* %b, align 4, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 + store i32 0, i32* %i, align 4, !dbg !18 + br label %for.cond, !dbg !19 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !20 + %1 = load i32*, i32** %n.addr, align 8, !dbg !23 + %2 = load i32, i32* %1, align 4, !dbg !24 + %cmp = icmp slt i32 %0, %2, !dbg !25 + br i1 %cmp, label %for.body, label %for.end, !dbg !26 + +for.body: ; preds = %for.cond + %3 = load i32, i32* %b, align 4, !dbg !28 + %add = add nsw i32 %3, 1, !dbg !30 + store i32 %add, i32* %b, align 4, !dbg !31 + br label %for.inc, !dbg !32 + +for.inc: ; preds = %for.body + %4 = load i32, i32* %i, align 4, !dbg !33 + %inc = add nsw i32 %4, 1, !dbg !33 + store i32 %inc, i32* %i, align 4, !dbg !33 + br label %for.cond, !dbg !35, !llvm.loop !36 + +for.end: ; preds = %for.cond + %5 = load i32, i32* %b, align 4, 
!dbg !39 + ret i32 %5, !dbg !40 +} + +@a = global i32 4 +define void @callee(i32 %a) #2 { +entry: + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %add = add i32 %x1, %a + ret void +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "loop-opp.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!""} +!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !10} +!9 = 
!DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) +!12 = !DIExpression() +!13 = !DILocation(line: 1, column: 20, scope: !6) +!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9) +!15 = !DILocation(line: 3, column: 9, scope: !6) +!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) +!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 4, column: 14, scope: !17) +!19 = !DILocation(line: 4, column: 10, scope: !17) +!20 = !DILocation(line: 4, column: 20, scope: !21) +!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1) +!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) +!23 = !DILocation(line: 4, column: 25, scope: !21) +!24 = !DILocation(line: 4, column: 24, scope: !21) +!25 = !DILocation(line: 4, column: 22, scope: !21) +!26 = !DILocation(line: 4, column: 5, scope: !27) +!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) +!28 = !DILocation(line: 6, column: 11, scope: !29) +!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) +!30 = !DILocation(line: 6, column: 12, scope: !29) +!31 = !DILocation(line: 6, column: 9, scope: !29) +!32 = !DILocation(line: 7, column: 5, scope: !29) +!33 = !DILocation(line: 4, column: 28, scope: !34) +!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) +!35 = !DILocation(line: 4, column: 5, scope: !34) +!36 = distinct !{!36, !37, !38} +!37 = !DILocation(line: 4, column: 5, scope: !17) +!38 = !DILocation(line: 7, column: 5, scope: !17) +!39 = !DILocation(line: 8, column: 12, scope: !6) +!40 = !DILocation(line: 8, column: 5, scope: !6) + +; DEFAULT: --- !AutoTuning +; DEFAULT-NEXT: Pass: loop-unroll +; DEFAULT-NEXT: CodeRegionType: loop +; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}} +; COM: Clang generate dynamic 
values for UnrollCount so we use regex +; DEFAULT-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] } +; DEFAULT-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' } +; DEFAULT-NEXT: Invocation: 0 +; DEFAULT-NEXT: ... +; DEFAULT-NEXT: --- !AutoTuning +; DEFAULT-NEXT: Pass: all +; DEFAULT-NEXT: CodeRegionType: other +; COM: Module level hashes can differ based on the filepath so we check a regex +; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}} +; DEFAULT-NEXT: DynamicConfigs: { } +; DEFAULT-NEXT: BaselineConfig: { } +; DEFAULT-NEXT: Invocation: 0 +; DEFAULT-NEXT: ... + +; LOOP: --- !AutoTuning +; LOOP-NEXT: Pass: loop-unroll +; LOOP-NEXT: CodeRegionType: loop +; LOOP-NEXT: CodeRegionHash: {{[0-9]+}} +; COM: Clang generate dynamic values for UnrollCount so we use regex +; LOOP-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] } +; LOOP-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' } +; LOOP-NEXT: Invocation: 0 +; LOOP-NEXT: ... + +; CALLSITE: --- !AutoTuning +; CALLSITE-NEXT: Pass: inline +; CALLSITE-NEXT: CodeRegionType: callsite +; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} +; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } +; CALLSITE-NEXT: Invocation: 0 +; CALLSITE-NEXT: ... + +; CALLSITE-LOOP1: CodeRegionType: loop +; CALLSITE-LOOP1-NOT: CodeRegionType: other +; CALLSITE-LOOP2: CodeRegionType: callsite +; CALLSITE-LOOP2-NOT: CodeRegionType: other + +; OTHER: --- !AutoTuning +; OTHER-NEXT: Pass: all +; OTHER-NEXT: CodeRegionType: other +; COM: Module level hashes can differ based on the filepath so we check a regex +; OTHER-NEXT: CodeRegionHash: {{[0-9]+}} +; OTHER-NEXT: DynamicConfigs: { } +; OTHER-NEXT: BaselineConfig: { } +; OTHER-NEXT: Invocation: 0 +; OTHER-NEXT: ... 
diff --git a/llvm/test/AutoTuning/MetaData/write_with_metadata.ll b/llvm/test/AutoTuning/MetaData/write_with_metadata.ll new file mode 100644 index 000000000000..8b7ee9dcce37 --- /dev/null +++ b/llvm/test/AutoTuning/MetaData/write_with_metadata.ll @@ -0,0 +1,204 @@ +; REQUIRES: x86-registered-target +; RUN: rm %t.default_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-omit-metadata=0 \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output +; RUN: FileCheck %s --input-file %t.default_opp/write_with_metadata.ll.yaml -check-prefix=DEFAULT + +; RUN: rm %t.module_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.module_opp -auto-tuning-type-filter=Other \ +; RUN: -auto-tuning-omit-metadata=0 --disable-output +; RUN: FileCheck %s --input-file %t.module_opp/write_with_metadata.ll.yaml -check-prefix=OTHER + +; RUN: rm %t.loop_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.loop_opp -auto-tuning-type-filter=Loop \ +; RUN: -auto-tuning-omit-metadata=0 --disable-output +; RUN: FileCheck %s --input-file %t.loop_opp/write_with_metadata.ll.yaml -check-prefix=LOOP + +; RUN: rm %t.function_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.function_opp -auto-tuning-type-filter=CallSite \ +; RUN: -auto-tuning-omit-metadata=0 --disable-output +; RUN: FileCheck %s --input-file %t.function_opp/write_with_metadata.ll.yaml -check-prefix=CALLSITE + +; RUN: rm %t.function_loop_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.function_loop_opp -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-omit-metadata=0 --disable-output +; RUN: FileCheck %s --input-file %t.function_loop_opp/write_with_metadata.ll.yaml -check-prefix=CALLSITE-LOOP1 +; RUN: 
FileCheck %s --input-file %t.function_loop_opp/write_with_metadata.ll.yaml -check-prefix=CALLSITE-LOOP2 + +; UNSUPPORTED: windows + +; ModuleID = 'loop-opp.c' +source_filename = "loop-opp.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @test(i32* %n) #0 !dbg !6 { +entry: + call void @callee(i32 6), !dbg !18 + %n.addr = alloca i32*, align 8 + %b = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %n, i32** %n.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 + call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15 + store i32 0, i32* %b, align 4, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 + store i32 0, i32* %i, align 4, !dbg !18 + br label %for.cond, !dbg !19 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !20 + %1 = load i32*, i32** %n.addr, align 8, !dbg !23 + %2 = load i32, i32* %1, align 4, !dbg !24 + %cmp = icmp slt i32 %0, %2, !dbg !25 + br i1 %cmp, label %for.body, label %for.end, !dbg !26 + +for.body: ; preds = %for.cond + %3 = load i32, i32* %b, align 4, !dbg !28 + %add = add nsw i32 %3, 1, !dbg !30 + store i32 %add, i32* %b, align 4, !dbg !31 + br label %for.inc, !dbg !32 + +for.inc: ; preds = %for.body + %4 = load i32, i32* %i, align 4, !dbg !33 + %inc = add nsw i32 %4, 1, !dbg !33 + store i32 %inc, i32* %i, align 4, !dbg !33 + br label %for.cond, !dbg !35, !llvm.loop !36 + +for.end: ; preds = %for.cond + %5 = load i32, i32* %b, align 4, !dbg !39 + ret i32 %5, !dbg !40 +} + +@a = global i32 4 +define void @callee(i32 %a) #2 { +entry: + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %add = add i32 %x1, %a + ret void +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline 
nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "loop-opp.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!""} +!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !10} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) +!12 = !DIExpression() +!13 = !DILocation(line: 1, column: 20, scope: !6) +!14 = !DILocalVariable(name: "b", scope: !6, file: 
!1, line: 3, type: !9) +!15 = !DILocation(line: 3, column: 9, scope: !6) +!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) +!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 4, column: 14, scope: !17) +!19 = !DILocation(line: 4, column: 10, scope: !17) +!20 = !DILocation(line: 4, column: 20, scope: !21) +!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1) +!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) +!23 = !DILocation(line: 4, column: 25, scope: !21) +!24 = !DILocation(line: 4, column: 24, scope: !21) +!25 = !DILocation(line: 4, column: 22, scope: !21) +!26 = !DILocation(line: 4, column: 5, scope: !27) +!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) +!28 = !DILocation(line: 6, column: 11, scope: !29) +!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) +!30 = !DILocation(line: 6, column: 12, scope: !29) +!31 = !DILocation(line: 6, column: 9, scope: !29) +!32 = !DILocation(line: 7, column: 5, scope: !29) +!33 = !DILocation(line: 4, column: 28, scope: !34) +!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) +!35 = !DILocation(line: 4, column: 5, scope: !34) +!36 = distinct !{!36, !37, !38} +!37 = !DILocation(line: 4, column: 5, scope: !17) +!38 = !DILocation(line: 7, column: 5, scope: !17) +!39 = !DILocation(line: 8, column: 12, scope: !6) +!40 = !DILocation(line: 8, column: 5, scope: !6) + +; DEFAULT: --- !AutoTuning +; DEFAULT-NEXT: Pass: loop-unroll +; DEFAULT-NEXT: Name: for.cond +; DEFAULT-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } +; DEFAULT-NEXT: Function: test +; DEFAULT-NEXT: CodeRegionType: loop +; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}} +; DEFAULT-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] } +; DEFAULT-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' } +; DEFAULT-NEXT: Invocation: 0 +; DEFAULT-NEXT: ... 
+; DEFAULT-NEXT: --- !AutoTuning +; DEFAULT-NEXT: Pass: all +; DEFAULT-NEXT: Name: +; DEFAULT-SAME: write_with_metadata.ll +; DEFAULT-NEXT: Function: none +; DEFAULT-NEXT: CodeRegionType: other +; COM: Module level hashes can differ based on the filepath so we check a regex +; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}} +; DEFAULT-NEXT: DynamicConfigs: { } +; DEFAULT-NEXT: BaselineConfig: { } +; DEFAULT-NEXT: Invocation: 0 +; DEFAULT-NEXT: ... + +; LOOP: --- !AutoTuning +; LOOP-NEXT: Pass: loop-unroll +; LOOP-NEXT: Name: for.cond +; LOOP-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } +; LOOP-NEXT: Function: test +; LOOP-NEXT: CodeRegionType: loop +; LOOP-NEXT: CodeRegionHash: {{[0-9]+}} +; LOOP-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] } +; LOOP-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' } +; LOOP-NEXT: Invocation: 0 +; LOOP-NEXT: ... + +; CALLSITE: --- !AutoTuning +; CALLSITE-NEXT: Pass: inline +; CALLSITE-NEXT: Name: callee +; CALLSITE-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 14 } +; CALLSITE-NEXT: Function: test +; CALLSITE-NEXT: CodeRegionType: callsite +; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} +; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } +; CALLSITE-NEXT: Invocation: 0 +; CALLSITE-NEXT: ... + +; CALLSITE-LOOP1: CodeRegionType: loop +; CALLSITE-LOOP1-NOT: CodeRegionType: other +; CALLSITE-LOOP2: CodeRegionType: callsite +; CALLSITE-LOOP2-NOT: CodeRegionType: other + +; OTHER: --- !AutoTuning +; OTHER-NEXT: Pass: all +; OTHER-NEXT: Name: +; OTHER-SAME: write_with_metadata +; OTHER-NEXT: Function: none +; OTHER-NEXT: CodeRegionType: other +; COM: Module level hashes can differ based on the filepath so we check a regex +; OTHER-NEXT: CodeRegionHash: {{[0-9]+}} +; OTHER-NEXT: DynamicConfigs: { } +; OTHER-NEXT: BaselineConfig: { } +; OTHER-NEXT: Invocation: 0 +; OTHER-NEXT: ... 
diff --git a/llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext b/llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext new file mode 100644 index 000000000000..6ed79897d78c --- /dev/null +++ b/llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext @@ -0,0 +1,17 @@ +# IR level Instrumentation Flag +:ir +hot +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +9000 + +cold +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +10 diff --git a/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof new file mode 100644 index 000000000000..a1cb2231992e --- /dev/null +++ b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof @@ -0,0 +1,7 @@ +main:225715:0 + 2.1: 5553 + 3: 5391 + 3.1: _Z3sumii:0 + 0: 0 + 1: 0 + 2: 0 diff --git a/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof new file mode 100644 index 000000000000..386cdf8a7b5e --- /dev/null +++ b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof @@ -0,0 +1,7 @@ +main:225715:0 + 2.1: 5553 + 3: 5391 + 3.1: _Z3sumii:5860 + 0: 5279 + 1: 5279 + 2: 5279 diff --git a/llvm/test/AutoTuning/PGO/pgo-instr-filters.ll b/llvm/test/AutoTuning/PGO/pgo-instr-filters.ll new file mode 100644 index 000000000000..6b279df18343 --- /dev/null +++ b/llvm/test/AutoTuning/PGO/pgo-instr-filters.ll @@ -0,0 +1,61 @@ +; RUN: rm %t.default-opp -rf +; RUN: llvm-profdata merge %S/Inputs/pgo-instr.proftext -o %t.profdata +; RUN: opt %s -passes='pgo-instr-use,inline' -pgo-test-profile-file=%t.profdata -S -auto-tuning-opp=%t.default-opp -auto-tuning-exclude-cold=false --disable-output +; RUN: FileCheck %s --input-file %t.default-opp/pgo-instr-filters.ll.yaml -check-prefix=NON-FILTER + +; RUN: rm %t.filtered-opp -rf +; RUN: llvm-profdata merge %S/Inputs/pgo-instr.proftext -o %t.profdata +; RUN: opt %s -passes='pgo-instr-use,inline' -pgo-test-profile-file=%t.profdata -S -auto-tuning-opp=%t.filtered-opp -auto-tuning-exclude-cold 
--disable-output -pgo-instr-old-cfg-hashing=true +; RUN: FileCheck %s --input-file %t.filtered-opp/pgo-instr-filters.ll.yaml -check-prefix=EXCLUDE-COLD + +; RUN: rm %t.filtered-opp -rf +; RUN: llvm-profdata merge %S/Inputs/pgo-instr.proftext -o %t.profdata +; RUN: opt %s -passes='pgo-instr-use,inline' -pgo-test-profile-file=%t.profdata -S -auto-tuning-opp=%t.filtered-opp -auto-tuning-hot-only --disable-output -pgo-instr-old-cfg-hashing=true +; RUN: FileCheck %s --input-file %t.filtered-opp/pgo-instr-filters.ll.yaml -check-prefix=HOT-ONLY + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@s = common dso_local local_unnamed_addr global i32 0, align 4 + +define void @cold() { + +entry: + %0 = tail call i32 @callee(i32 5) + store i32 1, i32* @s, align 4 + ret void +} + +define void @hot() { +entry: + %0 = load i32, i32* @s, align 4 + %1 = tail call i32 @callee(i32 5) + %add = add nsw i32 %0, 4 + store i32 %add, i32* @s, align 4 + ret void +} + +define void @unknown() { +entry: + %0 = tail call i32 @callee(i32 5) + store i32 1, i32* @s, align 4 + ret void +} + +define i32 @callee(i32 %a) { +entry: + %add = add nsw i32 %a, 4 + ret i32 %add +} + +; NON-FILTER-DAG: Function: cold +; NON-FILTER-DAG: Function: hot +; NON-FILTER-DAG: Function: unknown + +; EXCLUDE-COLD-NOT: Function: cold +; EXCLUDE-COLD-DAG: Function: hot +; EXCLUDE-COLD-DAG: Function: unknown + +; HOT-ONLY-NOT: Function: unknown +; HOT-ONLY-NOT: Function: cold +; HOT-ONLY-DAG: Function: hot diff --git a/llvm/test/AutoTuning/PGO/pgo-sample-filters.ll b/llvm/test/AutoTuning/PGO/pgo-sample-filters.ll new file mode 100644 index 000000000000..aa93299a7079 --- /dev/null +++ b/llvm/test/AutoTuning/PGO/pgo-sample-filters.ll @@ -0,0 +1,138 @@ +; RUN: rm %t.default-opp -rf +; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-cold.prof -auto-tuning-opp=%t.default-opp 
-auto-tuning-exclude-cold=false --disable-output -S +; RUN: FileCheck %s -check-prefix=NON-FILTER < %t.default-opp/pgo-sample-filters.ll.yaml + +; Test -auto-tuning-exclude-cold with a cold caller in sample profile. +; RUN: rm %t.filtered-opp -rf +; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-cold.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-exclude-cold --disable-output -S +; RUN: FileCheck %s -check-prefix=COLD-PROFILE-EXCLUDE-COLD < %t.filtered-opp/pgo-sample-filters.ll.yaml + +; Test -auto-tuning-hot-only with a cold caller in sample profile. +; RUN: rm %t.filtered-opp -rf +; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-cold.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-hot-only --disable-output -S +; RUN: FileCheck %s -check-prefix=COLD-PROFILE-HOT-ONLY < %t.filtered-opp/pgo-sample-filters.ll.yaml + +; Test -auto-tuning-exclude-cold with a hot caller in sample profile. +; RUN: rm %t.filtered-opp -rf +; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-hot.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-exclude-cold --disable-output -S +; RUN: FileCheck %s -check-prefix=HOT-PROFILE-EXCLUDE-COLD < %t.filtered-opp/pgo-sample-filters.ll.yaml + +; Test -auto-tuning-hot-only with a hot caller in sample profile. 
+; RUN: rm %t.filtered-opp -rf +; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-hot.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-hot-only --disable-output -S +; RUN: FileCheck %s -check-prefix=HOT-PROFILE-HOT-ONLY < %t.filtered-opp/pgo-sample-filters.ll.yaml + + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { +entry: + %0 = tail call i32 @callee(i32 5) + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 + %add = add nsw i32 %tmp, %tmp1, !dbg !8 + ret i32 %add, !dbg !8 +} + +define i32 @main() #0 !dbg !9 { +entry: + %0 = tail call i32 @callee(i32 5) + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !10 + br label %while.cond, !dbg !11 + +while.cond: ; preds = %if.end, %entry + %tmp = load i32, i32* %i, align 4, !dbg !12 + %inc = add nsw i32 %tmp, 1, !dbg !12 + store i32 %inc, i32* %i, align 4, !dbg !12 + %cmp = icmp slt i32 %tmp, 400000000, !dbg !12 + br i1 %cmp, label %while.body, label %while.end, !dbg !12 + +while.body: ; preds = %while.cond + %tmp1 = load i32, i32* %i, align 4, !dbg !14 + %cmp1 = icmp ne i32 %tmp1, 100, !dbg !14 + br i1 %cmp1, label %if.then, label %if.else, !dbg !14 + +if.then: ; preds = %while.body + %tmp2 = load i32, i32* %i, align 4, !dbg !16 + %tmp3 = load i32, i32* %s, align 4, !dbg !16 + %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !16 +; INLINE-NOT: call i32 @_Z3sumii +; NOTINLINE: call i32 @_Z3sumii + store i32 %call, i32* %s, align 4, !dbg !16 + br label %if.end, !dbg !16 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !18 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br 
label %while.cond, !dbg !20 + +while.end: ; preds = %while.cond + %tmp4 = load i32, i32* %s, align 4, !dbg !22 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !22 + ret i32 0, !dbg !23 +} + +define i32 @callee(i32 %a) #0 { +entry: + %add = add nsw i32 %a, 4 + ret i32 %add +} + +declare i32 @printf(i8*, ...) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +attributes #0 = {"use-sample-profile"} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.5 "} +!6 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 4, scope: !6) +!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DILocation(line: 8, scope: !9) +!11 = !DILocation(line: 9, scope: !9) +!12 = !DILocation(line: 9, scope: !13) +!13 = !DILexicalBlockFile(scope: !9, file: !1, discriminator: 2) +!14 = !DILocation(line: 10, scope: !15) +!15 = distinct !DILexicalBlock(scope: !9, file: !1, line: 10) +!16 = !DILocation(line: 10, scope: !17) +!17 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 4) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 6) +!22 = 
!DILocation(line: 11, scope: !9) +!23 = !DILocation(line: 12, scope: !9) + +; Note that hotness of main is unknown. +; NON-FILTER-DAG: Function: _Z3sumii +; NON-FILTER-DAG: Function: main + +; COLD-PROFILE-EXCLUDE-COLD-NOT: Function: _Z3sumii +; COLD-PROFILE-EXCLUDE-COLD-DAG: Function: main + +; COLD-PROFILE-HOT-ONLY-NOT: Function: _Z3sumii +; COLD-PROFILE-HOT-ONLY-NOT: Function: main + +; HOT-PROFILE-EXCLUDE-COLD-DAG: Function: _Z3sumii +; HOT-PROFILE-EXCLUDE-COLD-DAG: Function: main + +; HOT-PROFILE-HOT-ONLY-NOT: Function: main +; HOT-PROFILE-HOT-ONLY-DAG: Function: _Z3sumii diff --git a/llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml b/llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml new file mode 100644 index 000000000000..00459fe9e23c --- /dev/null +++ b/llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: for.body +Function: find +CodeRegionType: loop +CodeRegionHash: 145363925920731080 +Invocation: [number] +Args: + - UnrollCount: 2 +... 
diff --git a/llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll b/llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll new file mode 100644 index 000000000000..6e41507af8b8 --- /dev/null +++ b/llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll @@ -0,0 +1,64 @@ +; RUN: rm %t.config.yaml -rf +; RUN: sed 's#\[number\]#0#g;' %S/Inputs/pass_invocation.yaml > %t.config.yaml +; RUN: opt %s -S -O3 -print-after=loop-unroll-full -print-after=loop-unroll \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.config.yaml --disable-output 2>&1 | \ +; RUN: FileCheck %s --check-prefix=INVOCATION-0 + +; RUN: rm %t.config.yaml -rf +; RUN: sed 's#\[number\]#1#g;' %S/Inputs/pass_invocation.yaml > %t.config.yaml +; RUN: opt %s -S -O3 -print-after=loop-unroll-full -print-after=loop-unroll \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.config.yaml --disable-output 2>&1 | \ +; RUN: FileCheck %s --check-prefix=INVOCATION-1 + +; Function Attrs: norecurse nounwind readonly uwtable +define dso_local i64 @find(i64* nocapture readonly %a, i64 %n, i64 %Value) { +entry: + %cmp6.not = icmp eq i64 %n, 0 + br i1 %cmp6.not, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.inc + %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.07 + %0 = load i64, i64* %arrayidx, align 8 + %cmp1 = icmp eq i64 %0, %Value + br i1 %cmp1, label %for.end, label %for.inc + +for.inc: ; preds = %for.body + %inc = add nuw i64 %i.07, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.inc, %for.body, %entry + %i.0.lcssa = phi i64 [ 0, %entry ], [ %i.07, %for.body ], [ %inc, %for.inc ] + ret i64 %i.0.lcssa +} + +; INVOCATION-0: *** IR Dump After {{.*}}Unroll +; INVOCATION-0: for.body.preheader: ; preds = %entry +; INVOCATION-0: for.body: ; preds = %for.inc.1, %for.body.preheader +; INVOCATION-0: for.inc: 
; preds = %for.body +; INVOCATION-0: for.body.1: ; preds = %for.inc +; INVOCATION-0: for.inc.1: ; preds = %for.body.1 +; INVOCATION-0: for.end.loopexit: ; preds = %for.inc.1, %for.body.1, %for.body, %for.inc +; INVOCATION-0: *** IR Dump After {{.*}}Unroll +; INVOCATION-0: for.body.preheader: ; preds = %entry +; INVOCATION-0: for.body: ; preds = %for.body.preheader, %for.inc.1 +; INVOCATION-0: for.inc: ; preds = %for.body +; INVOCATION-0: for.body.1: ; preds = %for.inc +; INVOCATION-0: for.inc.1: ; preds = %for.body.1 +; INVOCATION-0: for.end.loopexit: ; preds = %for.inc.1, %for.body.1, %for.body, %for.inc + +; INVOCATION-1: *** IR Dump After {{.*}}Unroll +; INVOCATION-1: for.body.preheader: ; preds = %entry +; INVOCATION-1: for.body: ; preds = %for.body.preheader, %for.inc +; INVOCATION-1: for.inc: ; preds = %for.body +; INVOCATION-1: for.end.loopexit: ; preds = %for.body, %for.inc +; INVOCATION-1: *** IR Dump After {{.*}}Unroll +; INVOCATION-1: for.body.preheader: ; preds = %entry +; INVOCATION-1: for.body: ; preds = %for.inc.1, %for.body.preheader +; INVOCATION-1: for.inc: ; preds = %for.body +; INVOCATION-1: for.body.1: ; preds = %for.inc +; INVOCATION-1: for.inc.1: ; preds = %for.body.1 +; INVOCATION-1: for.end.loopexit: ; preds = %for.inc.1, %for.body.1, %for.body, %for.inc diff --git a/llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll b/llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll new file mode 100644 index 000000000000..81097fdd5afa --- /dev/null +++ b/llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll @@ -0,0 +1,67 @@ +; REQUIRES: aarch64-registered-target +; RUN: rm %t.pass_invocation -rf +; RUN: opt %s -S -mtriple=aarch64-- -mcpu=tsv110 -auto-tuning-type-filter=Loop \ +; RUN: -O3 -auto-tuning-opp=%t.pass_invocation --disable-output +; RUN: FileCheck %s --input-file %t.pass_invocation/pass_invocation_write.ll.yaml + +; Function Attrs: nounwind uwtable +define dso_local void @sum(i32* noalias %a, i32* noalias %b, i32* 
noalias %c, i32 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %sum.0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + %add = fadd contract float %sum.0, %conv + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %conv3 = fptosi float %sum.0 to i32 + %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 0 + store i32 %conv3, i32* %arrayidx4, align 4 + ret void +} + +; CHECK: --- !AutoTuning +; CHECK-NEXT: Pass: loop-unroll +; CHECK-NEXT: Name: for.body +; CHECK-NEXT: Function: sum +; CHECK-NEXT: CodeRegionType: loop +; CHECK-NEXT: CodeRegionHash: {{[0-9]+}} +; CHECK-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 8, 4, 2 ] } +; CHECK-NEXT: BaselineConfig: { UnrollCount: '0' } +; CHECK-NEXT: Invocation: 0 +; CHECK-NEXT: ... +; CHECK-NEXT: --- !AutoTuning +; CHECK-NEXT: Pass: loop-vectorize +; CHECK-NEXT: Name: for.body +; CHECK-NEXT: Function: sum +; CHECK-NEXT: CodeRegionType: loop +; CHECK-NEXT: CodeRegionHash: {{[0-9]+}} +; CHECK-NEXT: DynamicConfigs: { VectorizationInterleave: [ 1, 2, 4 ] } +; CHECK-NEXT: BaselineConfig: { VectorizationInterleave: '2' } +; CHECK-NEXT: Invocation: 0 +; CHECK-NEXT: ... 
+; CHECK-NEXT: --- !AutoTuning +; CHECK-NEXT: Pass: loop-unroll +; CHECK-NEXT: Name: vector.body +; CHECK-NEXT: Function: sum +; CHECK-NEXT: CodeRegionType: loop +; CHECK-NEXT: CodeRegionHash: {{[0-9]+}} +; CHECK-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 8, 4, 2 ] } +; CHECK-NEXT: BaselineConfig: { UnrollCount: '0' } +; CHECK-NEXT: Invocation: 1 +; CHECK-NEXT: ... diff --git a/llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml b/llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml new file mode 100644 index 000000000000..065d3cb85b72 --- /dev/null +++ b/llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml @@ -0,0 +1,8 @@ +--- !AutoTuning +Pass: all +Name: [filename] +Function: none +CodeRegionType: other +Args: + - OptPass: [pass] +... diff --git a/llvm/test/AutoTuning/PhaseOrdering/pass-order.ll b/llvm/test/AutoTuning/PhaseOrdering/pass-order.ll new file mode 100644 index 000000000000..9d0210b3fdde --- /dev/null +++ b/llvm/test/AutoTuning/PhaseOrdering/pass-order.ll @@ -0,0 +1,65 @@ +; Run different orders of opt passes and verify that the order is respected +; ------------------------------------------------------------------------- +; Check to see if the order is correct, trivial case (autotuning disabled) +; RUN: opt %s -debug-pass-manager -S 2>&1 | FileCheck %s -check-prefix=DISABLE + +; One pass: +; RUN: rm %t.onepass_order.yaml -rf +; RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[loop-extract\]#g' \ +; RUN: %S/Inputs/template.yaml > %t.onepass_order.yaml +; RUN: opt %s -debug-pass-manager -S -auto-tuning-input=%t.onepass_order.yaml \ +; RUN: 2>&1 | FileCheck %s -check-prefix=ONEPASS + +; Two passes (A->B): +; RUN: rm %t.twopass_order.yaml -rf +; RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[loop-extract,strip\]#g' \ +; RUN: %S/Inputs/template.yaml > %t.twopass_order.yaml +; RUN: opt %s -debug-pass-manager -S -auto-tuning-input=%t.twopass_order.yaml \ +; RUN: 2>&1 | FileCheck %s -check-prefix=TWOPASS_AB + +; Two passes (B->A): +; RUN: rm 
%t.twopass_ba_order.yaml -rf +; RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[strip, loop-extract\]#g' \ +; RUN: %S/Inputs/template.yaml > %t.twopass_ba_order.yaml +; RUN: opt %s -debug-pass-manager -S -auto-tuning-input=%t.twopass_ba_order.yaml \ +; RUN: 2>&1 | FileCheck %s -check-prefix=TWOPASS_BA + +; candidate IR that can change based on many optimizations +; for now just use the IR in the LoopUnroll test file +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; DISABLE-NOT: Running pass: LoopExtractorPass on [module] +; DISABLE-NOT: Running pass: StripSymbolsPass on [module] +; DISABLE: Running pass: VerifierPass on [module] +; DISABLE: Running pass: PrintModulePass on [module] + +; ONEPASS-NOT: Running pass: StripSymbolsPass on [module] +; ONEPASS: Running pass: LoopExtractorPass on [module] +; ONEPASS: Running pass: VerifierPass on [module] +; ONEPASS: Running pass: PrintModulePass on [module] + +; TWOPASS_AB: Running pass: LoopExtractorPass on [module] +; TWOPASS_AB: Running pass: StripSymbolsPass on [module] +; TWOPASS_AB: Running pass: VerifierPass on [module] +; TWOPASS_AB: Running pass: PrintModulePass on [module] + +; TWOPASS_BA: Running pass: StripSymbolsPass on [module] +; TWOPASS_BA: Running pass: LoopExtractorPass on [module] +; TWOPASS_BA: Running pass: VerifierPass on [module] +; TWOPASS_BA: Running pass: PrintModulePass on [module] diff --git a/llvm/test/AutoTuning/SwitchLowering/switch-opp.ll b/llvm/test/AutoTuning/SwitchLowering/switch-opp.ll new 
file mode 100644 index 000000000000..679549180bf4 --- /dev/null +++ b/llvm/test/AutoTuning/SwitchLowering/switch-opp.ll @@ -0,0 +1,47 @@ +; RUN: rm %t.switch_opp -rf +; RUN: llc %s -auto-tuning-opp=%t.switch_opp -auto-tuning-type-filter=Switch -o /dev/null +; RUN: FileCheck %s --input-file %t.switch_opp/switch-opp.ll.yaml + +; UNSUPPORTED: windows + +define i32 @test(i32 %arg) #0 { +entry: + switch i32 %arg, label %bb5 [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] + +bb1: ; pred = %entry + br label %bb2 + +bb2: ; pred = %entry, %bb1 + %res.0 = phi i32 [ 1, %entry ], [ 2, %bb1 ] + br label %bb3 + +bb3: ; pred = %entry, %bb2 + %res.1 = phi i32 [ 0, %entry ], [ %res.0, %bb2 ] + %phitmp = add nsw i32 %res.1, 2 + br label %bb4 + +bb4: ; pred = %entry, %bb3 + %res.2 = phi i32 [ 1, %entry ], [ %phitmp, %bb3 ] + br label %bb5 + +bb5: ; pred = %entry, %bb4 + %res.3 = phi i32 [ 0, %entry ], [ %res.2, %bb4 ] + %0 = add nsw i32 %res.3, 1 + ret i32 %0 +} + +; CHECK: --- !AutoTuning +; CHECK-NEXT: Pass: switch-lowering +; CHECK-NEXT: Name: 'i32 %arg' +; CHECK-NEXT: Function: test +; CHECK-NEXT: CodeRegionType: switch +; CHECK-NEXT: CodeRegionHash: {{[0-9]+}} +; CHECK-NEXT: DynamicConfigs: { } +; CHECK-NEXT: BaselineConfig: { } +; CHECK-NEXT: Invocation: 0 +; CHECK-NEXT: ... 
diff --git a/llvm/test/AutoTuning/lit.local.cfg b/llvm/test/AutoTuning/lit.local.cfg new file mode 100644 index 000000000000..13b4927257ab --- /dev/null +++ b/llvm/test/AutoTuning/lit.local.cfg @@ -0,0 +1,2 @@ +if not config.enable_enable_autotuner: + config.unsupported = True diff --git a/llvm/test/AutoTuning/opt-opp.ll b/llvm/test/AutoTuning/opt-opp.ll new file mode 100644 index 000000000000..97f7b1d121cc --- /dev/null +++ b/llvm/test/AutoTuning/opt-opp.ll @@ -0,0 +1,315 @@ +; REQUIRES: asserts +; REQUIRES: x86-registered-target + +; RUN: rm %t.default_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.default_opp --disable-output +; RUN: FileCheck %s --input-file %t.default_opp/opt-opp.ll.yaml -check-prefix=DEFAULT + +; RUN: rm %t.module_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.module_opp -auto-tuning-type-filter=Other --disable-output +; RUN: FileCheck %s --input-file %t.module_opp/opt-opp.ll.yaml -check-prefix=OTHER + +; RUN: rm %t.loop_opp -rf +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.loop_opp -auto-tuning-type-filter=Loop --disable-output +; RUN: FileCheck %s --input-file %t.loop_opp/opt-opp.ll.yaml -check-prefix=LOOP + +; RUN: rm %t.callsite_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite --disable-output +; RUN: FileCheck %s --input-file %t.callsite_opp/opt-opp.ll.yaml -check-prefix=CALLSITE + +; RUN: rm %t.callsite_loop_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.callsite_loop_opp -auto-tuning-type-filter=CallSite,Loop --disable-output +; RUN: FileCheck %s --input-file %t.callsite_loop_opp/opt-opp.ll.yaml -check-prefix=CALLSITE-LOOP1 +; RUN: FileCheck %s 
--input-file %t.callsite_loop_opp/opt-opp.ll.yaml -check-prefix=CALLSITE-LOOP2 + +; RUN: rm %t.llvm_param_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.llvm_param_opp \ +; RUN: -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-type-filter=LLVMParam --disable-output +; RUN: FileCheck %s --input-file %t.llvm_param_opp/opt-opp.ll.yaml -check-prefix=LLVMPARAM + +; RUN: rm %t.program_param_opp -rf +; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.program_param_opp -auto-tuning-type-filter=ProgramParam --disable-output +; RUN: FileCheck %s --input-file %t.program_param_opp/opt-opp.ll.yaml -check-prefix=ProgramPARAM + +; Test if opp file with the same name exists already +; RUN: rm %t.default_opp -rf +; RUN: mkdir %t.default_opp && touch %t.default_opp/opt-opp.ll.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.default_opp --disable-output +; RUN: FileCheck %s --input-file %t.default_opp/opt-opp.ll.yaml.1 -check-prefix=DEFAULT + +; Test that the loop code region is included if its size >= the threshold. +; RUN: rm %t.loop.opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.loop.opp -auto-tuning-size-threshold=13 \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -debug-only=autotuning --disable-output 2>&1 | \ +; RUN: FileCheck %s -check-prefix=SIZE-LOOP +; RUN: FileCheck %s --input-file %t.loop.opp/opt-opp.ll.yaml -check-prefix=SIZE-LOOP-OPP + +; Test that the loop code region is excluded if its size < the threshold. 
+; RUN: rm %t.loop.opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.loop.opp -auto-tuning-size-threshold=14 \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -debug-only=autotuning --disable-output 2>&1 | \ +; RUN: FileCheck %s -check-prefix=SIZE-LOOP-FILTERED +; RUN: FileCheck %s --input-file %t.loop.opp/opt-opp.ll.yaml -check-prefix=SIZE-LOOP-OPP-FILTERED + +; Test that the callsite code region is included if its size >= the threshold. +; RUN: rm %t.callsite.opp -rf +; RUN: opt %s -S -passes=inline -auto-tuning-opp=%t.callsite.opp --disable-output \ +; RUN: -auto-tuning-size-threshold=2 -debug-only=autotuning 2>&1 | \ +; RUN: FileCheck %s -check-prefix=SIZE-CALLSITE +; RUN: FileCheck %s --input-file %t.callsite.opp/opt-opp.ll.yaml -check-prefix=SIZE-CALLSITE-OPP + +; Test that the callsite code region is excluded if its size < the threshold. +; RUN: rm %t.callsite.opp -rf +; RUN: opt %s -S -passes=inline -auto-tuning-opp=%t.callsite.opp \ +; RUN: -auto-tuning-size-threshold=24 --disable-output -debug-only=autotuning \ +; RUN: 2>&1 | FileCheck %s -check-prefix=SIZE-CALLSITE-FILTERED +; RUN: FileCheck %s --input-file %t.callsite.opp/opt-opp.ll.yaml -check-prefix=SIZE-CALLSITE-OPP-FILTERED + +; RUN: rm -rf %t.other +; RUN: opt %s -S -O3 -auto-tuning-opp=%t.other -auto-tuning-type-filter=Other +; RUN: grep "Name: \+'%S/opt-opp.ll'" %t.other/opt-opp.ll.yaml +; RUN: not grep "Name: \+opt-opp.ll" %t.other/opt-opp.ll.yaml + +; RUN: rm -rf %t.other +; RUN: opt %s -S -O3 -auto-tuning-opp=%t.other -auto-tuning-type-filter=Other \ +; RUN: -autotuning-project-dir=%S/ +; RUN: not grep "Name: \+'%S/opt-opp.ll'" %t.other/opt-opp.ll.yaml +; RUN: grep "Name: \+opt-opp.ll" %t.other/opt-opp.ll.yaml + +; UNSUPPORTED: windows + +; ModuleID = 'loop-opp.c' +source_filename = "loop-opp.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @test(i32* 
%n) #0 !dbg !6 { +entry: + call void @callee(i32 6), !dbg !18 + %n.addr = alloca i32*, align 8 + %b = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %n, i32** %n.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 + call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15 + store i32 0, i32* %b, align 4, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 + store i32 0, i32* %i, align 4, !dbg !18 + br label %for.cond, !dbg !19 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !20 + %1 = load i32*, i32** %n.addr, align 8, !dbg !23 + %2 = load i32, i32* %1, align 4, !dbg !24 + %cmp = icmp slt i32 %0, %2, !dbg !25 + br i1 %cmp, label %for.body, label %for.end, !dbg !26 + +for.body: ; preds = %for.cond + %3 = load i32, i32* %b, align 4, !dbg !28 + %add = add nsw i32 %3, 1, !dbg !30 + store i32 %add, i32* %b, align 4, !dbg !31 + br label %for.inc, !dbg !32 + +for.inc: ; preds = %for.body + %4 = load i32, i32* %i, align 4, !dbg !33 + %inc = add nsw i32 %4, 1, !dbg !33 + store i32 %inc, i32* %i, align 4, !dbg !33 + br label %for.cond, !dbg !35, !llvm.loop !36 + +for.end: ; preds = %for.cond + %5 = load i32, i32* %b, align 4, !dbg !39 + ret i32 %5, !dbg !40 +} + +@a = global i32 4 +define void @callee(i32 %a) #2 { +entry: + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %add = add i32 %x1, %a + ret void +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "loop-opp.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!""} +!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !10} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) +!12 = !DIExpression() +!13 = !DILocation(line: 1, column: 20, scope: !6) +!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9) +!15 = !DILocation(line: 3, column: 9, scope: !6) +!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) +!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 4, column: 14, scope: !17) +!19 = !DILocation(line: 4, column: 10, scope: !17) +!20 = !DILocation(line: 4, 
column: 20, scope: !21) +!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1) +!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) +!23 = !DILocation(line: 4, column: 25, scope: !21) +!24 = !DILocation(line: 4, column: 24, scope: !21) +!25 = !DILocation(line: 4, column: 22, scope: !21) +!26 = !DILocation(line: 4, column: 5, scope: !27) +!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) +!28 = !DILocation(line: 6, column: 11, scope: !29) +!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) +!30 = !DILocation(line: 6, column: 12, scope: !29) +!31 = !DILocation(line: 6, column: 9, scope: !29) +!32 = !DILocation(line: 7, column: 5, scope: !29) +!33 = !DILocation(line: 4, column: 28, scope: !34) +!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) +!35 = !DILocation(line: 4, column: 5, scope: !34) +!36 = distinct !{!36, !37, !38} +!37 = !DILocation(line: 4, column: 5, scope: !17) +!38 = !DILocation(line: 7, column: 5, scope: !17) +!39 = !DILocation(line: 8, column: 12, scope: !6) +!40 = !DILocation(line: 8, column: 5, scope: !6) + +; DEFAULT: --- !AutoTuning +; DEFAULT-NEXT: Pass: loop-unroll +; DEFAULT-NEXT: Name: for.cond +; DEFAULT-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } +; DEFAULT-NEXT: Function: test +; DEFAULT-NEXT: CodeRegionType: loop +; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}} +; DEFAULT-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] } +; DEFAULT-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' } +; DEFAULT-NEXT: Invocation: 0 +; DEFAULT-NEXT: ... 
+; DEFAULT-NEXT: --- !AutoTuning +; DEFAULT-NEXT: Pass: all +; DEFAULT-NEXT: Name: +; DEFAULT-SAME: opt-opp.ll +; DEFAULT-NEXT: Function: none +; DEFAULT-NEXT: CodeRegionType: other +; COM: Module level hashes can differ based on the filepath so we check a regex +; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}} +; DEFAULT-NEXT: DynamicConfigs: { } +; DEFAULT-NEXT: BaselineConfig: { } +; DEFAULT-NEXT: Invocation: 0 +; DEFAULT-NEXT: ... + +; LOOP: --- !AutoTuning +; LOOP-NEXT: Pass: loop-unroll +; LOOP-NEXT: Name: for.cond +; LOOP-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } +; LOOP-NEXT: Function: test +; LOOP-NEXT: CodeRegionType: loop +; LOOP-NEXT: CodeRegionHash: {{[0-9]+}} +; LOOP-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] } +; LOOP-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' } +; LOOP-NEXT: Invocation: 0 +; LOOP-NEXT: ... + +; CALLSITE: --- !AutoTuning +; CALLSITE-NEXT: Pass: inline +; CALLSITE-NEXT: Name: callee +; CALLSITE-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 14 } +; CALLSITE-NEXT: Function: test +; CALLSITE-NEXT: CodeRegionType: callsite +; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} +; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } +; CALLSITE-NEXT: Invocation: 0 +; CALLSITE-NEXT: ... + +; CALLSITE-LOOP1: CodeRegionType: loop +; CALLSITE-LOOP1-NOT: CodeRegionType: other +; CALLSITE-LOOP2: CodeRegionType: callsite +; CALLSITE-LOOP2-NOT: CodeRegionType: other + +; OTHER: --- !AutoTuning +; OTHER-NEXT: Pass: all +; OTHER-NEXT: Name: +; OTHER-SAME: opt-opp.ll +; OTHER-NEXT: Function: none +; OTHER-NEXT: CodeRegionType: other +; COM: Module level hashes can differ based on the filepath so we check a regex +; OTHER-NEXT: CodeRegionHash: {{[0-9]+}} +; OTHER-NEXT: DynamicConfigs: { } +; OTHER-NEXT: BaselineConfig: { } +; OTHER-NEXT: Invocation: 0 +; OTHER-NEXT: ... 
+ +; LLVMPARAM: --- !AutoTuning +; LLVMPARAM-NEXT: Pass: none +; LLVMPARAM-NEXT: Name: +; LLVMPARAM-SAME: opt-opp.ll +; LLVMPARAM-NEXT: Function: none +; LLVMPARAM-NEXT: CodeRegionType: llvm-param +; LLVMPARAM-NEXT: CodeRegionHash: {{[0-9]+}} +; LLVMPARAM-NEXT: DynamicConfigs: { } +; LLVMPARAM-NEXT: BaselineConfig: { } +; LLVMPARAM-NEXT: Invocation: 0 +; LLVMPARAM-NEXT: ... + +; ProgramPARAM: --- !AutoTuning +; ProgramPARAM-NEXT: Pass: none +; ProgramPARAM-NEXT: Name: +; ProgramPARAM-SAME: opt-opp.ll +; ProgramPARAM-NEXT: Function: none +; ProgramPARAM-NEXT: CodeRegionType: program-param +; ProgramPARAM-NEXT: CodeRegionHash: {{[0-9]+}} +; ProgramPARAM-NEXT: DynamicConfigs: { } +; ProgramPARAM-NEXT: BaselineConfig: { } +; ProgramPARAM-NEXT: Invocation: 0 +; ProgramPARAM-NEXT: ... + +; SIZE-LOOP: PassName: loop-unroll +; SIZE-LOOP-NEXT: Type: loop +; SIZE-LOOP-NEXT: Size: 13 +; SIZE-LOOP: Module added as an tuning opportunity + +; SIZE-LOOP-OPP-DAG: Pass: loop-unroll +; SIZE-LOOP-OPP-DAG: Pass: all + +; SIZE-LOOP-FILTERED-NOT: PassName: loop-unroll +; SIZE-LOOP-FILTERED: Module added as an tuning opportunity + +; SIZE-LOOP-OPP-FILTERED-NOT: Pass: loop-unroll +; The "other" code regions should remain as-is. +; SIZE-LOOP-OPP-FILTERED: CodeRegionType: other + +; SIZE-CALLSITE: PassName: inline +; SIZE-CALLSITE-NEXT: Type: callsite +; SIZE-CALLSITE-NEXT: Size: 4 +; SIZE-CALLSITE: Module added as an tuning opportunity + +; SIZE-CALLSITE-OPP-DAG: Pass: inline +; SIZE-CALLSITE-OPP-DAG: Pass: all + +; SIZE-CALLSITE-FILTERED-NOT: PassName: inline +; SIZE-CALLSITE-FILTERED: Module added as an tuning opportunity + +; SIZE-CALLSITE-OPP-FILTERED-NOT: Pass: inline +; The "other" code regions should remain as-is. 
+; SIZE-CALLSITE-OPP-FILTERED: CodeRegionType: other diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index fc7ab6536309..0e9396e3b014 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -62,6 +62,7 @@ config.reverse_iteration = @LLVM_ENABLE_REVERSE_ITERATION@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ +config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 8934130f9913..94b2028b25bc 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -645,6 +645,18 @@ static int compileModule(char **argv, LLVMContext &Context) { reportError(EC.message(), SplitDwarfOutputFile); } +#if defined(ENABLE_AUTOTUNER) + if (llvm::Error E = autotuning::Engine.init(M->getModuleIdentifier())) { + errs() << "error: " << toString(std::move(E)) << '\n'; + return 1; + } + if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() && + (autotuning::Engine.LLVMParams.size() || + autotuning::Engine.ProgramParams.size())) + llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams, + autotuning::Engine.ProgramParams); +#endif + // Build up all of the passes that we want to do to the module. legacy::PassManager PM; @@ -776,6 +788,13 @@ static int compileModule(char **argv, LLVMContext &Context) { } } +#if defined(ENABLE_AUTOTUNER) + if (llvm::Error E = autotuning::Engine.finalize()) { + errs() << "error: " << toString(std::move(E)) << '\n'; + return 1; + } +#endif + // Declare success. 
Out->keep(); if (DwoOut) diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 6ae3f87099af..5ce9e4fee81f 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -39,6 +39,10 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Debugify.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif + using namespace llvm; using namespace opt_tool; @@ -459,6 +463,35 @@ bool llvm::runPassPipeline( MPM.addPass(NewPMDebugifyPass(DebugifyMode::OriginalDebugInfo, "", &DebugInfoBeforePass)); +#if defined(ENABLE_AUTOTUNER) + bool Changed = false; + // If autotuning is enabled (for applying configuration), use AutoTuner + // generated pass ordering instead of using passes specified with -passes=... + // with opt tool. + if (autotuning::Engine.isEnabled()) { + std::vector<std::string> PassesList; + Changed = autotuning::Engine.lookUpGlobalParams("OptPass", PassesList); + if (Changed && PassesList.size()) { + std::string PassPipeline = ""; + for (auto PassName : PassesList) + PassPipeline.append(PassName + ","); + PassPipeline.pop_back(); + + if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) + errs() << "AutoTuner: cannot add pass:" << toString(std::move(Err)) + << "\n"; + } + } + if (!Changed) { + // Add passes according to the -passes options. + if (!PassPipeline.empty()) { + if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) { + errs() << Arg0 << ": " << toString(std::move(Err)) << "\n"; + return false; + } + } + } +#else // Add passes according to the -passes options. 
if (!PassPipeline.empty()) { if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) { @@ -466,6 +499,7 @@ bool llvm::runPassPipeline( return false; } } +#endif if (VK > VK_NoVerifier) MPM.addPass(VerifierPass()); @@ -539,6 +573,14 @@ bool llvm::runPassPipeline( if (DebugifyEach && !DebugifyExport.empty()) exportDebugifyStats(DebugifyExport, Debugify.getDebugifyStatsMap()); +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - auto-tuning finalization for this module + if (Error E = autotuning::Engine.finalize()) { + errs() << "error: " << toString(std::move(E)) << '\n'; + return false; + } +#endif + return true; } diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 9c20e7784223..1401352647cd 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -456,6 +456,9 @@ int main(int argc, char **argv) { initializeWriteBitcodePassPass(Registry); initializeReplaceWithVeclibLegacyPass(Registry); initializeJMCInstrumenterPass(Registry); +#if defined(ENABLE_AUTOTUNER) + initializeAutotuningDumpLegacyPass(Registry); +#endif SmallVector<PassPlugin, 1> PluginList; PassPlugins.setCallback([&](const std::string &PluginPath) { @@ -516,7 +519,11 @@ int main(int argc, char **argv) { RemarksFormat, RemarksWithHotness, RemarksHotnessThreshold); if (Error E = RemarksFileOrErr.takeError()) { +#if defined(ENABLE_AUTOTUNER) + errs() << "error: " << toString(std::move(E)) << '\n'; +#else errs() << toString(std::move(E)) << '\n'; +#endif return 1; } std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr); @@ -641,6 +648,20 @@ int main(int argc, char **argv) { M->addModuleFlag(Module::Error, "UnifiedLTO", 1); } +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - auto-tuning initialization for this module + // if the auto-tuning flag is on + if (Error E = autotuning::Engine.init(M->getModuleIdentifier())) { + errs() << "error: " << toString(std::move(E)) << '\n'; + return 1; + } + if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() && 
+ (autotuning::Engine.LLVMParams.size() || + autotuning::Engine.ProgramParams.size())) + llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams, + autotuning::Engine.ProgramParams); +#endif + // Add an appropriate TargetLibraryInfo pass for the module's triple. TargetLibraryInfoImpl TLII(ModuleTriple); @@ -778,6 +799,30 @@ int main(int argc, char **argv) { Passes.add(TPC); } +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - If auto-tuning is enabled, try to generate passes + // from auto-tuning interface and disable all optimization passes. + if (autotuning::Engine.isEnabled()) { + std::vector<std::string> PassesList; + bool Changed = autotuning::Engine.lookUpGlobalParams("OptPass", PassesList); + if (Changed) { + // disable all optimization passes of all optimization levels + OptLevelO0 = false; + OptLevelO1 = false; + OptLevelO2 = false; + OptLevelOs = false; + OptLevelOz = false; + OptLevelO3 = false; + for (auto const &Value : PassesList) { + const PassInfo *PassInf = (Registry.getPassInfo(StringRef(Value))); + if (PassInf) { + PassList.push_back(PassInf); + } + } + } + } +#endif + // Create a new optimization pass for each one specified on the command line for (unsigned i = 0; i < PassList.size(); ++i) { const PassInfo *PassInf = PassList[i]; @@ -878,6 +923,14 @@ int main(int argc, char **argv) { if (DebugifyEach && !DebugifyExport.empty()) exportDebugifyStats(DebugifyExport, Passes.getDebugifyStatsMap()); +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - auto-tuning finalization for this module + if (Error E = autotuning::Engine.finalize()) { + errs() << "error: " << toString(std::move(E)) << '\n'; + return 1; + } +#endif + // Declare success. if (!NoOutput) Out->keep(); -- 2.33.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2