diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index d696add8a1af5..dcdbcaec168d2 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -88,6 +88,18 @@ template <> struct ilist_callback_traits<MachineBasicBlock> {
   }
 };
 
+/// The hotness of static data tracked by a MachineFunction and not represented
+/// as a global object in the module IR / MIR. Typical examples are
+/// MachineJumpTableInfo and MachineConstantPool.
+///
+/// Enumerator order is significant: updateJumpTableEntryHotness keeps the
+/// largest value seen, so Unknown < Cold < Hot.
+enum class MachineFunctionDataHotness {
+  Unknown,
+  Cold,
+  Hot,
+};
+
 /// MachineFunctionInfo - This class can be derived from and used by targets to
 /// hold private target-specific information for each MachineFunction. Objects
 /// of type are accessed/created with MF::getInfo and destroyed when the
diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
index e8e9c2f6338e0..56ecbe22ff6dd 100644
--- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -28,6 +28,7 @@ namespace llvm {
 class MachineBasicBlock;
 class DataLayout;
 class raw_ostream;
+enum class MachineFunctionDataHotness;
 
 /// MachineJumpTableEntry - One jump table in the jump table info.
 ///
@@ -35,8 +36,11 @@ struct MachineJumpTableEntry {
   /// MBBs - The vector of basic blocks from which to create the jump table.
   std::vector<MachineBasicBlock*> MBBs;
 
-  explicit MachineJumpTableEntry(const std::vector<MachineBasicBlock*> &M)
-  : MBBs(M) {}
+  /// The hotness of MJTE is inferred from the hotness of the source basic
+  /// block(s) that reference it.
+  MachineFunctionDataHotness Hotness;
+
+  explicit MachineJumpTableEntry(const std::vector<MachineBasicBlock *> &M);
 };
 
 class MachineJumpTableInfo {
@@ -107,6 +111,11 @@ class MachineJumpTableInfo {
     return JumpTables;
   }
 
+  /// Raise the hotness of jump table entry \p JTI to \p Hotness when it is
+  /// hotter than the value already recorded. Return true if the entry changed.
+  bool updateJumpTableEntryHotness(size_t JTI,
+                                   MachineFunctionDataHotness Hotness);
+
   /// RemoveJumpTable - Mark the specific index as being dead. This will
   /// prevent it from being emitted.
   void RemoveJumpTable(unsigned Idx) {
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index d1fac4a304cff..b5d2a7e6bf035 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -71,6 +71,10 @@ namespace llvm {
   /// using profile information.
   MachineFunctionPass *createMachineFunctionSplitterPass();
 
+  /// createStaticDataSplitterPass - This pass partitions a static data section
+  /// into a hot and cold section using profile information.
+  MachineFunctionPass *createStaticDataSplitterPass();
+
   /// MachineFunctionPrinter pass - This pass prints out the machine function to
   /// the given stream as a debugging tool.
   MachineFunctionPass *
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 1cb9013bc48cc..8111afcc1fb20 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -293,6 +293,7 @@ void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &);
 void initializeSpillPlacementWrapperLegacyPass(PassRegistry &);
 void initializeStackColoringLegacyPass(PassRegistry &);
 void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &);
+void initializeStaticDataSplitterPass(PassRegistry &);
 void initializeStackMapLivenessPass(PassRegistry &);
 void initializeStackProtectorPass(PassRegistry &);
 void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5a4e79d7225db..f4cddafa00971 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -236,6 +236,7 @@ DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass)
DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass) DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass) DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass) +DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass) DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata) diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 145fd2fac8b56..88f863d8204d0 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -226,6 +226,7 @@ add_llvm_component_library(LLVMCodeGen StackMaps.cpp StackProtector.cpp StackSlotColoring.cpp + StaticDataSplitter.cpp SwiftErrorValueTracking.cpp SwitchLoweringUtils.cpp TailDuplication.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 8efe540770913..84d92705de022 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -130,6 +130,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); + initializeStaticDataSplitterPass(Registry); initializeStripDebugMachineModulePass(Registry); initializeTailDuplicateLegacyPass(Registry); initializeTargetPassConfigPass(Registry); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index e6b9538fe9a02..faff2eca5080c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1291,6 +1291,10 @@ const unsigned MachineFunction::DebugOperandMemNumber = 1000000; // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// +MachineJumpTableEntry::MachineJumpTableEntry( + const std::vector &MBBs) + : 
MBBs(MBBs), Hotness(MachineFunctionDataHotness::Unknown) {} + /// Return the size of each entry in the jump table. unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { // The size of a jump table entry is 4 bytes unless the entry is just the @@ -1340,6 +1344,17 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( return JumpTables.size()-1; } +bool MachineJumpTableInfo::updateJumpTableEntryHotness( + size_t JTI, MachineFunctionDataHotness Hotness) { + assert(JTI < JumpTables.size() && "Invalid JTI!"); + // Record the largest hotness value. + if (Hotness <= JumpTables[JTI].Hotness) + return false; + + JumpTables[JTI].Hotness = Hotness; + return true; +} + /// If Old is the target of any jump tables, update the jump tables to branch /// to New instead. bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp new file mode 100644 index 0000000000000..25f02fde8a4b8 --- /dev/null +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -0,0 +1,181 @@ +//===- StaticDataSplitter.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass uses branch profile data to assign hotness based section qualifiers
+// for the following types of static data:
+// - Jump tables
+// - Constant pools (TODO)
+// - Other module-internal data (TODO)
+//
+// For the original RFC of this pass please see
+// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
+
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "static-data-splitter"
+
+STATISTIC(NumHotJumpTables, "Number of hot jump tables seen");
+STATISTIC(NumColdJumpTables, "Number of cold jump tables seen");
+STATISTIC(NumUnknownJumpTables,
+          "Number of jump tables with unknown hotness. Option "
+          "-static-data-default-hotness specifies the hotness.");
+
+static cl::opt<MachineFunctionDataHotness> StaticDataDefaultHotness(
+    "static-data-default-hotness", cl::Hidden,
+    cl::desc("This option specifies the hotness of static data when profile "
+             "information is unavailable"),
+    cl::init(MachineFunctionDataHotness::Hot),
+    cl::values(clEnumValN(MachineFunctionDataHotness::Hot, "hot", "Hot"),
+               clEnumValN(MachineFunctionDataHotness::Cold, "cold", "Cold")));
+
+class StaticDataSplitter : public MachineFunctionPass {
+  const MachineBranchProbabilityInfo *MBPI = nullptr;
+  const MachineBlockFrequencyInfo *MBFI = nullptr;
+  const ProfileSummaryInfo *PSI = nullptr;
+
+  // Returns true iff the hotness of at least one jump table entry changed.
+  bool splitJumpTables(MachineFunction &MF);
+
+  // Same as above but works on functions with profile information.
+  bool splitJumpTablesWithProfiles(const MachineFunction &MF,
+                                   MachineJumpTableInfo &MJTI);
+
+public:
+  static char ID;
+
+  StaticDataSplitter() : MachineFunctionPass(ID) {
+    initializeStaticDataSplitterPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Static Data Splitter"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
+  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  return splitJumpTables(MF);
+}
+
+bool StaticDataSplitter::splitJumpTablesWithProfiles(
+    const MachineFunction &MF, MachineJumpTableInfo &MJTI) {
+  int NumChangedJumpTables = 0;
+
+  // Jump table could be used by either terminating instructions or
+  // non-terminating ones, so we walk all instructions and use
+  // `MachineOperand::isJTI()` to identify jump table operands.
+  // Similarly, `MachineOperand::isCPI()` can identify constant pool usages
+  // in the same loop.
+  for (const auto &MBB : MF) {
+    for (const MachineInstr &I : MBB) {
+      for (const MachineOperand &Op : I.operands()) {
+        if (!Op.isJTI())
+          continue;
+        const int JTI = Op.getIndex();
+        // This is not a source block of jump table.
+        if (JTI == -1)
+          continue;
+
+        auto Hotness = MachineFunctionDataHotness::Hot;
+
+        // Hotness is based on source basic block hotness.
+        // TODO: PSI APIs are about instruction hotness. Introduce API for data
+        // access hotness.
+        if (PSI->isColdBlock(&MBB, MBFI))
+          Hotness = MachineFunctionDataHotness::Cold;
+
+        if (MJTI.updateJumpTableEntryHotness(JTI, Hotness))
+          ++NumChangedJumpTables;
+      }
+    }
+  }
+  return NumChangedJumpTables > 0;
+}
+
+bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
+  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+  if (!MJTI || MJTI->getJumpTables().empty())
+    return false;
+
+  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
+                                MF.getFunction().hasProfileData();
+  auto statOnExit = llvm::make_scope_exit([&] {
+    if (!AreStatisticsEnabled())
+      return;
+
+    if (!ProfileAvailable) {
+      NumUnknownJumpTables += MJTI->getJumpTables().size();
+      return;
+    }
+
+    for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) {
+      auto Hotness = MJTI->getJumpTables()[JTI].Hotness;
+      if (Hotness == MachineFunctionDataHotness::Hot) {
+        ++NumHotJumpTables;
+      } else {
+        assert(Hotness == MachineFunctionDataHotness::Cold &&
+               "A jump table is either hot or cold when profile information is "
+               "available.");
+        ++NumColdJumpTables;
+      }
+    }
+  });
+
+  // Place jump tables according to block hotness if function has profile data.
+  if (ProfileAvailable)
+    return splitJumpTablesWithProfiles(MF, *MJTI);
+
+  // If function profile is unavailable (e.g., module not instrumented, or new
+  // code paths lacking samples), -static-data-default-hotness specifies the
+  // hotness.
+  bool Changed = false;
+  for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++)
+    Changed |= MJTI->updateJumpTableEntryHotness(JTI, StaticDataDefaultHotness);
+
+  return Changed;
+}
+
+char StaticDataSplitter::ID = 0;
+
+INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false,
+                    false)
+
+MachineFunctionPass *llvm::createStaticDataSplitterPass() {
+  return new StaticDataSplitter();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index d407e9f0871d4..bca1eb07deb13 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -263,6 +263,11 @@ static cl::opt<bool> GCEmptyBlocks("gc-empty-basic-blocks",
     cl::init(false), cl::Hidden,
     cl::desc("Enable garbage-collecting empty basic blocks"));
 
+static cl::opt<bool>
+    SplitStaticData("split-static-data", cl::Hidden, cl::init(false),
+                    cl::desc("Split static data sections into hot and cold "
+                             "sections using profile information"));
+
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -1257,6 +1262,8 @@ void TargetPassConfig::addMachinePasses() {
       }
     }
     addPass(createMachineFunctionSplitterPass());
+    if (SplitStaticData)
+      addPass(createStaticDataSplitterPass());
   }
   // We run the BasicBlockSections pass if either we need BB sections or BB
   // address map (or both).
diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll
new file mode 100644
index 0000000000000..c85338de0c3d4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/jump-table-partition.ll
@@ -0,0 +1,177 @@
+; -stats requires asserts
+; REQUIRES: asserts
+
+; Stop after 'finalize-isel' for simpler MIR, and lower the minimum number of
+; jump table entries so 'switch' needs fewer cases to generate a jump table.
+; RUN: llc -stop-after=finalize-isel -min-jump-table-entries=2 %s -o %t.mir
+; RUN: llc --run-pass=static-data-splitter -stats -x mir %t.mir -o - 2>&1 | FileCheck %s --check-prefix=STAT
+
+; Test that the expected stat messages are printed.
+; COM: Update test to verify section suffixes when target-lowering and assembler changes are implemented.
+; COM: Also run static-data-splitter pass with -static-data-default-hotness=cold and check data section suffix.
+
+; STAT-DAG: 2 static-data-splitter - Number of cold jump tables seen
+; STAT-DAG: 2 static-data-splitter - Number of hot jump tables seen
+; STAT-DAG: 1 static-data-splitter - Number of jump tables with unknown hotness
+
+; In function @foo, the 2 switch instructions to jt0.* and jt1.* get lowered to hot jump tables,
+; and the 2 switch instructions to jt2.* and jt3.* get lowered to cold jump tables.
+
+; @func_without_profile doesn't have profiles. Its jump table hotness is unknown.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@str.9 = private constant [7 x i8] c".str.9\00"
+@str.10 = private constant [8 x i8] c".str.10\00"
+@str.11 = private constant [8 x i8] c".str.11\00"
+
+@case2 = private constant [7 x i8] c"case 2\00"
+@case1 = private constant [7 x i8] c"case 1\00"
+@default = private constant [8 x i8] c"default\00"
+@jt3 = private constant [4 x i8] c"jt3\00"
+
+define i32 @foo(i32 %num) !prof !13 { ; profiled function: entry count in !13
+entry:
+  %mod3 = sdiv i32 %num, 3
+  switch i32 %mod3, label %jt0.default [ ; jt0: switch in the entry block
+    i32 1, label %jt0.bb1
+    i32 2, label %jt0.bb2
+  ], !prof !14
+
+jt0.bb1:
+  call i32 @puts(ptr @case1)
+  br label %jt0.epilog
+
+jt0.bb2:
+  call i32 @puts(ptr @case2)
+  br label %jt0.epilog
+
+jt0.default:
+  call i32 @puts(ptr @default)
+  br label %jt0.epilog
+
+jt0.epilog:
+  %zero = icmp eq i32 %num, 0
+  br i1 %zero, label %cold, label %hot, !prof !15 ; !15 weights: %cold 1, %hot 99999
+
+cold:
+  %c2 = call i32 @transform(i32 %num)
+  switch i32 %c2, label %jt2.default [ ; jt2: switch in the %cold block
+    i32 1, label %jt2.bb1
+    i32 2, label %jt2.bb2
+  ], !prof !14
+
+jt2.bb1:
+  call i32 @puts(ptr @case1)
+  br label %jt1.epilog ; NOTE(review): targets %jt1.epilog, not %jt2.epilog - confirm intended
+
+jt2.bb2:
+  call i32 @puts(ptr @case2)
+  br label %jt1.epilog ; NOTE(review): targets %jt1.epilog, not %jt2.epilog - confirm intended
+
+jt2.default:
+  call i32 @puts(ptr @default)
+  br label %jt2.epilog
+
+jt2.epilog:
+  %c2cmp = icmp ne i32 %c2, 0
+  br i1 %c2cmp, label %return, label %jt3.prologue, !prof !16 ; !16 weights: %return 99998, %jt3.prologue 1
+
+hot:
+  %c1 = call i32 @compute(i32 %num)
+  switch i32 %c1, label %jt1.default [ ; jt1: switch in the %hot block
+    i32 1, label %jt1.bb1
+    i32 2, label %jt1.bb2
+  ], !prof !14
+
+jt1.bb1:
+  call i32 @puts(ptr @case1)
+  br label %jt1.epilog
+
+jt1.bb2:
+  call i32 @puts(ptr @case2)
+  br label %jt1.epilog
+
+jt1.default:
+  call i32 @puts(ptr @default)
+  br label %jt1.epilog
+
+jt1.epilog:
+  br label %return
+
+jt3.prologue:
+  %c3 = call i32 @cleanup(i32 %num)
+  switch i32 %c3, label %jt3.default [ ; jt3: reached only through %jt2.epilog
+    i32 1, label %jt3.bb1
+    i32 2, label %jt3.bb2
+  ], !prof !14
+
+jt3.bb1:
+  call i32 @puts(ptr @case1)
+  br label %jt3.epilog
+
+jt3.bb2:
+  call i32 @puts(ptr @case2)
+  br label %jt3.epilog
+
+jt3.default:
+  call i32 @puts(ptr @default)
+  br label %jt3.epilog
+
+jt3.epilog:
+  call i32 @puts(ptr @jt3)
+  br label %return
+
+return:
+  ret i32 %mod3
+}
+
+define void @func_without_profile(i32 %num) { ; no !prof attachment on this function
+entry:
+  switch i32 %num, label %sw.default [ ; no branch weights on this switch
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  call i32 @puts(ptr @str.10)
+  br label %sw.epilog
+
+sw.bb1:
+  call i32 @puts(ptr @str.9)
+  br label %sw.epilog
+
+sw.default:
+  call i32 @puts(ptr @str.11)
+  br label %sw.epilog
+
+sw.epilog:
+  ret void
+}
+
+declare i32 @puts(ptr)
+declare i32 @printf(ptr, ...) ; declared but not called in this file
+declare i32 @compute(i32)
+declare i32 @transform(i32)
+declare i32 @cleanup(i32)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 230002}
+!4 = !{!"MaxCount", i64 100000}
+!5 = !{!"MaxInternalCount", i64 50000}
+!6 = !{!"MaxFunctionCount", i64 100000}
+!7 = !{!"NumCounts", i64 14}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12}
+!11 = !{i32 990000, i64 10000, i32 7}
+!12 = !{i32 999999, i64 1, i32 9}
+!13 = !{!"function_entry_count", i64 100000} ; entry count attached to @foo
+!14 = !{!"branch_weights", i32 60000, i32 20000, i32 20000} ; shared by all four switches in @foo
+!15 = !{!"branch_weights", i32 1, i32 99999}
+!16 = !{!"branch_weights", i32 99998, i32 1}