diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 6d3b797da3787..91918d614a90f 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -498,9 +498,12 @@ Automatically put hot code on 2MB page(s) (hugify) at runtime. No manual call to hugify is needed in the binary (which is what --hot-text relies on). -- `--icf` +- `--icf=` Fold functions with identical code + - `all`: Enable identical code folding + - `none`: Disable identical code folding (default) + - `safe`: Enable safe identical code folding - `--icp` diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 0b3682353f736..624322b325a5f 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -428,6 +428,9 @@ class BinaryFunction { /// Function order for streaming into the destination binary. uint32_t Index{-1U}; + /// Function is referenced by a non-control flow instruction. + bool HasAddressTaken{false}; + /// Get basic block index assuming it belongs to this function. unsigned getIndex(const BinaryBasicBlock *BB) const { assert(BB->getIndex() < BasicBlocks.size()); @@ -817,6 +820,14 @@ class BinaryFunction { return nullptr; } + /// Return true if function is referenced in a non-control flow instruction. + /// This flag is set when the code and relocation analyses are being + /// performed, which occurs when safe ICF (Identical Code Folding) is enabled. + bool hasAddressTaken() const { return HasAddressTaken; } + + /// Set whether function is referenced in a non-control flow instruction. + void setHasAddressTaken(bool AddressTaken) { HasAddressTaken = AddressTaken; } + /// Returns the raw binary encoding of this function. ErrorOr> getData() const; @@ -2094,6 +2105,9 @@ class BinaryFunction { // adjustments. 
void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset); + /// Analyze instruction to identify a function reference. + void analyzeInstructionForFuncReference(const MCInst &Inst); + /// Scan function for references to other functions. In relocation mode, /// add relocations for external references. In non-relocation mode, detect /// and mark new entry points. diff --git a/bolt/include/bolt/Passes/IdenticalCodeFolding.h b/bolt/include/bolt/Passes/IdenticalCodeFolding.h index b4206fa360744..f59e75c618605 100644 --- a/bolt/include/bolt/Passes/IdenticalCodeFolding.h +++ b/bolt/include/bolt/Passes/IdenticalCodeFolding.h @@ -11,6 +11,7 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Passes/BinaryPasses.h" +#include "llvm/ADT/SparseBitVector.h" namespace llvm { namespace bolt { @@ -20,22 +21,72 @@ namespace bolt { /// class IdenticalCodeFolding : public BinaryFunctionPass { protected: - bool shouldOptimize(const BinaryFunction &BF) const override { - if (BF.hasUnknownControlFlow()) - return false; - if (BF.isFolded()) - return false; - if (BF.hasSDTMarker()) - return false; - return BinaryFunctionPass::shouldOptimize(BF); - } + /// Return true if the function is safe to fold. + bool shouldOptimize(const BinaryFunction &BF) const override; public: + enum class ICFLevel { + None, /// No ICF. (Default) + Safe, /// Safe ICF. + All, /// Aggressive ICF. + }; explicit IdenticalCodeFolding(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) {} const char *getName() const override { return "identical-code-folding"; } Error runOnFunctions(BinaryContext &BC) override; + +private: + /// Bit vector of memory addresses of vtables. + llvm::SparseBitVector<> VTableBitVector; + + /// Return true if the memory address is in a vtable. + bool isAddressInVTable(uint64_t Address) const { + return VTableBitVector.test(Address / 8); + } + + /// Mark memory address of a vtable as used. 
+ void setAddressUsedInVTable(uint64_t Address) { + VTableBitVector.set(Address / 8); + } + + /// Scan symbol table and mark memory addresses of + /// vtables. + void initVTableReferences(const BinaryContext &BC); + + /// Analyze code section and relocations and mark functions that are not + /// safe to fold. + void markFunctionsUnsafeToFold(BinaryContext &BC); + + /// Process static and dynamic relocations in the data sections to identify + /// function references, and mark them as unsafe to fold. It filters out + /// symbol references that are in vtables. + void analyzeDataRelocations(BinaryContext &BC); + + /// Process functions that have been disassembled and mark functions that are + /// used in non-control flow instructions as unsafe to fold. + void analyzeFunctions(BinaryContext &BC); +}; + +class DeprecatedICFNumericOptionParser + : public cl::parser { +public: + explicit DeprecatedICFNumericOptionParser(cl::Option &O) + : cl::parser(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, + IdenticalCodeFolding::ICFLevel &Value) { + if (Arg == "0" || Arg == "1") { + Value = (Arg == "0") ? 
IdenticalCodeFolding::ICFLevel::None + : IdenticalCodeFolding::ICFLevel::All; + errs() << formatv("BOLT-WARNING: specifying numeric value \"{0}\" " + "for option -{1} is deprecated\n", + Arg, ArgName); + return false; + } + return cl::parser::parse(O, ArgName, Arg, + Value); + } }; } // namespace bolt diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index c12217d549479..63693eed0a9ca 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1513,6 +1513,20 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) { return Target; } +void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) { + for (const MCOperand &Op : MCPlus::primeOperands(Inst)) { + if (!Op.isExpr()) + continue; + const MCExpr &Expr = *Op.getExpr(); + if (Expr.getKind() != MCExpr::SymbolRef) + continue; + const MCSymbol &Symbol = cast(Expr).getSymbol(); + // Set HasAddressTaken for a function regardless of the ICF level. + if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol)) + BF->setHasAddressTaken(true); + } +} + bool BinaryFunction::scanExternalRefs() { bool Success = true; bool DisassemblyFailed = false; @@ -1633,6 +1647,8 @@ bool BinaryFunction::scanExternalRefs() { [](const MCOperand &Op) { return Op.isExpr(); })) { // Skip assembly if the instruction may not have any symbolic operands. continue; + } else { + analyzeInstructionForFuncReference(Instruction); } // Emit the instruction using temp emitter and generate relocations. 
diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp index 38e080c9dd621..8923562776cc4 100644 --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -15,6 +15,7 @@ #include "bolt/Core/ParallelUtilities.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Timer.h" #include @@ -42,8 +43,41 @@ TimeICF("time-icf", cl::ReallyHidden, cl::ZeroOrMore, cl::cat(BoltOptCategory)); + +cl::opt + ICF("icf", cl::desc("fold functions with identical code"), + cl::init(bolt::IdenticalCodeFolding::ICFLevel::None), + cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "1", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, + "none", + "Disable identical code folding (default)"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "0", + "Disable identical code folding (default)"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe, + "safe", "Enable safe identical code folding")), + cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory)); } // namespace opts +bool IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const { + if (BF.hasUnknownControlFlow()) + return false; + if (BF.isFolded()) + return false; + if (BF.hasSDTMarker()) + return false; + if (BF.isPseudo()) + return false; + if (opts::ICF == ICFLevel::Safe && BF.hasAddressTaken()) + return false; + return BinaryFunctionPass::shouldOptimize(BF); +} + /// Compare two jump tables in 2 functions. The function relies on consistent /// ordering of basic blocks in both binary functions (e.g. DFS). 
static bool equalJumpTables(const JumpTable &JumpTableA, @@ -340,6 +374,74 @@ typedef std::unordered_map, namespace llvm { namespace bolt { +void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) { + for (const auto &[Address, Data] : BC.getBinaryData()) { + // Filter out all symbols that are not vtables. + if (!Data->getName().starts_with("_ZTV")) + continue; + for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8) + setAddressUsedInVTable(I); + } +} + +void IdenticalCodeFolding::analyzeDataRelocations(BinaryContext &BC) { + initVTableReferences(BC); + // For static relocations there should be a symbol for function references. + for (const BinarySection &Sec : BC.sections()) { + if (!Sec.hasSectionRef() || !Sec.isData()) + continue; + for (const auto &Rel : Sec.relocations()) { + const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); + if (isAddressInVTable(RelAddr)) + continue; + if (BinaryFunction *BF = BC.getFunctionForSymbol(Rel.Symbol)) + BF->setHasAddressTaken(true); + } + // For dynamic relocations there are two cases: + // 1: No symbol and only addend. + // 2: There is a symbol, but it does not reference a function in the binary.
+ for (const auto &Rel : Sec.dynamicRelocations()) { + const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); + if (isAddressInVTable(RelAddr)) + continue; + if (BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Rel.Addend)) + BF->setHasAddressTaken(true); + } + } +} + +void IdenticalCodeFolding::analyzeFunctions(BinaryContext &BC) { + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + for (const BinaryBasicBlock &BB : BF) + for (const MCInst &Inst : BB) + if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst))) + BF.analyzeInstructionForFuncReference(Inst); + }; + ParallelUtilities::PredicateTy SkipFunc = + [&](const BinaryFunction &BF) -> bool { return !BF.hasCFG(); }; + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipFunc, "markUnsafe"); + + LLVM_DEBUG({ + for (const auto &BFIter : BC.getBinaryFunctions()) { + if (!BFIter.second.hasAddressTaken()) + continue; + dbgs() << "BOLT-DEBUG: skipping function with reference taken " + << BFIter.second.getOneName() << '\n'; + } + }); +} + +void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) { + NamedRegionTimer MarkFunctionsUnsafeToFoldTimer( + "markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown", + "ICF breakdown", opts::TimeICF); + if (!BC.isX86()) + BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n"; + analyzeDataRelocations(BC); + analyzeFunctions(BC); +} Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); @@ -385,7 +487,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { "ICF breakdown", opts::TimeICF); for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - if (!this->shouldOptimize(BF)) + if (!shouldOptimize(BF)) continue; CongruentBuckets[&BF].emplace(&BF); } @@ -475,7 +577,8 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 
LLVM_DEBUG(SinglePass.stopTimer()); }; - + if (opts::ICF == ICFLevel::Safe) + markFunctionsUnsafeToFold(BC); hashFunctions(); createCongruentBuckets(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index b090604183348..3b7d62f335598 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -54,6 +54,9 @@ extern cl::opt PrintDynoStats; extern cl::opt DumpDotAll; extern cl::opt AsmDump; extern cl::opt PLT; +extern cl::opt + ICF; static cl::opt DynoStatsAll("dyno-stats-all", @@ -65,9 +68,6 @@ static cl::opt cl::desc("eliminate unreachable code"), cl::init(true), cl::cat(BoltOptCategory)); -cl::opt ICF("icf", cl::desc("fold functions with identical code"), - cl::cat(BoltOptCategory)); - static cl::opt JTFootprintReductionFlag( "jt-footprint-reduction", cl::desc("make jump tables size smaller at the cost of using more " @@ -398,7 +398,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { opts::StripRepRet); Manager.registerPass(std::make_unique(PrintICF), - opts::ICF); + opts::ICF != IdenticalCodeFolding::ICFLevel::None); Manager.registerPass( std::make_unique(NeverPrint, opts::SpecializeMemcpy1), @@ -423,7 +423,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(PrintInline)); Manager.registerPass(std::make_unique(PrintICF), - opts::ICF); + opts::ICF != IdenticalCodeFolding::ICFLevel::None); Manager.registerPass(std::make_unique(PrintPLT)); diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp index 74b5ca18abce4..35f6710506646 100644 --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -28,7 +28,9 @@ using namespace bolt; namespace opts { extern cl::OptionCategory BoltDiffCategory; extern cl::opt NeverPrint; -extern cl::opt ICF; +extern cl::opt + ICF; static cl::opt IgnoreLTOSuffix( "ignore-lto-suffix", @@ -697,7 +699,7 @@ void 
RewriteInstance::compare(RewriteInstance &RI2) { } // Pre-pass ICF - if (opts::ICF) { + if (opts::ICF != IdenticalCodeFolding::ICFLevel::None) { IdenticalCodeFolding ICF(opts::NeverPrint); outs() << "BOLT-DIFF: Starting ICF pass for binary 1"; BC->logBOLTErrorsAndQuitOnFatal(ICF.runOnFunctions(*BC)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a4c50cbc3e2bb..1a27c36f57a25 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -19,6 +19,7 @@ #include "bolt/Core/Relocation.h" #include "bolt/Passes/BinaryPasses.h" #include "bolt/Passes/CacheMetrics.h" +#include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/ReorderFunctions.h" #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/DataAggregator.h" @@ -86,6 +87,9 @@ extern cl::opt ReorderFunctions; extern cl::opt TerminalTrap; extern cl::opt TimeBuild; extern cl::opt TimeRewrite; +extern cl::opt + ICF; cl::opt AllowStripped("allow-stripped", cl::desc("allow processing of stripped binaries"), @@ -2050,6 +2054,13 @@ void RewriteInstance::adjustCommandLineOptions() { exit(1); } + if (!BC->HasRelocations && + opts::ICF == IdenticalCodeFolding::ICFLevel::Safe) { + BC->errs() << "BOLT-ERROR: binary built without relocations. Safe ICF is " + "not supported\n"; + exit(1); + } + if (opts::Instrument || (opts::ReorderFunctions != ReorderFunctions::RT_NONE && !opts::HotText.getNumOccurrences())) { diff --git a/bolt/test/X86/icf-safe-icp.test b/bolt/test/X86/icf-safe-icp.test new file mode 100644 index 0000000000000..a9227d311edce --- /dev/null +++ b/bolt/test/X86/icf-safe-icp.test @@ -0,0 +1,148 @@ +## Check that BOLT correctly handles folding functions with --icf=safe +## that can be referenced through a non-control flow instruction when ICP optimization is enabled. +## This test also checks that destructors are folded.
+ +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding Derived3Destructor into Derived2Destructor +# ICFCHECK-NEXT: folding Derived3Func into Derived2Func + +# SAFEICFCHECK: skipping function with reference taken Derived3Func +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: folding Derived3Destructor into Derived2Destructor +# SAFEICFCHECK-NEXT: ===--------- + + +## generate profile +## clang++ -O2 -fprofile-generate=. main.cpp -c -o mainProf.o +## PROF=test.profdata +## clang++ -m64 -fprofile-use=$PROF \ +## -mllvm -disable-icp=true -mllvm -print-after-all \ +## -g0 -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ +## -fdebug-types-section \ +## main.cpp -c -o mainProfLTO.bc +## PASS='pgo-icall-prom' +## clang++ -m64 -fprofile-use=$PROF \ +## -O3 -Rpass=$PASS \ +## -mllvm -print-before=$PASS \ +## -mllvm -print-after=$PASS \ +## -mllvm -filter-print-funcs=main \ +## -mllvm -debug-only=$PASS \ +## -x ir \ +## mainProfLTO.bc -c -o mainProfFinal.o + +## class Base { +## public: +## virtual int func(int a, int b) const = 0; +## +## virtual ~Base() {}; +## }; +## +## class Derived2 : public Base { +## int c = 5; +## public: +## __attribute__((noinline)) int func(int a, int b)const override { return a * (a - b) + this->c; } +## +## ~Derived2() {} +## }; +## +## class Derived3 : public Base { +## int c = 500; +## public: +## __attribute__((noinline)) int func(int a, int b) const override { return a * (a - b) + this->c; } +## ~Derived3() {} +## }; +## +## __attribute__((noinline)) Base *createType(int a) { +## Base *base = nullptr; +## if (a == 4) +## 
base = new Derived2(); +## else +## base = new Derived3(); +## return base; +## } +## +## extern int returnFive(); +## extern int returnFourOrFive(int val); +## int main(int argc, char **argv) { +## int sum = 0; +## int a = returnFourOrFive(argc); +## int b = returnFive(); +## Base *ptr = createType(a); +## Base *ptr2 = createType(b); +## sum += ptr->func(b, a) + ptr2->func(b, a); +## return 0; +## } +## clang++ -c helper.cpp -o helper.o +## int FooVar = 1; +## int BarVar = 2; +## +## int fooGlobalFuncHelper(int a, int b) { +## return 5; +## } +## Manually modified to remove "extra" assembly. + .globl main + .type main,@function +main: + leaq Derived3Func(%rip), %rcx + callq Derived3Func + .size main, .-main + + .weak Derived2Func + .type Derived2Func,@function +Derived2Func: + imull %esi, %eax + retq + .size Derived2Func, .-Derived2Func + + .weak Derived2Destructor + .type Derived2Destructor,@function +Derived2Destructor: + jmp _ZdlPvm@PLT + .size Derived2Destructor, .-Derived2Destructor + + .weak Derived3Func + .type Derived3Func,@function +Derived3Func: + imull %esi, %eax + retq + .size Derived3Func, .-Derived3Func + + .weak _ZN4BaseD2Ev + .type _ZN4BaseD2Ev,@function +_ZN4BaseD2Ev: + retq + .size _ZN4BaseD2Ev, .-_ZN4BaseD2Ev + + .weak Derived3Destructor + .type Derived3Destructor,@function +Derived3Destructor: + jmp _ZdlPvm@PLT + .size Derived3Destructor, .-Derived3Destructor + + .type _ZTV8Derived2,@object + .section .data.rel.ro._ZTV8Derived2,"awG",@progbits,_ZTV8Derived2,comdat + .weak _ZTV8Derived2 +_ZTV8Derived2: + .quad 0 + .quad _ZTI8Derived2 + .quad Derived2Func + .quad _ZN4BaseD2Ev + .quad Derived2Destructor + .size _ZTV8Derived2, 40 + + .type _ZTV8Derived3,@object + .section .data.rel.ro._ZTV8Derived3,"awG",@progbits,_ZTV8Derived3,comdat + .weak _ZTV8Derived3 +_ZTV8Derived3: + .quad 0 + .quad _ZTI8Derived3 + .quad Derived3Func + .quad _ZN4BaseD2Ev + .quad Derived3Destructor + .size _ZTV8Derived3, 40 diff --git 
a/bolt/test/X86/icf-safe-process-rela-data.test b/bolt/test/X86/icf-safe-process-rela-data.test new file mode 100644 index 0000000000000..cf71f55257777 --- /dev/null +++ b/bolt/test/X86/icf-safe-process-rela-data.test @@ -0,0 +1,64 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that are only referenced from a .rela.data section. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -no-pie +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc + +# SAFEICFCHECK: skipping function with reference taken fooAddFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: ===--------- + +## clang++ main.cpp +## Other functions removed for brevity. +## int main(int argc, char **argv) { +## const static int (*const funcGlobalBarAdd)(int, int) = barAddHdlper; +## const int (* const funcGlobalBarMul)(int, int) = fooGlobalFuncHelper; +## helper2(funcGlobalBarAdd, funcGlobalFooAdd, 3, 4) +## } +## Extra assembly removed. 
+ + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helperFunc + .type helperFunc,@function +helperFunc: + retq + .size helperFunc, .-helperFunc + + .globl main + .type main,@function +main: + movq localStaticVarBarAdd, %rdi + movq localStaticVarFooAdd, %rsi + callq helperFunc + retq + .size main, .-main + + .type localStaticVarBarAdd,@object # @localStaticVarBarAdd + .data +localStaticVarBarAdd: + .quad barAddFunc + .size localStaticVarBarAdd, 8 + + .type localStaticVarFooAdd,@object # @localStaticVarFooAdd +localStaticVarFooAdd: + .quad fooAddFunc + .size localStaticVarFooAdd, 8 diff --git a/bolt/test/X86/icf-safe-test1-no-relocs.test b/bolt/test/X86/icf-safe-test1-no-relocs.test new file mode 100644 index 0000000000000..b4e55a6d5504f --- /dev/null +++ b/bolt/test/X86/icf-safe-test1-no-relocs.test @@ -0,0 +1,20 @@ +## Check that BOLT reports an error for a binary with no relocations with the --icf=safe option. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe +# RUN: not llvm-bolt --no-threads %t.exe --icf=safe -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# SAFEICFCHECK: BOLT-ERROR: binary built without relocations. 
Safe ICF is not supported + +## int main(int argc, char **argv) { +## return temp; +## } + .globl main + .type main,@function +main: + .cfi_startproc + retq +.Lfunc_end8: + .size main, .-main + .cfi_endproc diff --git a/bolt/test/X86/icf-safe-test1.test b/bolt/test/X86/icf-safe-test1.test new file mode 100644 index 0000000000000..8a8e5ccf38e7c --- /dev/null +++ b/bolt/test/X86/icf-safe-test1.test @@ -0,0 +1,98 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that can be referenced by non-control flow instructions. +## It invokes BOLT twice first testing CFG path, and second when functions have to be disassembled. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf \ +# RUN: -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf \ +# RUN: -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf \ +# RUN: --skip-funcs=helper1Func,main -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECKNOCFG %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc +# ICFCHECK-NEXT: folding barSubFunc into fooSubFunc + +# SAFEICFCHECK: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: folding barSubFunc into fooSubFunc +# SAFEICFCHECK-NEXT: ===--------- + +# SAFEICFCHECKNOCFG: skipping function with reference taken barAddFunc +# SAFEICFCHECKNOCFG-NEXT: ICF iteration 1 +# SAFEICFCHECKNOCFG-NEXT: folding barSubFunc into fooSubFunc +# SAFEICFCHECKNOCFG-NEXT: ===--------- + +## clang++ -c main.cpp -o main.o +## extern int FooVar; +## extern int BarVar; +## [[clang::noinline]] +## int fooSub(int a, int b) { +## return a - b; +## } +## [[clang::noinline]] +## int 
barSub(int a, int b) { +## return a - b; +## } +## [[clang::noinline]] +## int fooAdd(int a, int b) { +## return a + b; +## } +## [[clang::noinline]] +## int barAdd(int a, int b) { +## return a + b; +## } +## int main(int argc, char **argv) { +## int temp = helper1(barAdd, FooVar, BarVar) + +## fooSub(FooVar, BarVar) + +## barSub(FooVar, BarVar) + fooAdd(FooVar, BarVar); +## return temp; +## } + .globl fooSubFunc + .type fooSubFunc,@function +fooSubFunc: + subl -8(%rbp), %eax + retq + .size fooSubFunc, .-fooSubFunc + + .globl barSubFunc + .type barSubFunc,@function +barSubFunc: + subl -8(%rbp), %eax + retq + .size barSubFunc, .-barSubFunc + + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helper1Func + .type helper1Func,@function +helper1Func: + leaq barAddFunc(%rip), %rax + cmpq %rax, -16(%rbp) + retq + .size helper1Func, .-helper1Func + + .globl main + .type main,@function +main: + leaq barAddFunc(%rip), %rdi + callq helper1Func + callq fooSubFunc + callq barSubFunc + callq fooAddFunc + retq + .size main, .-main diff --git a/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test b/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test new file mode 100644 index 0000000000000..ea2d8a5f11e06 --- /dev/null +++ b/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test @@ -0,0 +1,95 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that can be referenced by non-control flow instructions, +## when binary is built with -fno-PIC/-fno-PIE. 
+ +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -no-pie +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc +# ICFCHECK-NEXT: folding barMulFunc into fooMulFunc + +# SAFEICFCHECK: skipping function with reference taken fooMulFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barMulFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: ===--------- + +## clang++ main.cpp -c -o -fno-PIC +## Similar code gets generated for external reference function. +## Other functions removed for brevity. +## const static int (*const funcGlobalBarAdd)(int, int) = barAdd; +## const int (*const funcGlobalBarMul)(int, int) = barMul; +## int main(int argc, char **argv) { +## int temp = helper1(funcGlobalBarAdd, FooVar, BarVar) +## return temp; +## } +## Manually modified to remove "extra" assembly. 
+ .globl fooMulFunc + .type fooMulFunc,@function +fooMulFunc: + imull -8(%rbp), %eax + retq + .size fooMulFunc, .-fooMulFunc + + .globl barMulFunc + .type barMulFunc,@function +barMulFunc: + imull -8(%rbp), %eax + retq + .size barMulFunc, .-barMulFunc + + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helperFunc + .type helperFunc,@function +helperFunc: + movabsq $barAddFunc, %rax + cmpq %rax, -16(%rbp) + retq + .size helperFunc, .-helperFunc + + .globl main + .type main,@function +main: + movl FooVar, %esi + movl BarVar, %edx + movabsq $barAddFunc, %rdi + callq helperFunc + movabsq $fooMulFunc, %rdi + movabsq $barMulFunc, %rsi + retq + .size main, .-main + + .type FooVar,@object + .data + .globl FooVar +FooVar: + .long 1 + .size FooVar, 4 + + .type BarVar,@object + .globl BarVar +BarVar: + .long 2 + .size BarVar, 4 + + .type .L.str,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "val: %d\n" + .size .L.str, 9