diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 5920dde9d77df..f20b01c186306 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -64,6 +64,13 @@ // - Perhaps a post-inlining function specialization pass could be more // aggressive on literal constants. // +// Limitations: +// ------------ +// - We are unable to consider specializations of functions called from indirect +// callsites whose pointer operand has a lattice value that is known to be +// constant, either from IPSCCP or previous iterations of FuncSpec. This is +// because SCCP has not yet replaced the uses of the known constant. +// // References: // ----------- // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 20249a20a37e4..382d3af3d0b2c 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -84,14 +84,11 @@ static cl::opt SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( "Enable function specialization on the address of global values")); -// Disabled by default as it can significantly increase compilation times. 
-// -// https://llvm-compile-time-tracker.com -// https://github.com/nikic/llvm-compile-time-tracker static cl::opt SpecializeLiteralConstant( - "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc( - "Enable specialization of functions that take a literal constant as an " - "argument")); + "funcspec-for-literal-constant", cl::init(true), cl::Hidden, + cl::desc( + "Enable specialization of functions that take a literal constant as an " + "argument")); bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ, DenseSet &DeadBlocks) { @@ -682,10 +679,11 @@ bool FunctionSpecializer::run() { (RequireMinSize && Metrics.NumInsts < MinFunctionSize)) continue; - // TODO: For now only consider recursive functions when running multiple - // times. This should change if specialization on literal constants gets - // enabled. - if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant) + // When specialization on literal constants is disabled, only consider + // recursive functions when running multiple times to save wasted analysis, + // as we will not be able to specialize on any newly found literal constant + // return values. 
+ if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive) continue; int64_t Sz = *Metrics.NumInsts.getValue(); diff --git a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll index 0e8f92cacf66d..42560fc3958d1 100644 --- a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll +++ b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=ipsccp %s -S -o - | FileCheck %s -; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp %s -S -o - | FileCheck %s +; RUN: opt -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s +; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s ;; Check the dbg.assign DIAssignID operand gets remapped after cloning. diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll index f29cf0d123939..7291d83b81611 100644 --- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll +++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="default" < %s | FileCheck %s +; RUN: opt -S --passes="default" -funcspec-for-literal-constant=false < %s | FileCheck %s define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll index 16a4685116312..0c24169d02c2c 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll @@ -4,7 +4,7 @@ ; Note that this test case shows that function specialization pass would ; transform the function even if no 
specialization happened. -; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s %struct = type { i8, i16, i32, i64, i64} @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll index ef830a0e9a4a9..6f36a394979d8 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes="ipsccp,deadargelim" -force-specialization -S < %s | FileCheck %s -; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER -; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-for-literal-constant=false -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) { diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll index 4e5a196d66829..a6a990c341593 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ 
b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -1,8 +1,10 @@ ; RUN: opt -passes="ipsccp" -force-specialization \ -; RUN: -funcspec-max-clones=2 -S < %s | FileCheck %s +; RUN: -funcspec-for-literal-constant=false -funcspec-max-clones=2 \ +; RUN: -S < %s | FileCheck %s ; RUN: opt -passes="ipsccp" -force-specialization \ -; RUN: -funcspec-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1 +; RUN: -funcspec-for-literal-constant=false -funcspec-max-clones=1 \ +; RUN: -S < %s | FileCheck %s --check-prefix=CONST1 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll index dfa1e5a42776a..2f42125d8cf97 100644 --- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-for-literal-constant=false < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll index 1926e29ddee01..06185332f22e0 100644 --- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll +++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" -funcspec-max-clones=1 < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=1 < %s | FileCheck %s define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline { entry: %call = tail call i32 %p(i32 noundef %x) diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll 
b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll index 930ed6627f7f1..97d77971a92d3 100644 --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s define i64 @main(i64 %x, i64 %y, i1 %flag) { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll index 3eae3dc261fb2..7d5e506064af7 100644 --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -1,7 +1,7 @@ ; RUN: opt -S --passes="ipsccp" \ +; RUN: -funcspec-for-literal-constant=false \ ; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT ; RUN: opt -S --passes="ipsccp" \ -; RUN: -funcspec-for-literal-constant \ ; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT define i32 @f0(i32 noundef %x) { diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll index a653760abb2cc..73291600edb85 100644 --- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes="ipsccp" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s 
| FileCheck %s --check-prefix=ONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO -; RUN: opt -passes="ipsccp" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE ; Make sure that we iterate correctly after sorting the specializations: ; FnSpecialization: Specializations for function compute diff --git a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll new file mode 100644 index 0000000000000..f4ba0e72a1b43 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -force-specialization \ +; RUN: -funcspec-max-iters=3 -S < %s | FileCheck %s + +; RUN: opt -passes="ipsccp" -force-specialization \ +; RUN: -funcspec-for-literal-constant=false -funcspec-max-iters=3 \ +; RUN: -S < %s | FileCheck %s --check-prefix=NOLIT + +@global_true = constant i1 true +@global_false = constant i1 false + +define i64 @main() { +entry: + %op1 = call ptr @select_op(ptr @global_true) + %op2 = call ptr @select_op(ptr @global_false) + + %c1 = call i64 
@compute(ptr %op1) + %c2 = call i64 @compute(ptr %op2) + %add = add i64 %c1, %c2 + ret i64 %add +} + +define ptr @select_op(ptr %flag) { + %flag.val = load i1, ptr %flag + %op = select i1 %flag.val, ptr @plus, ptr @minus + ret ptr %op +} + +define internal i64 @compute(ptr %op) { +entry: + %res = call i64 %op(i64 1) + ret i64 %res +} + +define internal i64 @plus(i64 %x) { +entry: + %sum = add i64 %x, 1 + ret i64 %sum +} + +define internal i64 @minus(i64 %x) { +entry: + %diff = sub i64 %x, 1 + ret i64 %diff +} +; CHECK-LABEL: define i64 @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true) +; CHECK-NEXT: [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false) +; CHECK-NEXT: [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus) +; CHECK-NEXT: [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus) +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[C1]], [[C2]] +; CHECK-NEXT: ret i64 [[ADD]] +; +; +; CHECK-LABEL: define ptr @select_op( +; CHECK-SAME: ptr [[FLAG:%.*]]) { +; CHECK-NEXT: [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1 +; CHECK-NEXT: [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus +; CHECK-NEXT: ret ptr [[OP]] +; +; +; CHECK-LABEL: define internal i64 @plus( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SUM:%.*]] = add i64 [[X]], 1 +; CHECK-NEXT: ret i64 [[SUM]] +; +; +; CHECK-LABEL: define internal i64 @minus( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[X]], 1 +; CHECK-NEXT: ret i64 [[DIFF]] +; +; +; CHECK-LABEL: define internal ptr @select_op.specialized.1( +; CHECK-SAME: ptr [[FLAG:%.*]]) { +; CHECK-NEXT: ret ptr poison +; +; +; CHECK-LABEL: define internal ptr @select_op.specialized.2( +; CHECK-SAME: ptr [[FLAG:%.*]]) { +; CHECK-NEXT: ret ptr poison +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.3( +; CHECK-SAME: ptr [[OP:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; 
CHECK-NEXT: [[RES:%.*]] = call i64 @plus(i64 1) +; CHECK-NEXT: ret i64 [[RES]] +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.4( +; CHECK-SAME: ptr [[OP:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @minus(i64 1) +; CHECK-NEXT: ret i64 [[RES]] +; +; +; NOLIT-LABEL: define i64 @main() { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true) +; NOLIT-NEXT: [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false) +; NOLIT-NEXT: [[C1:%.*]] = call i64 @compute(ptr @plus) +; NOLIT-NEXT: [[C2:%.*]] = call i64 @compute(ptr @minus) +; NOLIT-NEXT: [[ADD:%.*]] = add i64 [[C1]], [[C2]] +; NOLIT-NEXT: ret i64 [[ADD]] +; +; +; NOLIT-LABEL: define ptr @select_op( +; NOLIT-SAME: ptr [[FLAG:%.*]]) { +; NOLIT-NEXT: [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1 +; NOLIT-NEXT: [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus +; NOLIT-NEXT: ret ptr [[OP]] +; +; +; NOLIT-LABEL: define internal i64 @compute( +; NOLIT-SAME: ptr [[OP:%.*]]) { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[RES:%.*]] = call i64 [[OP]](i64 1) +; NOLIT-NEXT: ret i64 [[RES]] +; +; +; NOLIT-LABEL: define internal i64 @plus( +; NOLIT-SAME: i64 [[X:%.*]]) { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[SUM:%.*]] = add i64 [[X]], 1 +; NOLIT-NEXT: ret i64 [[SUM]] +; +; +; NOLIT-LABEL: define internal i64 @minus( +; NOLIT-SAME: i64 [[X:%.*]]) { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[DIFF:%.*]] = sub i64 [[X]], 1 +; NOLIT-NEXT: ret i64 [[DIFF]] +; +; +; NOLIT-LABEL: define internal ptr @select_op.specialized.1( +; NOLIT-SAME: ptr [[FLAG:%.*]]) { +; NOLIT-NEXT: ret ptr poison +; +; +; NOLIT-LABEL: define internal ptr @select_op.specialized.2( +; NOLIT-SAME: ptr [[FLAG:%.*]]) { +; NOLIT-NEXT: ret ptr poison +;