diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index b001771951e0f..329491f2a0fe9 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -311,9 +311,11 @@ class FunctionSpecializer { /// @param FuncSize Cost of specializing a function. /// @param AllSpecs A vector to add potential specializations to. /// @param SM A map for a function's specialisation range + /// @param ConsiderLiterals Whether to specialize on literal constants /// @return True, if any potential specializations were found bool findSpecializations(Function *F, unsigned FuncSize, - SmallVectorImpl &AllSpecs, SpecMap &SM); + SmallVectorImpl &AllSpecs, SpecMap &SM, + bool ConsiderLiterals); /// Compute the inlining bonus for replacing argument \p A with constant \p C. unsigned getInliningBonus(Argument *A, Constant *C); @@ -328,7 +330,7 @@ class FunctionSpecializer { /// Determine if it is possible to specialise the function for constant values /// of the formal parameter \p A. - bool isArgumentInteresting(Argument *A); + bool isArgumentInteresting(Argument *A, bool ConsiderLiterals); /// Check if the value \p V (an actual argument) is a constant or can only /// have a constant value. Return that constant. diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 548335d750e33..db0f06ec8ac17 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -663,7 +663,8 @@ bool FunctionSpecializer::run() { if (Inserted && Metrics.isRecursive) promoteConstantStackValues(&F); - if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) { + bool ConsiderLiterals = SpecializeLiteralConstant || Metrics.isRecursive; + if (!findSpecializations(&F, FuncSize, AllSpecs, SM, ConsiderLiterals)) { LLVM_DEBUG( dbgs() << "FnSpecialization: No possible specializations found for " << F.getName() << "\n"); @@ -803,7 +804,8 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) { bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, SmallVectorImpl &AllSpecs, - SpecMap &SM) { + SpecMap &SM, + bool ConsiderLiterals) { // A mapping from a specialisation signature to the index of the respective // entry in the all specialisation array. Used to ensure uniqueness of // specialisations. @@ -812,7 +814,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, // Get a list of interesting arguments. SmallVector Args; for (Argument &Arg : F->args()) - if (isArgumentInteresting(&Arg)) + if (isArgumentInteresting(&Arg, ConsiderLiterals)) Args.push_back(&Arg); if (Args.empty()) @@ -1032,14 +1034,16 @@ unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) { /// Determine if it is possible to specialise the function for constant values /// of the formal parameter \p A. -bool FunctionSpecializer::isArgumentInteresting(Argument *A) { +bool FunctionSpecializer::isArgumentInteresting(Argument *A, + bool ConsiderLiterals) { // No point in specialization if the argument is unused. if (A->user_empty()) return false; Type *Ty = A->getType(); - if (!Ty->isPointerTy() && (!SpecializeLiteralConstant || - (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy()))) + if (!Ty->isPointerTy() && + (!ConsiderLiterals || + (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy()))) return false; // SCCP solver does not record an argument that will be constructed on diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll index ef830a0e9a4a9..00319f1424d76 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -64,27 +64,27 @@ define i32 @main(ptr %0, i32 %1) { ; CHECK-LABEL: define i32 @main( ; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) { ; CHECK-NEXT: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]]) -; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 0) +; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]]) ; CHECK-NEXT: ret i32 0 ; ; ; CHECK-LABEL: define internal void @func.specialized.1( -; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) { +; CHECK-SAME: ptr [[TMP0:%.*]]) { ; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -; CHECK-NEXT: br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]] -; CHECK: [[BB6]]: +; CHECK-NEXT: br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]] +; CHECK: [[BB5]]: ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]] ; CHECK-NEXT: call void @decrement(ptr [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 -; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]]) -; CHECK-NEXT: br label %[[BB12]] -; CHECK: [[BB12]]: +; CHECK-NEXT: call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP10]]) +; CHECK-NEXT: br label %[[BB11]] +; CHECK: [[BB11]]: ; CHECK-NEXT: ret void ; ; @@ -108,6 +108,46 @@ define i32 @main(ptr %0, i32 %1) { ; CHECK-NEXT: ret void ; ; +; CHECK-LABEL: define internal void @func.specialized.3( +; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 +; CHECK-NEXT: br i1 [[TMP5]], label %[[BB12:.*]], label %[[BB6:.*]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP8]] +; CHECK-NEXT: call void @decrement(ptr [[TMP9]]) +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 +; CHECK-NEXT: call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP11]]) +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: ret void +; +; +; ONE-ITER-LABEL: define internal void @func( +; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr nocapture [[TMP2:%.*]]) { +; ONE-ITER-NEXT: [[TMP4:%.*]] = alloca i32, align 4 +; ONE-ITER-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 +; ONE-ITER-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; ONE-ITER-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +; ONE-ITER-NEXT: br i1 [[TMP6]], label %[[BB13:.*]], label %[[BB7:.*]] +; ONE-ITER: [[BB7]]: +; ONE-ITER-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; ONE-ITER-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; ONE-ITER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP9]] +; ONE-ITER-NEXT: call void [[TMP2]](ptr [[TMP10]]) +; ONE-ITER-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +; ONE-ITER-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1 +; ONE-ITER-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP12]], ptr [[TMP2]]) +; ONE-ITER-NEXT: br label %[[BB13]] +; ONE-ITER: [[BB13]]: +; ONE-ITER-NEXT: ret void +; +; ; ONE-ITER-LABEL: define internal void @increment( ; ONE-ITER-SAME: ptr nocapture [[TMP0:%.*]]) { ; ONE-ITER-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -127,27 +167,27 @@ define i32 @main(ptr %0, i32 %1) { ; ONE-ITER-LABEL: define i32 @main( ; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) { ; ONE-ITER-NEXT: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]]) -; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 0) +; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]]) ; ONE-ITER-NEXT: ret i32 0 ; ; ; ONE-ITER-LABEL: define internal void @func.specialized.1( -; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) { +; ONE-ITER-SAME: ptr [[TMP0:%.*]]) { ; ONE-ITER-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; ONE-ITER-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +; ONE-ITER-NEXT: store i32 0, ptr [[TMP2]], align 4 ; ONE-ITER-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 ; ONE-ITER-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -; ONE-ITER-NEXT: br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]] -; ONE-ITER: [[BB6]]: +; ONE-ITER-NEXT: br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]] +; ONE-ITER: [[BB5]]: ; ONE-ITER-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 ; ONE-ITER-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 ; ONE-ITER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]] ; ONE-ITER-NEXT: call void @decrement(ptr [[TMP8]]) ; ONE-ITER-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 ; ONE-ITER-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 -; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]]) -; ONE-ITER-NEXT: br label %[[BB12]] -; ONE-ITER: [[BB12]]: +; ONE-ITER-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement) +; ONE-ITER-NEXT: br label %[[BB11]] +; ONE-ITER: [[BB11]]: ; ONE-ITER-NEXT: ret void ; ; diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll index 930ed6627f7f1..e7e2fffac5d6e 100644 --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -51,7 +51,7 @@ entry: ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 [[FLAG]], label %[[PLUS:.*]], label %[[MINUS:.*]] ; CHECK: [[PLUS]]: -; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus) +; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus) ; CHECK-NEXT: br label %[[MERGE:.*]] ; CHECK: [[MINUS]]: ; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.specialized.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) @@ -79,10 +79,10 @@ entry: ; CHECK-LABEL: define internal i64 @compute.specialized.1( ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[OP1:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]]) ; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus) -; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP1]], [[OP0]] +; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) +; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]] ; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]] ; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]] ; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]] @@ -93,13 +93,13 @@ entry: ; CHECK-LABEL: define internal i64 @compute.specialized.2( ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) +; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42) +; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 42) +; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus) ; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]] ; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]] ; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], 42 ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2 ; CHECK-NEXT: ret i64 [[MUL]] ; @@ -117,3 +117,45 @@ entry: ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2 ; CHECK-NEXT: ret i64 [[MUL]] ; +; +; CHECK-LABEL: define internal i64 @compute.specialized.4( +; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42) +; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 42) +; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus) +; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]] +; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]] +; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], 42 +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2 +; CHECK-NEXT: ret i64 [[MUL]] +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.5( +; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) +; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]] +; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]] +; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2 +; CHECK-NEXT: ret i64 [[MUL]] +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.6( +; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OP0:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.6(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus) +; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]] +; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]] +; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]] +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2 +; CHECK-NEXT: ret i64 [[MUL]] +;