diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ece22428e3cbd..3229ec4f4d0f5 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -93,6 +93,7 @@ STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null"); STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed"); STATISTIC(NumUDivURemsNarrowedExpanded, "Number of bound udiv's/urem's expanded"); +STATISTIC(NumZExt, "Number of non-negative deductions"); static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition())) @@ -1032,6 +1033,24 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { return true; } +static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) { /* Sets the nneg flag on a scalar zext when LVI's constant range for the operand, queried at this use, is entirely non-negative. Returns true only when the flag was newly set, false when nothing changed. */ + if (ZExt->getType()->isVectorTy()) /* vector-typed zexts are left untouched */ + return false; + + if (ZExt->hasNonNeg()) /* flag already present: report no change */ + return false; + + const Use &Base = ZExt->getOperandUse(0); /* query the range at the use, not just at the definition */ + if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) + .isAllNonNegative()) /* with undef disallowed, the range is not provably non-negative */ + return false; + + ++NumZExt; /* count the deduction before mutating the instruction */ + ZExt->setNonNeg(); + + return true; +} + static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { using OBO = OverflowingBinaryOperator; @@ -1162,6 +1181,9 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, case Instruction::SExt: BBChanged |= processSExt(cast<SExtInst>(&II), LVI); break; + case Instruction::ZExt: /* route zext instructions to the nneg inference */ + BBChanged |= processZExt(cast<ZExtInst>(&II), LVI); + break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/zext.ll b/llvm/test/Transforms/CorrelatedValuePropagation/zext.ll new file mode 100644 index 0000000000000..44434c696fe37 --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/zext.ll @@ -0,0 +1,173 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s 
-passes=correlated-propagation -S | FileCheck %s + +declare void @use64(i64) + +define void @test1(i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]]) +; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE]] to i32 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %a = phi i32 [ %n, %entry ], [ %ext, %for.body ] + %cmp = icmp sgt i32 %a, -1 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %ext.wide = zext i32 %a to i64 + call void @use64(i64 %ext.wide) + %ext = trunc i64 %ext.wide to i32 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +;; Negative test to show transform doesn't happen unless n >= 0. 
+define void @test2(i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]]) +; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE]] to i32 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %a = phi i32 [ %n, %entry ], [ %ext, %for.body ] + %cmp = icmp sgt i32 %a, -2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %ext.wide = zext i32 %a to i64 + call void @use64(i64 %ext.wide) + %ext = trunc i64 %ext.wide to i32 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +;; Non looping test case. +define void @test3(i32 %n) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]]) +; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %n, -1 + br i1 %cmp, label %bb, label %exit + +bb: + %ext.wide = zext i32 %n to i64 + call void @use64(i64 %ext.wide) + %ext = trunc i64 %ext.wide to i32 + br label %exit + +exit: + ret void +} + +;; Non looping negative test case. 
+define void @test4(i32 %n) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]]) +; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %n, -2 + br i1 %cmp, label %bb, label %exit + +bb: + %ext.wide = zext i32 %n to i64 + call void @use64(i64 %ext.wide) + %ext = trunc i64 %ext.wide to i32 + br label %exit + +exit: + ret void +} + +define i64 @may_including_undef(i1 %c.1, i1 %c.2) { +; CHECK-LABEL: @may_including_undef( +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[TRUE_1:%.*]], label [[FALSE:%.*]] +; CHECK: true.1: +; CHECK-NEXT: br i1 [[C_2:%.*]], label [[TRUE_2:%.*]], label [[EXIT:%.*]] +; CHECK: true.2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: false: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, [[TRUE_2]] ], [ undef, [[FALSE]] ] +; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[P]] to i64 +; CHECK-NEXT: ret i64 [[EXT]] +; + br i1 %c.1, label %true.1, label %false + +true.1: + br i1 %c.2, label %true.2, label %exit + +true.2: + br label %exit + +false: + br label %exit + +exit: + %p = phi i32 [ 0, %true.1 ], [ 1, %true.2], [ undef, %false ] + %ext = zext i32 %p to i64 + ret i64 %ext +} + +define i64 @test_infer_at_use(i32 noundef %n) { +; CHECK-LABEL: @test_infer_at_use( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i64 [[EXT]], i64 0 +; CHECK-NEXT: ret i64 [[SELECT]] +; + %cmp = icmp sgt i32 %n, -1 + %ext = zext i32 %n to i64 + %select = select i1 %cmp, i64 %ext, i64 0 + ret i64 %select +} diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 2c78b7208c19f..baeaef0b67b04 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -20,11 +20,11 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; AUTO_VEC: for.body.preheader: -; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 +; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[N]] to i64 ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 32 ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: -; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 4294967264 +; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 2147483616 ; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 @@ -103,12 +103,12 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) { ; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; AUTO_VEC: for.body.preheader: -; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 +; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[N]] to i64 ; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[ZEXT]], 7 ; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 8 ; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; AUTO_VEC: for.body.preheader.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 4294967288 +; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640 ; AUTO_VEC-NEXT: br 
label [[FOR_BODY:%.*]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll index 80f96b17c9083..4ff67b5601611 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll @@ -171,11 +171,11 @@ define void @test_runtime_trip_count(i32 %N) { ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll index e169f2570cd0e..40ea616331d46 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll @@ -24,11 +24,11 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) { ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[S:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[S]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg 
i32 [[S]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[S]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER5:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967288 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 0fbbafca696c8..246bb0095e1a2 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -26,7 +26,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]]