37#define DEBUG_TYPE "vector-combine"
43STATISTIC(NumVecLoad,
"Number of vector loads formed");
44STATISTIC(NumVecCmp,
"Number of vector compares formed");
45STATISTIC(NumVecBO,
"Number of vector binops formed");
46STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
47STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
48STATISTIC(NumScalarBO,
"Number of scalar binops formed");
49STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
53 cl::desc(
"Disable all vector combine transforms"));
57 cl::desc(
"Disable binop extract to shuffle transforms"));
61 cl::desc(
"Max number of instructions to scan for vector combining."));
// Sentinel extract/insert element index meaning "no valid index was found".
// (The fused "63" line-number prefix from the garbled extraction is removed.)
static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71 bool TryEarlyFoldsOnly)
72 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
73 CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 bool TryEarlyFoldsOnly;
100 unsigned PreferredExtractIndex)
const;
104 unsigned PreferredExtractIndex);
128 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
135 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
152 if (
auto *OpI = dyn_cast<Instruction>(
Op)) {
163 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
164 V = BitCast->getOperand(0);
172 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
173 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
179 Type *ScalarTy = Load->getType()->getScalarType();
182 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
189bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
203 auto *
Load = dyn_cast<LoadInst>(
X);
215 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
216 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
218 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
219 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
220 unsigned OffsetEltIndex = 0;
228 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
239 uint64_t ScalarSizeInBytes = ScalarSize / 8;
240 if (
Offset.urem(ScalarSizeInBytes) != 0)
244 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
245 if (OffsetEltIndex >= MinVecNumElts)
262 unsigned AS =
Load->getPointerAddressSpace();
280 auto *Ty = cast<FixedVectorType>(
I.getType());
281 unsigned OutputNumElts = Ty->getNumElements();
283 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
284 Mask[0] = OffsetEltIndex;
291 if (OldCost < NewCost || !NewCost.
isValid())
302 replaceValue(
I, *VecLd);
312 auto *Shuf = cast<ShuffleVectorInst>(&
I);
313 if (!Shuf->isIdentityWithPadding())
318 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
319 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
320 return M >= (int)(NumOpElts);
323 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
330 auto *Ty = cast<FixedVectorType>(
I.getType());
331 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
332 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
339 unsigned AS =
Load->getPointerAddressSpace();
354 if (OldCost < NewCost || !NewCost.
isValid())
361 replaceValue(
I, *VecLd);
373 assert(Index0C && Index1C &&
"Expected constant extract indexes");
375 unsigned Index0 = Index0C->getZExtValue();
376 unsigned Index1 = Index1C->getZExtValue();
379 if (Index0 == Index1)
403 if (PreferredExtractIndex == Index0)
405 if (PreferredExtractIndex == Index1)
409 return Index0 > Index1 ? Ext0 : Ext1;
421 unsigned PreferredExtractIndex) {
424 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
426 unsigned Opcode =
I.getOpcode();
430 auto *VecTy = cast<VectorType>(Ext0Src->
getType());
439 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
440 "Expected a compare");
450 unsigned Ext0Index = Ext0IndexC->getZExtValue();
451 unsigned Ext1Index = Ext1IndexC->getZExtValue();
465 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
466 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
467 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
472 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
477 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
479 OldCost = CheapExtractCost + ScalarOpCost;
480 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
484 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
485 NewCost = VectorOpCost + CheapExtractCost +
490 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
491 if (ConvertToShuffle) {
502 if (
auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
505 ShuffleMask[BestInsIndex] = BestExtIndex;
507 VecTy, ShuffleMask,
CostKind, 0,
nullptr,
512 {},
CostKind, 0,
nullptr, {ConvertToShuffle});
519 return OldCost < NewCost;
529 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
531 ShufMask[NewIndex] = OldIndex;
544 if (!isa<FixedVectorType>(
X->getType()))
550 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
551 if (isa<Constant>(
X))
564 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
567 "Expected matching constant extract indexes");
575 replaceValue(
I, *NewExt);
583 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
586 "Expected matching constant extract indexes");
596 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
597 VecBOInst->copyIRFlags(&
I);
600 replaceValue(
I, *NewExt);
628 auto *Ext0 = cast<ExtractElementInst>(I0);
629 auto *Ext1 = cast<ExtractElementInst>(I1);
636 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
639 if (ExtractToChange) {
640 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
645 if (ExtractToChange == Ext0)
652 foldExtExtCmp(Ext0, Ext1,
I);
654 foldExtExtBinop(Ext0, Ext1,
I);
680 auto *VecTy = cast<FixedVectorType>(
I.getType());
682 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
683 if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
687 unsigned NumElts = VecTy->getNumElements();
688 if (Index >= NumElts)
695 std::iota(
Mask.begin(),
Mask.end(), 0);
712 bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
723 if (NewCost > OldCost)
738 replaceValue(
I, *NewShuf);
757 auto *ResultTy = dyn_cast<FixedVectorType>(
I.getType());
777 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
779 if (NewCost > OldCost)
789 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
790 NewInst->copyIRFlags(VecBinOp);
791 NewInst->andIRFlags(SclBinOp);
796 replaceValue(
I, *NewBO);
815 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
816 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
817 if (!DestTy || !SrcTy)
820 unsigned DestEltSize = DestTy->getScalarSizeInBits();
821 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
822 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
825 bool IsUnary = isa<UndefValue>(V1);
832 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
833 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
838 if (DestEltSize <= SrcEltSize) {
841 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
842 unsigned ScaleFactor = SrcEltSize / DestEltSize;
847 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
848 unsigned ScaleFactor = DestEltSize / SrcEltSize;
855 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
860 unsigned NumOps = IsUnary ? 1 : 2;
870 TargetTransformInfo::CastContextHint::None,
875 TargetTransformInfo::CastContextHint::None,
878 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
879 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
881 if (NewCost > OldCost || !NewCost.
isValid())
889 replaceValue(
I, *Shuf);
896bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
897 if (!isa<VPIntrinsic>(
I))
910 if (!ScalarOp0 || !ScalarOp1)
918 auto IsAllTrueMask = [](
Value *MaskVal) {
920 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
921 return ConstValue->isAllOnesValue();
936 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
937 Mask.resize(FVTy->getNumElements(), 0);
946 Args.push_back(
V->getType());
952 std::optional<unsigned> FunctionalOpcode =
954 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
955 if (!FunctionalOpcode) {
979 <<
", Cost of scalarizing:" << NewCost <<
"\n");
982 if (OldCost < NewCost || !NewCost.
isValid())
993 bool SafeToSpeculate;
996 .
hasFnAttr(Attribute::AttrKind::Speculatable);
999 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
1000 if (!SafeToSpeculate &&
1007 {ScalarOp0, ScalarOp1})
1009 ScalarOp0, ScalarOp1);
1017bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
1028 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
1030 for (
User *U :
I.users())
1040 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
1041 Value *V0 =
nullptr, *V1 =
nullptr;
1052 bool IsConst0 = !V0;
1053 bool IsConst1 = !V1;
1054 if (IsConst0 && IsConst1)
1056 if (!IsConst0 && !IsConst1 && Index0 != Index1)
1059 auto *VecTy0 = cast<VectorType>(Ins0->
getType());
1060 auto *VecTy1 = cast<VectorType>(Ins1->
getType());
1061 if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
1062 VecTy1->getElementCount().getKnownMinValue() <= Index1)
1067 auto *I0 = dyn_cast_or_null<Instruction>(V0);
1068 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
1069 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
1075 Type *VecTy =
I.getType();
1080 "Unexpected types for insert element into binop or cmp");
1082 unsigned Opcode =
I.getOpcode();
1098 Instruction::InsertElement, VecTy,
CostKind, Index);
1100 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
1102 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
1103 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
1106 if (OldCost < NewCost || !NewCost.
isValid())
1126 Scalar->setName(
I.getName() +
".scalar");
1130 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1131 ScalarInst->copyIRFlags(&
I);
1135 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1138 replaceValue(
I, *Insert);
1146 auto *BI = dyn_cast<BinaryOperator>(&
I);
1150 if (!BI || !
I.getType()->isIntegerTy(1))
1155 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1175 auto *Ext0 = cast<ExtractElementInst>(I0);
1176 auto *Ext1 = cast<ExtractElementInst>(I1);
1180 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1181 "Unknown ExtractElementInst");
1186 unsigned CmpOpcode =
1188 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1201 Ext0Cost + Ext1Cost + CmpCost * 2 +
1207 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1208 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1214 ShufMask[CheapIndex] = ExpensiveIndex;
1219 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1220 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1225 if (OldCost < NewCost || !NewCost.
isValid())
1235 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1236 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1239 replaceValue(
I, *NewExt);
1248 unsigned NumScanned = 0;
1258class ScalarizationResult {
1259 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1264 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1268 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1269 ~ScalarizationResult() {
1270 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1273 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1274 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1275 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1276 return {StatusTy::SafeWithFreeze, ToFreeze};
1280 bool isSafe()
const {
return Status == StatusTy::Safe; }
1282 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1285 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1290 Status = StatusTy::Unsafe;
1295 assert(isSafeWithFreeze() &&
1296 "should only be used when freezing is required");
1298 "UserI must be a user of ToFreeze");
1304 if (
U.get() == ToFreeze)
1321 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1323 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1324 if (
C->getValue().ult(NumElements))
1325 return ScalarizationResult::safe();
1326 return ScalarizationResult::unsafe();
1329 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1330 APInt Zero(IntWidth, 0);
1331 APInt MaxElts(IntWidth, NumElements);
1337 true, &AC, CtxI, &DT)))
1338 return ScalarizationResult::safe();
1339 return ScalarizationResult::unsafe();
1352 if (ValidIndices.
contains(IdxRange))
1353 return ScalarizationResult::safeWithFreeze(IdxBase);
1354 return ScalarizationResult::unsafe();
1364 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1366 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1378bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1379 auto *
SI = cast<StoreInst>(&
I);
1380 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1388 if (!
match(
SI->getValueOperand(),
1393 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1394 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1395 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1398 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1399 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1400 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1404 if (ScalarizableIdx.isUnsafe() ||
1411 Worklist.
push(Load);
1413 if (ScalarizableIdx.isSafeWithFreeze())
1414 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1416 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1417 {ConstantInt::get(Idx->getType(), 0), Idx});
1424 replaceValue(
I, *NSI);
1433bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1438 auto *LI = cast<LoadInst>(&
I);
1439 auto *VecTy = cast<VectorType>(LI->getType());
1440 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1445 LI->getPointerAddressSpace(),
CostKind);
1449 unsigned NumInstChecked = 0;
1453 for (
auto &Pair : NeedFreeze)
1454 Pair.second.discard();
1461 auto *UI = dyn_cast<ExtractElementInst>(U);
1462 if (!UI || UI->getParent() != LI->getParent())
1469 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1476 LastCheckedInst = UI;
1481 if (ScalarIdx.isUnsafe())
1483 if (ScalarIdx.isSafeWithFreeze()) {
1485 ScalarIdx.discard();
1488 auto *
Index = dyn_cast<ConstantInt>(UI->getIndexOperand());
1491 Index ?
Index->getZExtValue() : -1);
1498 if (ScalarizedCost >= OriginalCost)
1507 auto *EI = cast<ExtractElementInst>(U);
1508 Value *
Idx = EI->getIndexOperand();
1511 auto It = NeedFreeze.
find(EI);
1512 if (It != NeedFreeze.
end())
1513 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1518 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1519 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1522 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1523 NewLoad->setAlignment(ScalarOpAlignment);
1525 replaceValue(*EI, *NewLoad);
1528 FailureGuard.release();
1535bool VectorCombine::foldConcatOfBoolMasks(
Instruction &
I) {
1536 Type *Ty =
I.getType();
1541 if (
DL->isBigEndian())
1568 if (ShAmtX > ShAmtY) {
1576 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
1577 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
1579 auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->
getType());
1582 MaskTy->getNumElements() != ShAmtDiff ||
1583 MaskTy->getNumElements() > (
BitWidth / 2))
1592 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
1609 if (Ty != ConcatIntTy)
1615 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
1616 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1619 if (NewCost > OldCost)
1629 if (Ty != ConcatIntTy) {
1639 replaceValue(
I, *Result);
1645bool VectorCombine::foldPermuteOfBinops(
Instruction &
I) {
1656 Value *Op00, *Op01, *Op10, *Op11;
1664 if (!Match0 && !Match1)
1673 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1674 auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->
getType());
1675 auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->
getType());
1676 auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->
getType());
1677 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
1680 unsigned NumSrcElts = BinOpTy->getNumElements();
1684 if ((BinOp->
isIntDivRem() || !isa<PoisonValue>(
I.getOperand(1))) &&
1685 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
1690 for (
int M : OuterMask) {
1691 if (M < 0 || M >= (
int)NumSrcElts) {
1695 NewMask0.
push_back(Match0 ? Mask0[M] : M);
1696 NewMask1.
push_back(Match1 ? Mask1[M] : M);
1700 unsigned NumOpElts = Op0Ty->getNumElements();
1701 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
1702 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
1704 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
1705 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
1712 OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I);
1715 Mask0,
CostKind, 0,
nullptr, {Op00, Op01},
1719 Mask1,
CostKind, 0,
nullptr, {Op10, Op11},
1727 NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
1730 NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
1732 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
1733 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1737 if (NewCost > OldCost)
1747 if (
auto *NewInst = dyn_cast<Instruction>(NewBO))
1748 NewInst->copyIRFlags(BinOp);
1752 replaceValue(
I, *NewBO);
1758bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1766 if (
LHS->getOpcode() !=
RHS->getOpcode())
1770 bool IsCommutative =
false;
1775 auto *BO = cast<BinaryOperator>(LHS);
1779 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
1783 IsCommutative = cast<CmpInst>(LHS)->isCommutative();
1787 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1788 auto *BinResTy = dyn_cast<FixedVectorType>(
LHS->
getType());
1789 auto *BinOpTy = dyn_cast<FixedVectorType>(
X->getType());
1790 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
1793 unsigned NumSrcElts = BinOpTy->getNumElements();
1796 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
1799 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1800 if (M >= (
int)NumSrcElts)
1839 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
1851 bool ReducedInstCount =
false;
1852 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
1853 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
1854 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
1855 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
1865 auto *ShuffleCmpTy =
1872 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1877 ReducedInstCount |= (isa<Constant>(
X) && isa<Constant>(Z)) ||
1878 (isa<Constant>(
Y) && isa<Constant>(W));
1879 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
1886 cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
1887 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
1890 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1891 NewInst->copyIRFlags(LHS);
1892 NewInst->andIRFlags(RHS);
1897 replaceValue(
I, *NewBO);
1903bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1909 auto *C0 = dyn_cast<CastInst>(V0);
1910 auto *C1 = dyn_cast<CastInst>(V1);
1915 if (C0->getSrcTy() != C1->getSrcTy())
1919 if (Opcode != C1->getOpcode()) {
1921 Opcode = Instruction::SExt;
1926 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1927 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1928 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1929 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1932 unsigned NumSrcElts = CastSrcTy->getNumElements();
1933 unsigned NumDstElts = CastDstTy->getNumElements();
1934 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1935 "Only bitcasts expected to alter src/dst element counts");
1939 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1940 (NumDstElts % NumSrcElts) != 0)
1944 if (NumSrcElts >= NumDstElts) {
1947 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1948 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1953 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1954 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1959 auto *NewShuffleDstTy =
1984 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1986 if (NewCost > OldCost)
1994 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1995 NewInst->copyIRFlags(C0);
1996 NewInst->andIRFlags(C1);
2000 replaceValue(
I, *Cast);
2010bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
2012 Value *OuterV0, *OuterV1;
2018 Value *X0, *X1, *Y0, *Y1;
2023 if (!Match0 && !Match1)
2026 X0 = Match0 ? X0 : OuterV0;
2027 Y0 = Match0 ? Y0 : OuterV0;
2028 X1 = Match1 ? X1 : OuterV1;
2029 Y1 = Match1 ? Y1 : OuterV1;
2030 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2031 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->
getType());
2032 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->
getType());
2033 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2037 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2038 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2044 Value *NewX =
nullptr, *NewY =
nullptr;
2045 for (
int &M : NewMask) {
2046 Value *Src =
nullptr;
2047 if (0 <= M && M < (
int)NumImmElts) {
2051 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2052 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2054 }
else if (M >= (
int)NumImmElts) {
2059 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2060 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2064 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
2065 if (isa<UndefValue>(Src)) {
2068 if (!isa<PoisonValue>(Src))
2073 if (!NewX || NewX == Src) {
2077 if (!NewY || NewY == Src) {
2093 replaceValue(
I, *NewX);
2110 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
2115 SK, ShuffleSrcTy, NewMask,
CostKind, 0,
nullptr, {NewX, NewY});
2117 NewCost += InnerCost0;
2119 NewCost += InnerCost1;
2122 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2124 if (NewCost > OldCost)
2128 replaceValue(
I, *Shuf);
2134bool VectorCombine::foldShuffleOfIntrinsics(
Instruction &
I) {
2141 auto *II0 = dyn_cast<IntrinsicInst>(V0);
2142 auto *II1 = dyn_cast<IntrinsicInst>(V1);
2147 if (IID != II1->getIntrinsicID())
2150 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2151 auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
2152 if (!ShuffleDstTy || !II0Ty)
2158 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2160 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2171 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2173 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2175 auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(
I)->getType());
2177 VecTy->getNumElements() * 2));
2185 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2188 if (NewCost > OldCost)
2192 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2197 II1->getArgOperand(
I), OldMask);
2204 if (
auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
2206 NewInst->andIRFlags(II1);
2209 replaceValue(
I, *NewIntrinsic);
2216 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
2218 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
2219 int M = SV->getMaskValue(Lane);
2222 if (
static_cast<unsigned>(M) < NumElts) {
2223 U = &SV->getOperandUse(0);
2226 U = &SV->getOperandUse(1);
2237 auto [U, Lane] = IL;
2250 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
2251 unsigned NumElts = Ty->getNumElements();
2252 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
2258 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
2262 unsigned NumSlices = Item.
size() / NumElts;
2267 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
2268 Use *SliceV = Item[Slice * NumElts].first;
2269 if (!SliceV || SliceV->get()->
getType() != Ty)
2271 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
2272 auto [V, Lane] = Item[Slice * NumElts + Elt];
2273 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
2286 auto [FrontU, FrontLane] = Item.
front();
2288 if (IdentityLeafs.
contains(FrontU)) {
2289 return FrontU->get();
2295 if (ConcatLeafs.
contains(FrontU)) {
2297 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
2299 for (
unsigned S = 0; S < Values.
size(); ++S)
2300 Values[S] = Item[S * NumElts].first->get();
2302 while (Values.
size() > 1) {
2305 std::iota(Mask.begin(), Mask.end(), 0);
2307 for (
unsigned S = 0; S < NewValues.
size(); ++S)
2315 auto *
I = cast<Instruction>(FrontU->get());
2316 auto *
II = dyn_cast<IntrinsicInst>(
I);
2317 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
2319 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
2326 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
2331 for (
const auto &Lane : Item)
2337 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
2343 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
2344 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
2348 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
2353 if (
auto *CI = dyn_cast<CastInst>(
I)) {
2364 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
2374bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
2375 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
2376 if (!Ty ||
I.use_empty())
2380 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
2386 unsigned NumVisited = 0;
2388 while (!Worklist.
empty()) {
2393 auto [FrontU, FrontLane] = Item.
front();
2401 return X->getType() ==
Y->getType() &&
2406 if (FrontLane == 0 &&
2407 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
2408 Ty->getNumElements() &&
2411 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
2412 E.value().second == (
int)E.index());
2414 IdentityLeafs.
insert(FrontU);
2418 if (
auto *
C = dyn_cast<Constant>(FrontU);
2419 C &&
C->getSplatValue() &&
2423 return !U || (isa<Constant>(
U->get()) &&
2424 cast<Constant>(
U->get())->getSplatValue() ==
2425 cast<Constant>(FrontV)->getSplatValue());
2427 SplatLeafs.
insert(FrontU);
2432 auto [FrontU, FrontLane] = Item.
front();
2433 auto [
U, Lane] = IL;
2434 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
2436 SplatLeafs.
insert(FrontU);
2442 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
2446 Value *
V = IL.first->get();
2447 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUse())
2451 if (
auto *CI = dyn_cast<CmpInst>(V))
2452 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
2454 if (
auto *CI = dyn_cast<CastInst>(V))
2455 if (CI->getSrcTy()->getScalarType() !=
2456 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
2458 if (
auto *SI = dyn_cast<SelectInst>(V))
2459 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
2460 SI->getOperand(0)->getType() !=
2461 cast<SelectInst>(FrontV)->getOperand(0)->getType())
2463 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
2465 auto *
II = dyn_cast<IntrinsicInst>(V);
2466 return !
II || (isa<IntrinsicInst>(FrontV) &&
2467 II->getIntrinsicID() ==
2468 cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
2469 !
II->hasOperandBundles());
2473 if (isa<BinaryOperator, CmpInst>(FrontU)) {
2475 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
2476 BO && BO->isIntDivRem())
2485 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
2487 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
2488 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
2489 if (DstTy && SrcTy &&
2490 SrcTy->getNumElements() == DstTy->getNumElements()) {
2494 }
else if (isa<SelectInst>(FrontU)) {
2499 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
2501 !
II->hasOperandBundles()) {
2502 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
2508 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
2509 cast<Instruction>(FrontV)->getOperand(
Op));
2521 ConcatLeafs.
insert(FrontU);
2528 if (NumVisited <= 1)
2531 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
2537 ConcatLeafs, Builder, &
TTI);
2538 replaceValue(
I, *V);
2545bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
2546 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2549 switch (
II->getIntrinsicID()) {
2550 case Intrinsic::vector_reduce_add:
2551 case Intrinsic::vector_reduce_mul:
2552 case Intrinsic::vector_reduce_and:
2553 case Intrinsic::vector_reduce_or:
2554 case Intrinsic::vector_reduce_xor:
2555 case Intrinsic::vector_reduce_smin:
2556 case Intrinsic::vector_reduce_smax:
2557 case Intrinsic::vector_reduce_umin:
2558 case Intrinsic::vector_reduce_umax:
2567 std::queue<Value *> Worklist;
2570 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
2573 while (!Worklist.empty()) {
2574 Value *CV = Worklist.front();
2585 if (
auto *CI = dyn_cast<Instruction>(CV)) {
2586 if (CI->isBinaryOp()) {
2587 for (
auto *
Op : CI->operand_values())
2590 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
2591 if (Shuffle && Shuffle != SV)
2608 for (
auto *V : Visited)
2609 for (
auto *U :
V->users())
2610 if (!Visited.contains(U) && U != &
I)
2614 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
2619 if (!ShuffleInputType)
2627 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
2631 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
2632 bool UsesSecondVec =
2633 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
2636 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
2642 VecTyForCost, ConcatMask,
CostKind);
2644 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
2646 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2648 if (NewCost < OldCost) {
2652 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
2653 replaceValue(*Shuffle, *NewShuffle);
2658 return foldSelectShuffle(*Shuffle,
true);
2665bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
2666 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2670 bool TruncOnly =
false;
2673 case Intrinsic::vector_reduce_add:
2674 case Intrinsic::vector_reduce_mul:
2677 case Intrinsic::vector_reduce_and:
2678 case Intrinsic::vector_reduce_or:
2679 case Intrinsic::vector_reduce_xor:
2686 Value *ReductionSrc =
I.getOperand(0);
2696 auto *SrcTy = cast<VectorType>(Src->getType());
2697 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
2698 Type *ResultTy =
I.getType();
2701 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
2704 cast<CastInst>(ReductionSrc));
2711 if (OldCost <= NewCost || !NewCost.
isValid())
2715 II->getIntrinsicID(), {Src});
2717 replaceValue(
I, *NewCast);
2731bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2732 auto *SVI = cast<ShuffleVectorInst>(&
I);
2733 auto *VT = cast<FixedVectorType>(
I.getType());
2734 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2735 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2736 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2740 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2741 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2742 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2743 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2746 if (!
I ||
I->getOperand(0)->getType() != VT)
2749 return U != Op0 && U != Op1 &&
2750 !(isa<ShuffleVectorInst>(U) &&
2751 (InputShuffles.contains(cast<Instruction>(U)) ||
2752 isInstructionTriviallyDead(cast<Instruction>(U))));
2755 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2756 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2764 for (
auto *U :
I->users()) {
2765 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2766 if (!SV || SV->getType() != VT)
2768 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2769 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2776 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2780 if (FromReduction && Shuffles.
size() > 1)
2785 if (!FromReduction) {
2787 for (
auto *U : SV->users()) {
2790 Shuffles.push_back(SSV);
2802 int MaxV1Elt = 0, MaxV2Elt = 0;
2803 unsigned NumElts = VT->getNumElements();
2806 SVN->getShuffleMask(Mask);
2810 Value *SVOp0 = SVN->getOperand(0);
2811 Value *SVOp1 = SVN->getOperand(1);
2812 if (isa<UndefValue>(SVOp1)) {
2813 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2816 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2822 if (SVOp0 == Op1 && SVOp1 == Op0) {
2826 if (SVOp0 != Op0 || SVOp1 != Op1)
2833 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2836 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2837 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2838 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2839 return Mask[
I] ==
A.first;
2848 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2849 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2850 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2853 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2856 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2864 sort(ReconstructMask);
2865 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2872 if (V1.
empty() ||
V2.empty() ||
2873 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2874 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2881 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2884 if (isa<UndefValue>(SV->getOperand(1)))
2885 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2886 if (InputShuffles.contains(SSV))
2888 return SV->getMaskValue(M);
2896 std::pair<int, int>
Y) {
2897 int MXA = GetBaseMaskValue(
A,
X.first);
2898 int MYA = GetBaseMaskValue(
A,
Y.first);
2901 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2902 return SortBase(SVI0A,
A,
B);
2904 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2905 return SortBase(SVI1A,
A,
B);
2910 for (
const auto &Mask : OrigReconstructMasks) {
2912 for (
int M : Mask) {
2914 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2915 assert(It !=
V.end() &&
"Expected all entries in Mask");
2916 return std::distance(
V.begin(), It);
2920 else if (M <
static_cast<int>(NumElts)) {
2921 ReconstructMask.
push_back(FindIndex(V1, M));
2923 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2926 ReconstructMasks.push_back(std::move(ReconstructMask));
2932 for (
unsigned I = 0;
I < V1.
size();
I++) {
2933 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2934 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2936 for (
unsigned I = 0;
I <
V2.size();
I++) {
2937 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2938 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2940 while (V1A.
size() < NumElts) {
2944 while (V2A.
size() < NumElts) {
2950 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2956 VT, SV->getShuffleMask(),
CostKind);
2967 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2969 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2981 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2983 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2985 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2988 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2990 <<
" vs CostAfter: " << CostAfter <<
"\n");
2991 if (CostBefore <= CostAfter)
2996 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2999 if (isa<UndefValue>(SV->getOperand(1)))
3000 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
3001 if (InputShuffles.contains(SSV))
3003 return SV->getOperand(
Op);
3007 GetShuffleOperand(SVI0A, 1), V1A);
3010 GetShuffleOperand(SVI0B, 1), V1B);
3013 GetShuffleOperand(SVI1A, 1), V2A);
3016 GetShuffleOperand(SVI1B, 1), V2B);
3020 if (
auto *
I = dyn_cast<Instruction>(NOp0))
3021 I->copyIRFlags(Op0,
true);
3025 if (
auto *
I = dyn_cast<Instruction>(NOp1))
3026 I->copyIRFlags(Op1,
true);
3028 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
3031 replaceValue(*Shuffles[S], *NSV);
3034 Worklist.pushValue(NSV0A);
3035 Worklist.pushValue(NSV0B);
3036 Worklist.pushValue(NSV1A);
3037 Worklist.pushValue(NSV1B);
3038 for (
auto *S : Shuffles)
3050 Value *ZExted, *OtherOperand;
3056 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
3058 auto *BigTy = cast<FixedVectorType>(
I.getType());
3059 auto *SmallTy = cast<FixedVectorType>(ZExted->
getType());
3060 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
3062 if (
I.getOpcode() == Instruction::LShr) {
3079 Instruction::ZExt, BigTy, SmallTy,
3080 TargetTransformInfo::CastContextHint::None,
CostKind);
3086 auto *UI = cast<Instruction>(U);
3092 ShrinkCost += ZExtCost;
3107 ShrinkCost += ZExtCost;
3112 if (!isa<Constant>(OtherOperand))
3114 Instruction::Trunc, SmallTy, BigTy,
3115 TargetTransformInfo::CastContextHint::None,
CostKind);
3120 if (ShrinkCost > CurrentCost)
3124 Value *Op0 = ZExted;
3127 if (
I.getOperand(0) == OtherOperand)
3131 cast<Instruction>(NewBinOp)->copyIRFlags(&
I);
3132 cast<Instruction>(NewBinOp)->copyMetadata(
I);
3134 replaceValue(
I, *NewZExtr);
3140bool VectorCombine::foldInsExtVectorToShuffle(
Instruction &
I) {
3141 Value *DstVec, *SrcVec;
3149 auto *VecTy = dyn_cast<FixedVectorType>(
I.getType());
3150 if (!VecTy || SrcVec->
getType() != VecTy)
3153 unsigned NumElts = VecTy->getNumElements();
3154 if (ExtIdx >= NumElts || InsIdx >= NumElts)
3160 if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3162 Mask[InsIdx] = ExtIdx;
3166 std::iota(
Mask.begin(),
Mask.end(), 0);
3167 Mask[InsIdx] = ExtIdx + NumElts;
3171 auto *
Ins = cast<InsertElementInst>(&
I);
3172 auto *
Ext = cast<ExtractElementInst>(
I.getOperand(1));
3185 if (!
Ext->hasOneUse())
3188 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
3189 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3192 if (OldCost < NewCost)
3196 if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3202 replaceValue(
I, *Shuf);
3209bool VectorCombine::run() {
3219 bool MadeChange =
false;
3222 bool IsVectorType = isa<VectorType>(
I.getType());
3223 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
3224 auto Opcode =
I.getOpcode();
3232 if (IsFixedVectorType) {
3234 case Instruction::InsertElement:
3235 MadeChange |= vectorizeLoadInsert(
I);
3237 case Instruction::ShuffleVector:
3238 MadeChange |= widenSubvectorLoad(
I);
3248 MadeChange |= scalarizeBinopOrCmp(
I);
3249 MadeChange |= scalarizeLoadExtract(
I);
3250 MadeChange |= scalarizeVPIntrinsic(
I);
3253 if (Opcode == Instruction::Store)
3254 MadeChange |= foldSingleElementStore(
I);
3257 if (TryEarlyFoldsOnly)
3264 if (IsFixedVectorType) {
3266 case Instruction::InsertElement:
3267 MadeChange |= foldInsExtFNeg(
I);
3268 MadeChange |= foldInsExtBinop(
I);
3269 MadeChange |= foldInsExtVectorToShuffle(
I);
3271 case Instruction::ShuffleVector:
3272 MadeChange |= foldPermuteOfBinops(
I);
3273 MadeChange |= foldShuffleOfBinops(
I);
3274 MadeChange |= foldShuffleOfCastops(
I);
3275 MadeChange |= foldShuffleOfShuffles(
I);
3276 MadeChange |= foldShuffleOfIntrinsics(
I);
3277 MadeChange |= foldSelectShuffle(
I);
3278 MadeChange |= foldShuffleToIdentity(
I);
3280 case Instruction::BitCast:
3281 MadeChange |= foldBitcastShuffle(
I);
3284 MadeChange |= shrinkType(
I);
3289 case Instruction::Call:
3290 MadeChange |= foldShuffleFromReductions(
I);
3291 MadeChange |= foldCastFromReductions(
I);
3293 case Instruction::ICmp:
3294 case Instruction::FCmp:
3295 MadeChange |= foldExtractExtract(
I);
3297 case Instruction::Or:
3298 MadeChange |= foldConcatOfBoolMasks(
I);
3302 MadeChange |= foldExtractExtract(
I);
3303 MadeChange |= foldExtractedCmps(
I);
3316 if (
I.isDebugOrPseudoInst())
3322 while (!Worklist.isEmpty()) {
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilder<> &Builder, const TargetTransformInfo *TTI)
std::pair< Use *, int > InstLane
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static constexpr int Concat[]
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents a sign extension of integer types.
This class represents a cast from signed integer to floating point.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This class represents zero extension of integer types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.