68#define DEBUG_TYPE "x86-isel"
71 "x86-experimental-pref-innermost-loop-alignment",
cl::init(4),
73 "Sets the preferable loop alignment for experiments (as log2 bytes) "
74 "for innermost loops only. If specified, this option overrides "
75 "alignment set by x86-experimental-pref-loop-alignment."),
79 "x86-br-merging-base-cost",
cl::init(2),
81 "Sets the cost threshold for when multiple conditionals will be merged "
82 "into one branch versus be split in multiple branches. Merging "
83 "conditionals saves branches at the cost of additional instructions. "
84 "This value sets the instruction cost limit, below which conditionals "
85 "will be merged, and above which conditionals will be split. Set to -1 "
86 "to never merge branches."),
90 "x86-br-merging-ccmp-bias",
cl::init(6),
91 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that the target "
92 "supports conditional compare instructions."),
97 cl::desc(
"Replace narrow shifts with wider shifts."),
101 "x86-br-merging-likely-bias",
cl::init(0),
102 cl::desc(
"Increases 'x86-br-merging-base-cost' in cases that it is likely "
103 "that all conditionals will be executed. For example for merging "
104 "the conditionals (a == b && c > d), if its known that a == b is "
105 "likely, then it is likely that if the conditionals are split "
106 "both sides will be executed, so it may be desirable to increase "
107 "the instruction cost threshold. Set to -1 to never merge likely "
112 "x86-br-merging-unlikely-bias",
cl::init(-1),
114 "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
115 "that all conditionals will be executed. For example for merging "
116 "the conditionals (a == b && c > d), if its known that a == b is "
117 "unlikely, then it is unlikely that if the conditionals are split "
118 "both sides will be executed, so it may be desirable to decrease "
119 "the instruction cost threshold. Set to -1 to never merge unlikely "
124 "mul-constant-optimization",
cl::init(
true),
125 cl::desc(
"Replace 'mul x, Const' with more effective instructions like "
132 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
149 if (Subtarget.isAtom())
151 else if (Subtarget.is64Bit())
160 if (Subtarget.hasSlowDivide32())
162 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
168 static const struct {
170 const char *
const Name;
180 for (
const auto &LC : LibraryCalls) {
201 if (Subtarget.is64Bit())
218 for (
auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
227 if (Subtarget.is64Bit())
236 if (Subtarget.is64Bit())
244 if (Subtarget.is64Bit())
255 if (Subtarget.is64Bit())
259 if (!Subtarget.useSoftFloat()) {
323 if (!Subtarget.is64Bit()) {
332 for (
MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
337 if (Subtarget.is64Bit()) {
343 if (Subtarget.hasAVX10_2()) {
346 for (
MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
351 if (Subtarget.hasAVX10_2_512()) {
355 if (Subtarget.is64Bit()) {
372 if (Subtarget.is64Bit()) {
377 }
else if (!Subtarget.is64Bit())
390 for (
auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
401 for (
auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
402 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
406 if (Subtarget.is64Bit())
417 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
435 if (!Subtarget.hasBMI()) {
438 if (Subtarget.is64Bit()) {
444 if (Subtarget.hasLZCNT()) {
450 for (
auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
451 if (VT == MVT::i64 && !Subtarget.is64Bit())
465 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ?
Custom :
Expand);
472 for (
auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
477 for (
MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
490 if (Subtarget.is64Bit())
492 if (Subtarget.hasPOPCNT()) {
506 if (!Subtarget.hasMOVBE())
510 for (
auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
516 for (
auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
517 if (VT == MVT::i64 && !Subtarget.is64Bit())
537 for (
auto VT : { MVT::i32, MVT::i64 }) {
538 if (VT == MVT::i64 && !Subtarget.is64Bit())
549 for (
auto VT : { MVT::i32, MVT::i64 }) {
550 if (VT == MVT::i64 && !Subtarget.is64Bit())
563 for (
auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
573 if (!Subtarget.is64Bit())
576 if (Subtarget.is64Bit() && Subtarget.
hasAVX()) {
609 bool Is64Bit = Subtarget.is64Bit();
663 if (!Subtarget.useSoftFloat() && Subtarget.
hasSSE2()) {
667 : &X86::FR16RegClass);
669 : &X86::FR32RegClass);
671 : &X86::FR64RegClass);
679 for (
auto VT : { MVT::f32, MVT::f64 }) {
700 setF16Action(MVT::f16,
Promote);
747 }
else if (!Subtarget.useSoftFloat() && Subtarget.
hasSSE1() &&
748 (UseX87 || Is64Bit)) {
786 for (
auto VT : { MVT::f32, MVT::f64 }) {
799 if (UseX87 && (
getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
800 addLegalFPImmediate(
APFloat(+0.0f));
801 addLegalFPImmediate(
APFloat(+1.0f));
802 addLegalFPImmediate(
APFloat(-0.0f));
803 addLegalFPImmediate(
APFloat(-1.0f));
805 addLegalFPImmediate(
APFloat(+0.0f));
810 addLegalFPImmediate(
APFloat(+0.0));
811 addLegalFPImmediate(
APFloat(+1.0));
812 addLegalFPImmediate(
APFloat(-0.0));
813 addLegalFPImmediate(
APFloat(-1.0));
815 addLegalFPImmediate(
APFloat(+0.0));
846 addLegalFPImmediate(TmpFlt);
848 addLegalFPImmediate(TmpFlt);
854 addLegalFPImmediate(TmpFlt2);
856 addLegalFPImmediate(TmpFlt2);
904 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.
hasSSE1()) {
906 : &X86::VR128RegClass);
983 for (
auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
984 MVT::v4f32, MVT::v8f32, MVT::v16f32,
985 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
1068 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
1073 if (!Subtarget.useSoftFloat() && Subtarget.
hasSSE1()) {
1075 : &X86::VR128RegClass);
1103 if (!Subtarget.useSoftFloat() && Subtarget.
hasSSE2()) {
1105 : &X86::VR128RegClass);
1110 : &X86::VR128RegClass);
1112 : &X86::VR128RegClass);
1114 : &X86::VR128RegClass);
1116 : &X86::VR128RegClass);
1118 : &X86::VR128RegClass);
1120 for (
auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
1127 for (
auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
1128 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
1163 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1186 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1206 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1214 for (
auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
1219 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1225 setF16Action(MVT::v8f16,
Expand);
1250 for (
auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1324 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1328 if (VT == MVT::v2i64)
continue;
1342 if (Subtarget.hasGFNI()) {
1349 if (!Subtarget.useSoftFloat() && Subtarget.
hasSSSE3()) {
1354 for (
auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1366 if (!Subtarget.useSoftFloat() && Subtarget.
hasSSE41()) {
1367 for (
MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1407 for (
auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1422 if (Subtarget.is64Bit() && !Subtarget.
hasAVX512()) {
1434 if (!Subtarget.useSoftFloat() && Subtarget.
hasSSE42()) {
1438 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1439 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1440 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1446 for (
auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1450 if (!Subtarget.useSoftFloat() && Subtarget.
hasAVX()) {
1454 : &X86::VR256RegClass);
1456 : &X86::VR256RegClass);
1458 : &X86::VR256RegClass);
1460 : &X86::VR256RegClass);
1462 : &X86::VR256RegClass);
1464 : &X86::VR256RegClass);
1466 : &X86::VR256RegClass);
1468 for (
auto VT : { MVT::v8f32, MVT::v4f64 }) {
1532 for (
auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1538 if (VT == MVT::v4i64)
continue;
1559 for (
auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1570 for (
auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1590 for (
auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1591 MVT::v2f64, MVT::v4f64 }) {
1597 for (
auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1638 for (
auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1646 for (
auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1668 for (
auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1669 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1676 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1677 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1682 for (
MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1683 MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
1694 setF16Action(MVT::v16f16,
Expand);
1710 for (
auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1711 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1716 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
1717 Subtarget.hasF16C()) {
1718 for (
MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
1722 for (
MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
1737 if (!Subtarget.useSoftFloat() && Subtarget.
hasAVX512()) {
1765 if (!Subtarget.hasDQI()) {
1778 for (
auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1784 for (
auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1787 for (
auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1800 for (
auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1803 if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
1804 for (
MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1813 if (!Subtarget.useSoftFloat() && Subtarget.
useAVX512Regs()) {
1814 bool HasBWI = Subtarget.hasBWI();
1834 for (
MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1850 if (Subtarget.hasDQI())
1853 for (
MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1860 for (
MVT VT : { MVT::v16i16, MVT::v16i32 }) {
1897 if (!Subtarget.hasVLX()) {
1898 for (
auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1899 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1925 for (
auto VT : { MVT::v16f32, MVT::v8f64 }) {
1942 for (
auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1969 for (
auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1993 for (
auto VT : { MVT::v16i32, MVT::v8i64 }) {
2002 for (
auto VT : { MVT::v64i8, MVT::v32i16 }) {
2023 if (Subtarget.hasDQI()) {
2031 if (Subtarget.hasCDI()) {
2033 for (
auto VT : { MVT::v16i32, MVT::v8i64} ) {
2038 if (Subtarget.hasVPOPCNTDQ()) {
2039 for (
auto VT : { MVT::v16i32, MVT::v8i64 })
2046 for (
auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
2047 MVT::v16f16, MVT::v8f32, MVT::v4f64 })
2050 for (
auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
2051 MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
2062 setF16Action(MVT::v32f16,
Expand);
2071 for (
auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
2078 for (
auto VT : { MVT::v64i8, MVT::v32i16 }) {
2087 if (Subtarget.hasVBMI2()) {
2088 for (
auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
2102 if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
2103 for (
auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
2113 if (!Subtarget.useSoftFloat() && Subtarget.
hasAVX512()) {
2121 if (Subtarget.hasDQI()) {
2126 "Unexpected operation action!");
2134 for (
auto VT : { MVT::v2i64, MVT::v4i64 }) {
2142 for (
auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2151 for (
auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
2152 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
2155 if (Subtarget.hasDQI()) {
2166 if (Subtarget.hasCDI()) {
2167 for (
auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
2172 if (Subtarget.hasVPOPCNTDQ()) {
2173 for (
auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
2180 for (
MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
2181 MVT::v4f64, MVT::v2i64, MVT::v2f64, MVT::v16i8, MVT::v8i16,
2182 MVT::v16i16, MVT::v8i8})
2187 for (
MVT VT : {MVT::v16i32, MVT::v16f32, MVT::v8i64, MVT::v8f64})
2191 if (Subtarget.hasVLX())
2192 for (
MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v4i32, MVT::v4f32, MVT::v4i64,
2193 MVT::v4f64, MVT::v2i64, MVT::v2f64})
2197 if (Subtarget.hasVBMI2())
2198 for (
MVT VT : {MVT::v32i16, MVT::v64i8})
2202 if (Subtarget.hasVBMI2() && Subtarget.hasVLX())
2203 for (
MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v32i8, MVT::v16i16})
2209 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
2213 for (
auto VT : { MVT::v32i1, MVT::v64i1 }) {
2226 for (
auto VT : { MVT::v16i1, MVT::v32i1 })
2234 for (
auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2243 if (Subtarget.hasBITALG()) {
2244 for (
auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
2249 if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
2250 auto setGroup = [&] (
MVT VT) {
2319 setGroup(MVT::v32f16);
2363 if (Subtarget.hasVLX()) {
2364 setGroup(MVT::v8f16);
2365 setGroup(MVT::v16f16);
2418 if (!Subtarget.useSoftFloat() &&
2419 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
2421 : &X86::VR128RegClass);
2423 : &X86::VR256RegClass);
2429 for (
auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2430 setF16Action(VT,
Expand);
2431 if (!Subtarget.hasBF16())
2448 if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
2451 setF16Action(MVT::v32bf16,
Expand);
2462 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
2463 for (
auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2476 if (Subtarget.hasAVX10_2_512()) {
2489 for (
auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
2495 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
2508 if (Subtarget.hasBWI()) {
2513 if (Subtarget.hasFP16()) {
2545 if (!Subtarget.useSoftFloat() && Subtarget.hasAMXTILE()) {
2553 if (!Subtarget.is64Bit()) {
2563 for (
auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
2564 if (VT == MVT::i64 && !Subtarget.is64Bit())
2608 if (Subtarget.is32Bit() &&
2748 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2755 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.
hasAVX512() &&
2756 !Subtarget.hasBWI())
2781 bool AssumeSingleUse) {
2782 if (!AssumeSingleUse && !
Op.hasOneUse())
2788 auto *Ld = cast<LoadSDNode>(
Op.getNode());
2789 if (!Subtarget.
hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
2790 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() <
Align(16))
2801 bool AssumeSingleUse) {
2802 assert(Subtarget.
hasAVX() &&
"Expected AVX for broadcast from memory");
2808 auto *Ld = cast<LoadSDNode>(
Op.getNode());
2809 return !Ld->isVolatile() ||
2814 if (!
Op.hasOneUse())
2827 if (
Op.hasOneUse()) {
2828 unsigned Opcode =
Op.getNode()->user_begin()->getOpcode();
2841 default:
return false;
2882 default:
return false;
2903 int ReturnAddrIndex = FuncInfo->
getRAIndex();
2905 if (ReturnAddrIndex == 0) {
2918 bool HasSymbolicDisplacement) {
2925 if (!HasSymbolicDisplacement)
2943 return Offset < 16 * 1024 * 1024;
2967 switch (SetCCOpcode) {
2992 if (SetCCOpcode ==
ISD::SETGT && RHSC->isAllOnes()) {
2997 if (SetCCOpcode ==
ISD::SETLT && RHSC->isZero()) {
3001 if (SetCCOpcode ==
ISD::SETGE && RHSC->isZero()) {
3005 if (SetCCOpcode ==
ISD::SETLT && RHSC->isOne()) {
3020 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
3024 switch (SetCCOpcode) {
3040 switch (SetCCOpcode) {
3094 unsigned Intrinsic)
const {
3100 switch (Intrinsic) {
3101 case Intrinsic::x86_aesenc128kl:
3102 case Intrinsic::x86_aesdec128kl:
3104 Info.ptrVal =
I.getArgOperand(1);
3109 case Intrinsic::x86_aesenc256kl:
3110 case Intrinsic::x86_aesdec256kl:
3112 Info.ptrVal =
I.getArgOperand(1);
3117 case Intrinsic::x86_aesencwide128kl:
3118 case Intrinsic::x86_aesdecwide128kl:
3120 Info.ptrVal =
I.getArgOperand(0);
3125 case Intrinsic::x86_aesencwide256kl:
3126 case Intrinsic::x86_aesdecwide256kl:
3128 Info.ptrVal =
I.getArgOperand(0);
3133 case Intrinsic::x86_cmpccxadd32:
3134 case Intrinsic::x86_cmpccxadd64:
3135 case Intrinsic::x86_atomic_bts:
3136 case Intrinsic::x86_atomic_btc:
3137 case Intrinsic::x86_atomic_btr: {
3139 Info.ptrVal =
I.getArgOperand(0);
3140 unsigned Size =
I.getType()->getScalarSizeInBits();
3147 case Intrinsic::x86_atomic_bts_rm:
3148 case Intrinsic::x86_atomic_btc_rm:
3149 case Intrinsic::x86_atomic_btr_rm: {
3151 Info.ptrVal =
I.getArgOperand(0);
3152 unsigned Size =
I.getArgOperand(1)->getType()->getScalarSizeInBits();
3159 case Intrinsic::x86_aadd32:
3160 case Intrinsic::x86_aadd64:
3161 case Intrinsic::x86_aand32:
3162 case Intrinsic::x86_aand64:
3163 case Intrinsic::x86_aor32:
3164 case Intrinsic::x86_aor64:
3165 case Intrinsic::x86_axor32:
3166 case Intrinsic::x86_axor64:
3167 case Intrinsic::x86_atomic_add_cc:
3168 case Intrinsic::x86_atomic_sub_cc:
3169 case Intrinsic::x86_atomic_or_cc:
3170 case Intrinsic::x86_atomic_and_cc:
3171 case Intrinsic::x86_atomic_xor_cc: {
3173 Info.ptrVal =
I.getArgOperand(0);
3174 unsigned Size =
I.getArgOperand(1)->getType()->getScalarSizeInBits();
3185 switch (IntrData->
Type) {
3190 Info.ptrVal =
I.getArgOperand(0);
3196 ScalarVT = MVT::i16;
3198 ScalarVT = MVT::i32;
3208 Info.ptrVal =
nullptr;
3220 Info.ptrVal =
nullptr;
3241 bool ForCodeSize)
const {
3242 for (
const APFloat &FPImm : LegalFPImmediates)
3243 if (Imm.bitwiseIsEqual(FPImm))
3251 assert(cast<LoadSDNode>(Load)->
isSimple() &&
"illegal to narrow");
3255 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
3257 if (
const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
3263 EVT VT = Load->getValueType(0);
3267 if (
Use.getResNo() != 0)
3291 if (BitSize == 0 || BitSize > 64)
3338 (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
3342 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
3343 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
3347 unsigned Index)
const {
3390 return Subtarget.hasBMI() || Subtarget.
canUseCMOV() ||
3399 return Subtarget.hasLZCNT() || Subtarget.
canUseCMOV() ||
3407 return !Subtarget.
hasSSE2() || VT == MVT::f80;
3411 return (VT == MVT::f64 && Subtarget.
hasSSE2()) ||
3412 (VT == MVT::f32 && Subtarget.
hasSSE1()) || VT == MVT::f16;
3422 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
3440 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
3452 return Subtarget.hasFastLZCNT();
3461 EVT VT =
Y.getValueType();
3466 if (!Subtarget.hasBMI())
3470 if (VT != MVT::i32 && VT != MVT::i64)
3473 return !isa<ConstantSDNode>(
Y) || cast<ConstantSDNode>(
Y)->isOpaque();
3477 EVT VT =
Y.getValueType();
3487 if (VT == MVT::v4i32)
3494 return X.getValueType().isScalarInteger();
3500 unsigned OldShiftOpcode,
unsigned NewShiftOpcode,
3504 X, XC,
CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG))
3507 if (
X.getValueType().isScalarInteger())
3521 EVT VT,
unsigned ShiftOpc,
bool MayTransformRotate,
3522 const APInt &ShiftOrRotateAmt,
const std::optional<APInt> &AndMask)
const {
3526 bool PreferRotate =
false;
3535 PreferRotate = Subtarget.hasBMI2();
3536 if (!PreferRotate) {
3539 PreferRotate = (MaskBits != 8) && (MaskBits != 16) && (MaskBits != 32);
3544 assert(AndMask.has_value() &&
"Null andmask when querying about shift+and");
3546 if (PreferRotate && MayTransformRotate)
3580 if (PreferRotate || !MayTransformRotate || VT.
isVector())
3590 const Value *Rhs)
const {
3594 if (BaseCost >= 0 && Subtarget.hasCCMP())
3597 if (BaseCost >= 0 && Opc == Instruction::And &&
3612 N->getOperand(0).getOpcode() ==
ISD::SRL) ||
3614 N->getOperand(0).getOpcode() ==
ISD::SHL)) &&
3615 "Expected shift-shift mask");
3617 EVT VT =
N->getValueType(0);
3618 if ((Subtarget.hasFastVectorShiftMasks() && VT.
isVector()) ||
3619 (Subtarget.hasFastScalarShiftMasks() && !VT.
isVector())) {
3623 return N->getOperand(1) ==
N->getOperand(0).getOperand(1);
3629 EVT VT =
Y.getValueType();
3636 if (VT == MVT::i64 && !Subtarget.is64Bit())
3696 [CmpVal](
int M) { return isUndefOrEqual(M, CmpVal); });
3708 [](
int M) { return M == SM_SentinelUndef; });
3713 unsigned NumElts = Mask.size();
3719 unsigned NumElts = Mask.size();
3725 return (Val >=
Low && Val <
Hi);
3768 unsigned NumElts = Mask.size();
3779 unsigned Size,
int Low,
int Step = 1) {
3780 for (
unsigned i = Pos, e = Pos +
Size; i != e; ++i,
Low += Step)
3792 for (
unsigned i = Pos, e = Pos +
Size; i != e; ++i,
Low += Step)
3808 unsigned NumElts = Mask.size();
3827 WidenedMask.
assign(Mask.size() / 2, 0);
3828 for (
int i = 0,
Size = Mask.size(); i <
Size; i += 2) {
3830 int M1 = Mask[i + 1];
3841 WidenedMask[i / 2] =
M1 / 2;
3845 WidenedMask[i / 2] =
M0 / 2;
3862 WidenedMask[i / 2] =
M0 / 2;
3869 assert(WidenedMask.
size() == Mask.size() / 2 &&
3870 "Incorrect size of mask after widening the elements!");
3876 const APInt &Zeroable,
3883 assert(!Zeroable.
isZero() &&
"V2's non-undef elements are used?!");
3884 for (
int i = 0,
Size = Mask.size(); i !=
Size; ++i)
3900 unsigned NumSrcElts = Mask.size();
3901 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
3902 "Illegal shuffle scale factor");
3905 if (NumDstElts >= NumSrcElts) {
3906 int Scale = NumDstElts / NumSrcElts;
3914 while (ScaledMask.
size() > NumDstElts) {
3918 ScaledMask = std::move(WidenedMask);
3940 const SDLoc &dl,
bool IsMask =
false) {
3945 MVT ConstVecVT = VT;
3954 for (
unsigned i = 0; i < NumElts; ++i) {
3955 bool IsUndef = Values[i] < 0 && IsMask;
3972 "Unequal constant and undef arrays");
3976 MVT ConstVecVT = VT;
3985 for (
unsigned i = 0, e = Bits.size(); i != e; ++i) {
3990 const APInt &V = Bits[i];
3995 }
else if (EltVT == MVT::f32) {
3998 }
else if (EltVT == MVT::f64) {
4021 "Unexpected vector type");
4035 "Unexpected vector type");
4049 LHS.getValueType() !=
RHS.getValueType() ||
4050 LHS.getOperand(0) !=
RHS.getOperand(0))
4054 if (Src.getValueSizeInBits() != (
LHS.getValueSizeInBits() * 2))
4057 unsigned NumElts =
LHS.getValueType().getVectorNumElements();
4058 if ((
LHS.getConstantOperandAPInt(1) == 0 &&
4059 RHS.getConstantOperandAPInt(1) == NumElts) ||
4060 (AllowCommute &&
RHS.getConstantOperandAPInt(1) == 0 &&
4061 LHS.getConstantOperandAPInt(1) == NumElts))
4068 const SDLoc &dl,
unsigned vectorWidth) {
4076 unsigned ElemsPerChunk = vectorWidth / ElVT.
getSizeInBits();
4081 IdxVal &= ~(ElemsPerChunk - 1);
4086 Vec->
ops().slice(IdxVal, ElemsPerChunk));
4120 unsigned vectorWidth) {
4121 assert((vectorWidth == 128 || vectorWidth == 256) &&
4122 "Unsupported vector width");
4128 EVT ResultVT = Result.getValueType();
4136 IdxVal &= ~(ElemsPerChunk - 1);
4162 "Unsupported vector widening type");
4183 const SDLoc &dl,
unsigned WideSizeInBits) {
4186 "Unsupported vector widening type");
4190 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
4198 if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
4199 return Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
4209 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
4217 assert(Ops.
empty() &&
"Expected an empty ops vector");
4220 Ops.
append(
N->op_begin(),
N->op_end());
4227 const APInt &
Idx =
N->getConstantOperandAPInt(2);
4228 EVT VT = Src.getValueType();
4233 if (
Idx == 0 && Src.isUndef()) {
4241 Src.getOperand(1).getValueType() == SubVT &&
4265 if (Src.isUndef()) {
4285 unsigned NumSubOps = SubOps.
size();
4286 unsigned HalfNumSubOps = NumSubOps / 2;
4287 assert((NumSubOps % 2) == 0 &&
"Unexpected number of subvectors");
4293 EVT HalfVT = V.getValueType().getHalfNumVectorElementsVT(*DAG.
getContext());
4307 EVT VT =
Op.getValueType();
4310 assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
4311 "Can't split odd sized vector");
4317 return std::make_pair(
Lo,
Lo);
4320 return std::make_pair(
Lo,
Hi);
4326 EVT VT =
Op.getValueType();
4331 for (
unsigned I = 0;
I != NumOps; ++
I) {
4333 if (!
SrcOp.getValueType().isVector()) {
4343 DAG.
getNode(
Op.getOpcode(), dl, LoVT, LoOps),
4344 DAG.
getNode(
Op.getOpcode(), dl, HiVT, HiOps));
4353 [[maybe_unused]]
EVT VT =
Op.getValueType();
4354 assert((
Op.getOperand(0).getValueType().is256BitVector() ||
4355 Op.getOperand(0).getValueType().is512BitVector()) &&
4357 assert(
Op.getOperand(0).getValueType().getVectorNumElements() ==
4368 [[maybe_unused]]
EVT VT =
Op.getValueType();
4369 assert(
Op.getOperand(0).getValueType() == VT &&
4370 Op.getOperand(1).getValueType() == VT &&
"Unexpected VTs!");
4382template <
typename F>
4385 F Builder,
bool CheckBWI =
true) {
4386 assert(Subtarget.
hasSSE2() &&
"Target assumed to support at least SSE2");
4387 unsigned NumSubs = 1;
4394 }
else if (Subtarget.
hasAVX2()) {
4407 return Builder(DAG,
DL, Ops);
4410 for (
unsigned i = 0; i != NumSubs; ++i) {
4413 EVT OpVT =
Op.getValueType();
4437 if (!OpVT.isInteger() || OpEltSizeInBits < 32 ||
4444 APInt SplatValue, SplatUndef;
4445 unsigned SplatBitSize;
4447 if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
4448 HasAnyUndefs, OpEltSizeInBits) &&
4449 !HasAnyUndefs && SplatValue.
getBitWidth() == OpEltSizeInBits)
4464 MVT OpVT =
Op.getSimpleValueType();
4468 assert(OpVT == VT &&
"Vector type mismatch");
4470 if (
SDValue BroadcastOp = MakeBroadcastOp(
Op, OpVT, DstVT)) {
4496 unsigned IdxVal =
Op.getConstantOperandVal(2);
4502 if (IdxVal == 0 && Vec.
isUndef())
4505 MVT OpVT =
Op.getSimpleValueType();
4524 assert(IdxVal + SubVecNumElems <= NumElems &&
4526 "Unexpected index value in INSERT_SUBVECTOR");
4546 Undef, SubVec, ZeroIdx);
4549 assert(IdxVal != 0 &&
"Unexpected index");
4556 assert(IdxVal != 0 &&
"Unexpected index");
4559 [](
SDValue V) { return V.isUndef(); })) {
4564 unsigned ShiftLeft = NumElems - SubVecNumElems;
4565 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
4568 if (ShiftRight != 0)
4576 if (IdxVal + SubVecNumElems == NumElems) {
4579 if (SubVecNumElems * 2 == NumElems) {
4589 Undef, Vec, ZeroIdx);
4606 unsigned ShiftLeft = NumElems - SubVecNumElems;
4607 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
4610 if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
4633 unsigned LowShift = NumElems - IdxVal;
4640 unsigned HighShift = IdxVal + SubVecNumElems;
4671 "Expected a 128/256/512-bit vector type");
4679 EVT InVT = In.getValueType();
4683 "Unknown extension opcode");
4689 "Expected VTs to be the same size!");
4693 InVT = In.getValueType();
4711 bool Lo,
bool Unary) {
4713 "Illegal vector type to unpack");
4714 assert(Mask.empty() &&
"Expected an empty shuffle mask vector");
4717 for (
int i = 0; i < NumElts; ++i) {
4718 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
4719 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
4720 Pos += (Unary ? 0 : NumElts * (i % 2));
4721 Pos += (
Lo ? 0 : NumEltsInLane / 2);
4722 Mask.push_back(Pos);
4732 assert(Mask.empty() &&
"Expected an empty shuffle mask vector");
4734 for (
int i = 0; i < NumElts; ++i) {
4736 Pos += (
Lo ? 0 : NumElts / 2);
4737 Mask.push_back(Pos);
4747 for (
int I = 0, NumElts = Mask.size();
I != NumElts; ++
I) {
4751 SDValue V = (M < NumElts) ? V1 : V2;
4754 Ops[
I] = V.getOperand(M % NumElts);
4783 bool PackHiHalf =
false) {
4784 MVT OpVT =
LHS.getSimpleValueType();
4786 bool UsePackUS = Subtarget.
hasSSE41() || EltSizeInBits == 8;
4787 assert(OpVT ==
RHS.getSimpleValueType() &&
4790 "Unexpected PACK operand types");
4791 assert((EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) &&
4792 "Unexpected PACK result type");
4795 if (EltSizeInBits == 32) {
4797 int Offset = PackHiHalf ? 1 : 0;
4799 for (
int I = 0;
I != NumElts;
I += 4) {
4852 MVT VT = V2.getSimpleValueType();
4857 for (
int i = 0; i != NumElems; ++i)
4859 MaskVec[i] = (i ==
Idx) ? NumElems : i;
4867 return dyn_cast<ConstantPoolSDNode>(
Ptr);
4891 assert(LD &&
"Unexpected null LoadSDNode");
4899 bool AllowWholeUndefs =
true,
4900 bool AllowPartialUndefs =
false) {
4901 assert(EltBits.
empty() &&
"Expected an empty EltBits vector");
4905 EVT VT =
Op.getValueType();
4907 assert((SizeInBits % EltSizeInBits) == 0 &&
"Can't split constant!");
4908 unsigned NumElts = SizeInBits / EltSizeInBits;
4913 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
4914 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
4915 "Constant bit sizes don't match");
4918 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
4923 if (NumSrcElts == NumElts) {
4924 UndefElts = UndefSrcElts;
4925 EltBits.
assign(SrcEltBits.begin(), SrcEltBits.end());
4930 APInt UndefBits(SizeInBits, 0);
4931 APInt MaskBits(SizeInBits, 0);
4933 for (
unsigned i = 0; i != NumSrcElts; ++i) {
4934 unsigned BitOffset = i * SrcEltSizeInBits;
4935 if (UndefSrcElts[i])
4936 UndefBits.
setBits(BitOffset, BitOffset + SrcEltSizeInBits);
4937 MaskBits.
insertBits(SrcEltBits[i], BitOffset);
4941 UndefElts =
APInt(NumElts, 0);
4944 for (
unsigned i = 0; i != NumElts; ++i) {
4945 unsigned BitOffset = i * EltSizeInBits;
4950 if (!AllowWholeUndefs)
4958 if (UndefEltBits.
getBoolValue() && !AllowPartialUndefs)
4961 EltBits[i] = MaskBits.
extractBits(EltSizeInBits, BitOffset);
4968 unsigned UndefBitIndex) {
4971 if (isa<UndefValue>(Cst)) {
4972 Undefs.
setBit(UndefBitIndex);
4975 if (
auto *CInt = dyn_cast<ConstantInt>(Cst)) {
4976 Mask = CInt->getValue();
4979 if (
auto *CFP = dyn_cast<ConstantFP>(Cst)) {
4980 Mask = CFP->getValueAPF().bitcastToAPInt();
4983 if (
auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
4984 Type *Ty = CDS->getType();
4986 Type *EltTy = CDS->getElementType();
4990 if (!IsInteger && !IsFP)
4993 for (
unsigned I = 0, E = CDS->getNumElements();
I != E; ++
I)
4995 Mask.insertBits(CDS->getElementAsAPInt(
I),
I * EltBits);
4997 Mask.insertBits(CDS->getElementAsAPFloat(
I).bitcastToAPInt(),
5008 return CastBitData(UndefSrcElts, SrcEltBits);
5012 if (
auto *Cst = dyn_cast<ConstantSDNode>(
Op)) {
5015 return CastBitData(UndefSrcElts, SrcEltBits);
5017 if (
auto *Cst = dyn_cast<ConstantFPSDNode>(
Op)) {
5019 APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
5021 return CastBitData(UndefSrcElts, SrcEltBits);
5025 if (
auto *BV = dyn_cast<BuildVectorSDNode>(
Op)) {
5029 if (BV->getConstantRawBits(
true, SrcEltSizeInBits, SrcEltBits, Undefs)) {
5031 for (
unsigned I = 0, E = SrcEltBits.
size();
I != E; ++
I)
5034 return CastBitData(UndefSrcElts, SrcEltBits);
5042 if (!CstTy->
isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
5046 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5047 if ((SizeInBits % SrcEltSizeInBits) != 0)
5050 APInt UndefSrcElts(NumSrcElts, 0);
5052 for (
unsigned i = 0; i != NumSrcElts; ++i)
5057 return CastBitData(UndefSrcElts, SrcEltBits);
5063 auto *MemIntr = cast<MemIntrinsicSDNode>(
Op);
5070 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5072 APInt UndefSrcElts(NumSrcElts, 0);
5074 if (CollectConstantBits(
C, SrcEltBits[0], UndefSrcElts, 0)) {
5075 if (UndefSrcElts[0])
5076 UndefSrcElts.
setBits(0, NumSrcElts);
5077 if (SrcEltBits[0].
getBitWidth() != SrcEltSizeInBits)
5078 SrcEltBits[0] = SrcEltBits[0].trunc(SrcEltSizeInBits);
5079 SrcEltBits.
append(NumSrcElts - 1, SrcEltBits[0]);
5080 return CastBitData(UndefSrcElts, SrcEltBits);
5087 auto *MemIntr = cast<MemIntrinsicSDNode>(
Op);
5094 unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
5095 if (!CstTy->
isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
5096 (SizeInBits % SubVecSizeInBits) != 0)
5099 unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
5100 unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
5101 APInt UndefSubElts(NumSubElts, 0);
5103 APInt(CstEltSizeInBits, 0));
5104 for (
unsigned i = 0; i != NumSubElts; ++i) {
5108 for (
unsigned j = 1; j != NumSubVecs; ++j)
5109 SubEltBits[i + (j * NumSubElts)] = SubEltBits[i];
5113 return CastBitData(UndefSubElts, SubEltBits);
5120 isa<ConstantSDNode>(
Op.getOperand(0).getOperand(0))) {
5122 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5124 APInt UndefSrcElts(NumSrcElts, 0);
5126 const APInt &
C =
Op.getOperand(0).getConstantOperandAPInt(0);
5127 SrcEltBits.
push_back(
C.zextOrTrunc(SrcEltSizeInBits));
5128 SrcEltBits.
append(NumSrcElts - 1,
APInt(SrcEltSizeInBits, 0));
5129 return CastBitData(UndefSrcElts, SrcEltBits);
5137 bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits;
5139 APInt UndefSrcElts, UndefSubElts;
5142 UndefSubElts, EltSubBits,
5143 AllowWholeUndefs && AllowUndefs,
5144 AllowPartialUndefs && AllowUndefs) &&
5146 UndefSrcElts, EltSrcBits,
5147 AllowWholeUndefs && AllowUndefs,
5148 AllowPartialUndefs && AllowUndefs)) {
5149 unsigned BaseIdx =
Op.getConstantOperandVal(2);
5150 UndefSrcElts.
insertBits(UndefSubElts, BaseIdx);
5151 for (
unsigned i = 0, e = EltSubBits.
size(); i != e; ++i)
5152 EltSrcBits[BaseIdx + i] = EltSubBits[i];
5153 return CastBitData(UndefSrcElts, EltSrcBits);
5164 UndefElts, EltBits, AllowWholeUndefs,
5165 AllowPartialUndefs)) {
5166 EVT SrcVT =
Op.getOperand(0).getValueType();
5169 unsigned BaseIdx =
Op.getConstantOperandVal(1);
5170 UndefElts = UndefElts.
extractBits(NumSubElts, BaseIdx);
5171 if ((BaseIdx + NumSubElts) != NumSrcElts)
5172 EltBits.
erase(EltBits.
begin() + BaseIdx + NumSubElts, EltBits.
end());
5180 if (
auto *SVN = dyn_cast<ShuffleVectorSDNode>(
Op)) {
5186 if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
5190 APInt UndefElts0, UndefElts1;
5194 UndefElts0, EltBits0, AllowWholeUndefs,
5195 AllowPartialUndefs))
5199 UndefElts1, EltBits1, AllowWholeUndefs,
5200 AllowPartialUndefs))
5204 for (
int i = 0; i != (int)NumElts; ++i) {
5209 }
else if (M < (
int)NumElts) {
5214 if (UndefElts1[M - NumElts])
5216 EltBits.
push_back(EltBits1[M - NumElts]);
5231 Op,
Op.getScalarValueSizeInBits(), UndefElts, EltBits,
5232 true, AllowPartialUndefs)) {
5233 int SplatIndex = -1;
5234 for (
int i = 0, e = EltBits.
size(); i != e; ++i) {
5237 if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
5243 if (0 <= SplatIndex) {
5244 SplatVal = EltBits[SplatIndex];
5255 unsigned MaskEltSizeInBits,
5266 for (
const APInt &Elt : EltBits)
5281 bool IsPow2OrUndef =
true;
5282 for (
unsigned I = 0, E = EltBits.
size();
I != E; ++
I)
5283 IsPow2OrUndef &= UndefElts[
I] || EltBits[
I].isPowerOf2();
5284 return IsPow2OrUndef;
5291 EVT VT = V.getValueType();
5297 return V.getOperand(0);
5301 (
isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
5303 Not = DAG.
getBitcast(V.getOperand(0).getValueType(), Not);
5313 V.getOperand(0).hasOneUse()) {
5317 V.getScalarValueSizeInBits(), UndefElts,
5321 bool MinSigned =
false;
5322 for (
APInt &Elt : EltBits) {
5323 MinSigned |= Elt.isMinSignedValue();
5328 MVT VT = V.getSimpleValueType();
5338 for (
SDValue &CatOp : CatOps) {
5342 CatOp = DAG.
getBitcast(CatOp.getValueType(), NotCat);
5349 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
5364 bool Unary,
unsigned NumStages = 1) {
5365 assert(Mask.empty() &&
"Expected an empty shuffle mask vector");
5369 unsigned Offset = Unary ? 0 : NumElts;
5370 unsigned Repetitions = 1u << (NumStages - 1);
5371 unsigned Increment = 1u << NumStages;
5372 assert((NumEltsPerLane >> NumStages) > 0 &&
"Illegal packing compaction");
5374 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
5375 for (
unsigned Stage = 0; Stage != Repetitions; ++Stage) {
5376 for (
unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
5377 Mask.push_back(Elt + (Lane * NumEltsPerLane));
5378 for (
unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
5379 Mask.push_back(Elt + (Lane * NumEltsPerLane) +
Offset);
5389 int NumInnerElts = NumElts / 2;
5390 int NumEltsPerLane = NumElts / NumLanes;
5391 int NumInnerEltsPerLane = NumInnerElts / NumLanes;
5397 for (
int Lane = 0; Lane != NumLanes; ++Lane) {
5398 for (
int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
5399 int OuterIdx = (Lane * NumEltsPerLane) + Elt;
5400 int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
5401 if (DemandedElts[OuterIdx])
5402 DemandedLHS.
setBit(InnerIdx);
5403 if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
5404 DemandedRHS.
setBit(InnerIdx);
5413 DemandedLHS, DemandedRHS);
5414 DemandedLHS |= DemandedLHS << 1;
5415 DemandedRHS |= DemandedRHS << 1;
5431 MVT VT =
N.getSimpleValueType();
5438 assert(Mask.empty() &&
"getTargetShuffleMask expects an empty Mask vector");
5439 assert(Ops.
empty() &&
"getTargetShuffleMask expects an empty Ops vector");
5442 bool IsFakeUnary =
false;
5443 switch (
N.getOpcode()) {
5445 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5446 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5447 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5449 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5452 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5453 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5454 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5456 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5459 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5460 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5461 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5463 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5466 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5467 if (isa<ConstantSDNode>(
N.getOperand(1)) &&
5468 isa<ConstantSDNode>(
N.getOperand(2))) {
5469 int BitLen =
N.getConstantOperandVal(1);
5470 int BitIdx =
N.getConstantOperandVal(2);
5476 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5477 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5478 if (isa<ConstantSDNode>(
N.getOperand(2)) &&
5479 isa<ConstantSDNode>(
N.getOperand(3))) {
5480 int BitLen =
N.getConstantOperandVal(2);
5481 int BitIdx =
N.getConstantOperandVal(3);
5483 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5487 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5488 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5490 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5493 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5494 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5496 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5499 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5500 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5502 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5505 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5506 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5508 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5512 "Only 32-bit and 64-bit elements are supported!");
5513 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5514 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5515 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5517 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5523 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5524 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5525 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5527 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5533 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5534 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5540 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5541 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5547 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5548 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5553 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5554 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5559 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5560 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5565 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5573 if (
N.getOperand(0).getValueType() == VT) {
5580 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5582 SDValue MaskNode =
N.getOperand(1);
5592 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5593 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5595 SDValue MaskNode =
N.getOperand(1);
5603 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5604 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5611 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5612 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5616 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5617 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5618 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5620 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5623 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5624 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5625 ImmN =
N.getConstantOperandVal(
N.getNumOperands() - 1);
5627 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5630 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5635 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5640 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5645 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5646 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5647 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5648 SDValue MaskNode =
N.getOperand(2);
5649 SDValue CtrlNode =
N.getOperand(3);
5650 if (
ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
5651 unsigned CtrlImm = CtrlOp->getZExtValue();
5662 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5663 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5664 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(1);
5665 SDValue MaskNode =
N.getOperand(2);
5673 assert(
N.getOperand(1).getValueType() == VT &&
"Unexpected value type");
5677 SDValue MaskNode =
N.getOperand(0);
5686 assert(
N.getOperand(0).getValueType() == VT &&
"Unexpected value type");
5687 assert(
N.getOperand(2).getValueType() == VT &&
"Unexpected value type");
5688 IsUnary = IsFakeUnary =
N.getOperand(0) ==
N.getOperand(2);
5692 SDValue MaskNode =
N.getOperand(1);
5709 if (!AllowSentinelZero &&
isAnyZero(Mask))
5717 if (M >= (
int)Mask.size())
5724 if (!IsUnary || IsFakeUnary)
5750 int Size = Mask.size();
5760 int ScalarSizeInBits = VectorSizeInBits /
Size;
5761 assert(!(VectorSizeInBits % ScalarSizeInBits) &&
"Illegal shuffle mask size");
5763 for (
int i = 0; i <
Size; ++i) {
5770 if ((M >= 0 && M <
Size && V1IsZero) || (M >=
Size && V2IsZero)) {
5785 if ((
Size % V.getNumOperands()) == 0) {
5786 int Scale =
Size / V->getNumOperands();
5793 APInt Val = Cst->getAPIntValue();
5794 Val = Val.
extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
5798 APInt Val = Cst->getValueAPF().bitcastToAPInt();
5799 Val = Val.
extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
5808 if ((V.getNumOperands() %
Size) == 0) {
5809 int Scale = V->getNumOperands() /
Size;
5810 bool AllUndef =
true;
5811 bool AllZero =
true;
5812 for (
int j = 0; j < Scale; ++j) {
5813 SDValue Op = V.getOperand((M * Scale) + j);
5814 AllUndef &=
Op.isUndef();
5837 MVT VT =
N.getSimpleValueType();
5841 int Size = Mask.size();
5843 SDValue V2 = IsUnary ? V1 : Ops[1];
5850 "Illegal split of shuffle value type");
5854 APInt UndefSrcElts[2];
5856 bool IsSrcConstant[2] = {
5858 SrcEltBits[0],
true,
5861 SrcEltBits[1],
true,
5864 for (
int i = 0; i <
Size; ++i) {
5878 unsigned SrcIdx = M /
Size;
5893 (
Size % V.getValueType().getVectorNumElements()) == 0) {
5894 int Scale =
Size / V.getValueType().getVectorNumElements();
5895 int Idx = M / Scale;
5906 SDValue Vec = V.getOperand(0);
5909 int Idx = V.getConstantOperandVal(2);
5910 int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements();
5911 if (M <
Idx || (
Idx + NumSubElts) <= M)
5918 if (IsSrcConstant[SrcIdx]) {
5919 if (UndefSrcElts[SrcIdx][M])
5921 else if (SrcEltBits[SrcIdx][M] == 0)
5927 "Different mask size from vector size!");
5933 const APInt &KnownUndef,
5934 const APInt &KnownZero,
5935 bool ResolveKnownZeros=
true) {
5936 unsigned NumElts = Mask.size();
5938 KnownZero.
getBitWidth() == NumElts &&
"Shuffle mask size mismatch");
5940 for (
unsigned i = 0; i != NumElts; ++i) {
5943 else if (ResolveKnownZeros && KnownZero[i])
5952 unsigned NumElts = Mask.size();
5955 for (
unsigned i = 0; i != NumElts; ++i) {
5967 EVT CondVT =
Cond.getValueType();
5980 for (
int i = 0; i != (int)NumElts; ++i) {
5985 if (UndefElts[i] || (!IsBLENDV && EltBits[i].
isZero()) ||
5986 (IsBLENDV && EltBits[i].isNonNegative()))
5998 bool ResolveKnownElts);
6008 bool ResolveKnownElts) {
6012 MVT VT =
N.getSimpleValueType();
6016 if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
6019 unsigned NumSizeInBytes = NumSizeInBits / 8;
6020 unsigned NumBytesPerElt = NumBitsPerElt / 8;
6022 unsigned Opcode =
N.getOpcode();
6026 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(
N)->getMask();
6028 Mask.append(ShuffleMask.
begin(), ShuffleMask.
end());
6043 uint64_t ZeroMask = IsAndN ? 255 : 0;
6050 assert(UndefElts.
isZero() &&
"Unexpected UNDEF element in AND/ANDNP mask");
6051 for (
int i = 0, e = (
int)EltBits.
size(); i != e; ++i) {
6052 const APInt &ByteBits = EltBits[i];
6053 if (ByteBits != 0 && ByteBits != 255)
6078 size_t MaskSize = std::max(SrcMask0.
size(), SrcMask1.
size());
6082 for (
int i = 0; i != (int)MaskSize; ++i) {
6092 Mask.push_back(i + MaskSize);
6105 if (!
N->isOnlyUserOf(Sub.
getNode()))
6108 uint64_t InsertIdx =
N.getConstantOperandVal(2);
6115 unsigned MaxElts = std::max(NumElts, NumSubSrcBCElts);
6116 assert((MaxElts % NumElts) == 0 && (MaxElts % NumSubSrcBCElts) == 0 &&
6117 "Subvector valuetype mismatch");
6118 InsertIdx *= (MaxElts / NumElts);
6119 ExtractIdx *= (MaxElts / NumSubSrcBCElts);
6120 NumSubElts *= (MaxElts / NumElts);
6121 bool SrcIsUndef = Src.isUndef();
6122 for (
int i = 0; i != (int)MaxElts; ++i)
6124 for (
int i = 0; i != (int)NumSubElts; ++i)
6125 Mask[InsertIdx + i] = (SrcIsUndef ? 0 : MaxElts) + ExtractIdx + i;
6134 if (
Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) &&
6135 NumBitsPerElt == 64 && NumSizeInBits == 512 &&
6137 Src.getOperand(0).isUndef() &&
6138 Src.getOperand(1).getValueType() == SubVT &&
6139 Src.getConstantOperandVal(2) == 0) {
6140 for (
int i = 0; i != (int)NumSubElts; ++i)
6142 for (
int i = 0; i != (int)NumSubElts; ++i)
6143 Mask.push_back(i + NumElts);
6158 Depth + 1, ResolveKnownElts))
6168 if (SubMask.
size() != NumSubElts) {
6169 assert(((SubMask.
size() % NumSubElts) == 0 ||
6170 (NumSubElts % SubMask.
size()) == 0) &&
"Illegal submask scale");
6171 if ((NumSubElts % SubMask.
size()) == 0) {
6172 int Scale = NumSubElts / SubMask.
size();
6175 SubMask = ScaledSubMask;
6177 int Scale = SubMask.
size() / NumSubElts;
6178 NumSubElts = SubMask.
size();
6188 for (
int i = 0; i != (int)NumElts; ++i)
6190 for (
int i = 0; i != (int)NumSubElts; ++i) {
6193 int InputIdx = M / NumSubElts;
6194 M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
6196 Mask[i + InsertIdx] = M;
6208 unsigned DstIdx = 0;
6211 if (!isa<ConstantSDNode>(
N.getOperand(2)) ||
6212 N.getConstantOperandAPInt(2).uge(NumElts))
6214 DstIdx =
N.getConstantOperandVal(2);
6219 for (
unsigned i = 0; i != NumElts; ++i)
6239 if ((MinBitsPerElt % 8) != 0)
6250 if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.
getOperand(1)))
6259 unsigned DstByte = DstIdx * NumBytesPerElt;
6270 for (
int i = 0; i != (int)NumSizeInBytes; ++i)
6271 Mask.push_back(NumSizeInBytes + i);
6274 unsigned MinBytesPerElts = MinBitsPerElt / 8;
6275 MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt);
6276 for (
unsigned i = 0; i != MinBytesPerElts; ++i)
6277 Mask[DstByte + i] = SrcByte + i;
6278 for (
unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i)
6288 "Unexpected input value type");
6290 APInt EltsLHS, EltsRHS;
6295 bool Offset0 =
false, Offset1 =
false;
6324 bool IsUnary = (N0 == N1);
6332 if (Offset0 || Offset1) {
6334 if ((Offset0 &&
isInRange(M, 0, NumElts)) ||
6335 (Offset1 &&
isInRange(M, NumElts, 2 * NumElts)))
6352 EVT SrcVT = Src.getValueType();
6359 unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt;
6360 assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 &&
"Illegal truncation");
6361 for (
unsigned i = 0; i != NumSrcElts; ++i)
6362 Mask.push_back(i * Scale);
6371 if (!Amt || (*Amt % 8) != 0)
6381 for (
unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
6382 for (
unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6383 Mask[i + j] = i + j - ByteShift;
6385 for (
unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
6386 for (
unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6387 Mask[i + j - ByteShift] = i + j;
6393 uint64_t ShiftVal =
N.getConstantOperandVal(1);
6395 if (NumBitsPerElt <= ShiftVal) {
6401 if ((ShiftVal % 8) != 0)
6411 for (
unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
6412 for (
unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6413 Mask[i + j] = i + j - ByteShift;
6415 for (
unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
6416 for (
unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6417 Mask[i + j - ByteShift] = i + j;
6424 uint64_t RotateVal =
N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
6425 if ((RotateVal % 8) != 0)
6428 int Offset = RotateVal / 8;
6430 for (
int i = 0; i != (int)NumElts; ++i) {
6431 int BaseIdx = i * NumBytesPerElt;
6432 for (
int j = 0; j != (int)NumBytesPerElt; ++j) {
6433 Mask.push_back(BaseIdx + ((
Offset + j) % NumBytesPerElt));
6440 if (!Src.getSimpleValueType().isVector()) {
6443 Src.getOperand(0).getValueType().getScalarType() !=
6446 Src = Src.getOperand(0);
6449 Mask.append(NumElts, 0);
6454 EVT SrcVT = Src.getValueType();
6459 (NumBitsPerSrcElt % 8) != 0)
6463 APInt DemandedSrcElts =
6468 assert((NumBitsPerElt % NumBitsPerSrcElt) == 0 &&
"Unexpected extension");
6469 unsigned Scale = NumBitsPerElt / NumBitsPerSrcElt;
6470 for (
unsigned I = 0;
I != NumElts; ++
I)
6471 Mask.append(Scale,
I);
6480 EVT SrcVT = Src.getValueType();
6502 int MaskWidth = Mask.size();
6504 for (
int i = 0, e = Inputs.
size(); i < e; ++i) {
6505 int lo = UsedInputs.
size() * MaskWidth;
6506 int hi = lo + MaskWidth;
6511 if ((lo <= M) && (M < hi))
6515 if (
none_of(Mask, [lo, hi](
int i) {
return (lo <= i) && (i < hi); })) {
6523 bool IsRepeat =
false;
6524 for (
int j = 0, ue = UsedInputs.
size(); j != ue; ++j) {
6525 if (UsedInputs[j] != Inputs[i])
6529 M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
6538 Inputs = UsedInputs;
6549 bool ResolveKnownElts) {
6553 EVT VT =
Op.getValueType();
6558 if (ResolveKnownElts)
6563 ResolveKnownElts)) {
6574 bool ResolveKnownElts) {
6575 APInt KnownUndef, KnownZero;
6577 KnownZero, DAG,
Depth, ResolveKnownElts);
6583 bool ResolveKnownElts =
true) {
6584 EVT VT =
Op.getValueType();
6588 unsigned NumElts =
Op.getValueType().getVectorNumElements();
6600 "Unknown broadcast load type");
6611 Opcode,
DL, Tys, Ops, MemVT,
6625 EVT VT =
Op.getValueType();
6626 unsigned Opcode =
Op.getOpcode();
6630 if (
auto *SV = dyn_cast<ShuffleVectorSDNode>(
Op)) {
6631 int Elt = SV->getMaskElt(Index);
6636 SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);
6650 int Elt = ShuffleMask[Index];
6657 assert(0 <= Elt && Elt < (2 * NumElems) &&
"Shuffle index out of range");
6666 uint64_t SubIdx =
Op.getConstantOperandVal(2);
6669 if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
6676 EVT SubVT =
Op.getOperand(0).getValueType();
6678 uint64_t SubIdx = Index / NumSubElts;
6679 uint64_t SubElt = Index % NumSubElts;
6686 uint64_t SrcIdx =
Op.getConstantOperandVal(1);
6693 EVT SrcVT = Src.getValueType();
6704 isa<ConstantSDNode>(
Op.getOperand(2))) {
6705 if (
Op.getConstantOperandAPInt(2) == Index)
6706 return Op.getOperand(1);
6711 return (Index == 0) ?
Op.getOperand(0)
6715 return Op.getOperand(Index);
6722 const APInt &NonZeroMask,
6723 unsigned NumNonZero,
unsigned NumZero,
6726 MVT VT =
Op.getSimpleValueType();
6729 ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.
hasSSE41())) &&
6730 "Illegal vector insertion");
6735 for (
unsigned i = 0; i < NumElts; ++i) {
6736 bool IsNonZero = NonZeroMask[i];
6745 if (NumZero || 0 != i)
6748 assert(0 == i &&
"Expected insertion into zero-index");
6764 const APInt &NonZeroMask,
6765 unsigned NumNonZero,
unsigned NumZero,
6768 if (NumNonZero > 8 && !Subtarget.
hasSSE41())
6782 for (
unsigned I = 0;
I != 4; ++
I) {
6783 if (!NonZeroMask[
I])
6791 assert(V &&
"Failed to fold v16i8 vector to zero");
6796 for (
unsigned i = V ? 4 : 0; i < 16; i += 2) {
6797 bool ThisIsNonZero = NonZeroMask[i];
6798 bool NextIsNonZero = NonZeroMask[i + 1];
6799 if (!ThisIsNonZero && !NextIsNonZero)
6803 if (ThisIsNonZero) {
6804 if (NumZero || NextIsNonZero)
6810 if (NextIsNonZero) {
6812 if (i == 0 && NumZero)
6828 if (i != 0 || NumZero)
6846 const APInt &NonZeroMask,
6847 unsigned NumNonZero,
unsigned NumZero,
6850 if (NumNonZero > 4 && !Subtarget.
hasSSE41())
6866 if (Subtarget.
hasSSE3() && !Subtarget.hasXOP() &&
6867 Op.getOperand(0) ==
Op.getOperand(2) &&
6868 Op.getOperand(1) ==
Op.getOperand(3) &&
6869 Op.getOperand(0) !=
Op.getOperand(1)) {
6870 MVT VT =
Op.getSimpleValueType();
6874 SDValue Ops[4] = {
Op.getOperand(0),
Op.getOperand(1),
6882 std::bitset<4> Zeroable, Undefs;
6883 for (
int i = 0; i < 4; ++i) {
6888 assert(Zeroable.size() - Zeroable.count() > 1 &&
6889 "We expect at least two non-zero elements!");
6894 unsigned FirstNonZeroIdx;
6895 for (
unsigned i = 0; i < 4; ++i) {
6906 if (!FirstNonZero.
getNode()) {
6908 FirstNonZeroIdx = i;
6912 assert(FirstNonZero.
getNode() &&
"Unexpected build vector of all zeros!");
6918 unsigned EltMaskIdx, EltIdx;
6920 for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
6921 if (Zeroable[EltIdx]) {
6923 Mask[EltIdx] = EltIdx+4;
6927 Elt =
Op->getOperand(EltIdx);
6930 if (Elt.
getOperand(0) != V1 || EltMaskIdx != EltIdx)
6932 Mask[EltIdx] = EltIdx;
6937 SDValue VZeroOrUndef = (Zeroable == Undefs)
6950 if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
6953 bool CanFold =
true;
6954 for (
unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
6968 assert(V1.
getNode() &&
"Expected at least two non-zero elements!");
6971 if (V2.getSimpleValueType() != MVT::v4f32)
6975 unsigned ZMask = Zeroable.to_ulong();
6977 unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
6978 assert((InsertPSMask & ~0xFFu) == 0 &&
"Invalid mask!");
6990 MVT ShVT = MVT::v16i8;
6993 assert(NumBits % 8 == 0 &&
"Only support byte sized shifts");
7008 EVT PVT = LD->getValueType(0);
7009 if (PVT != MVT::i32 && PVT != MVT::f32)
7015 FI = FINode->getIndex();
7018 isa<FrameIndexSDNode>(
Ptr.getOperand(0))) {
7019 FI = cast<FrameIndexSDNode>(
Ptr.getOperand(0))->getIndex();
7029 SDValue Chain = LD->getChain();
7033 if (!InferredAlign || *InferredAlign < RequiredAlign) {
7050 int64_t StartOffset =
Offset & ~int64_t(RequiredAlign.
value() - 1);
7057 int EltNo = (
Offset - StartOffset) >> 2;
7062 LD->getPointerInfo().getWithOffset(StartOffset));
7075 auto *BaseLd = cast<LoadSDNode>(Elt);
7076 if (!BaseLd->isSimple())
7089 if (
auto *AmtC = dyn_cast<ConstantSDNode>(Elt.
getOperand(1))) {
7090 uint64_t Amt = AmtC->getZExtValue();
7092 ByteOffset += Amt / 8;
7098 if (
auto *IdxC = dyn_cast<ConstantSDNode>(Elt.
getOperand(1))) {
7100 unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
7102 if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
7105 ByteOffset +=
Idx * (SrcSizeInBits / 8);
7123 bool IsAfterLegalize) {
7127 unsigned NumElems = Elts.
size();
7129 int LastLoadedElt = -1;
7139 for (
unsigned i = 0; i < NumElems; ++i) {
7158 if (!
findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0)
7160 unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);
7161 if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits)
7169 "Incomplete element masks");
7172 if (UndefMask.
popcount() == NumElems)
7183 "Register/Memory size mismatch");
7185 assert(LDBase &&
"Did not find base load for merging consecutive loads");
7187 unsigned BaseSizeInBytes = BaseSizeInBits / 8;
7188 int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt);
7189 int LoadSizeInBits = NumLoadedElts * BaseSizeInBits;
7190 assert((BaseSizeInBits % 8) == 0 &&
"Sub-byte element loads detected");
7193 if (ByteOffsets[FirstLoadedElt] != 0)
7200 int64_t ByteOffset = ByteOffsets[EltIdx];
7201 if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
7202 int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
7203 return (0 <= BaseIdx && BaseIdx < (
int)NumElems && LoadMask[BaseIdx] &&
7204 Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0);
7207 EltIdx - FirstLoadedElt);
7213  bool IsConsecutiveLoad = true;
7214  bool IsConsecutiveLoadWithZeros = true;
7215  for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
7217  if (!CheckConsecutiveLoad(LDBase, i)) {
7218  IsConsecutiveLoad = false;
7219  IsConsecutiveLoadWithZeros = false;
7222  } else if (ZeroMask[i]) {
7223  IsConsecutiveLoad = false;
7230  "Cannot merge volatile or atomic loads.");
7235  for (auto *LD : Loads)
7250  if (FirstLoadedElt == 0 &&
7251  (NumLoadedElts == (int)NumElems || IsDereferenceable) &&
7252  (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
7263  return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
7266  return CreateLoad(VT, LDBase);
7270  if (!IsAfterLegalize && VT.isVector()) {
7272  if ((NumMaskElts % NumElems) == 0) {
7273  unsigned Scale = NumMaskElts / NumElems;
7275  for (unsigned i = 0; i < NumElems; ++i) {
7278  int Offset = ZeroMask[i] ? NumMaskElts : 0;
7279  for (unsigned j = 0; j != Scale; ++j)
7280  ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset;
7282  SDValue V = CreateLoad(VT, LDBase);
7292  unsigned HalfNumElems = NumElems / 2;
7298  DAG, Subtarget, IsAfterLegalize);
7306  if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
7307  ((LoadSizeInBits == 16 && Subtarget.hasFP16()) || LoadSizeInBits == 32 ||
7308  LoadSizeInBits == 64) &&
7315  if (!Subtarget.hasSSE2() && VT == MVT::v4f32)
7323  for (auto *LD : Loads)
7334  for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
7335  unsigned RepeatSize = SubElems * BaseSizeInBits;
7336  unsigned ScalarSize = std::min(RepeatSize, 64u);
7337  if (!Subtarget.hasAVX2() && ScalarSize < 32)
7342  if (RepeatSize > ScalarSize && SubElems == 1)
7347  for (unsigned i = 0; i != NumElems && Match; ++i) {
7351  if (RepeatedLoads[i % SubElems].isUndef())
7352  RepeatedLoads[i % SubElems] = Elt;
7354  Match &= (RepeatedLoads[i % SubElems] == Elt);
7359  Match &= !RepeatedLoads.back().isUndef();
7367  if (RepeatSize > ScalarSize)
7369  RepeatSize / ScalarSize);
7375  RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) {
7376  SDValue Broadcast = RepeatLoad;
7377  if (RepeatSize > ScalarSize) {
7405  bool IsAfterLegalize) {
7424  auto getConstantScalar = [&](const APInt &Val) -> Constant * {
7426  if (ScalarSize == 16)
7428  if (ScalarSize == 32)
7430  assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7437  for (unsigned I = 0, E = Bits.size(); I != E; ++I)
7439  : getConstantScalar(Bits[I]));
7448  auto getConstantScalar = [&](const APInt &Val) -> Constant * {
7450  if (ScalarSize == 16)
7452  if (ScalarSize == 32)
7454  assert(ScalarSize == 64 && "Unsupported floating point scalar size");
7460  if (ScalarSize == SplatBitSize)
7461  return getConstantScalar(SplatValue);
7463  unsigned NumElm = SplatBitSize / ScalarSize;
7465  for (unsigned I = 0; I != NumElm; ++I) {
7467  ConstantVec.push_back(getConstantScalar(Val));
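// Editor's note: when the splat value is wider than the chosen broadcast
// scalar, the loop above slices it into SplatBitSize / ScalarSize pieces so
// each piece becomes one constant-pool element. For example, a 64-bit splat
// value 0x0000000100000002 emitted with ScalarSize == 32 yields the two i32
// constants 0x00000002 and 0x00000001 (low bits first), which reassemble to
// the original 64-bit pattern when the pool entry is broadcast.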
7473 for (
auto *U :
N->users()) {
7474 unsigned Opc = U->getOpcode();
7484 if (
N->hasOneUse()) {
7516 "Unsupported vector type for broadcast.");
7523 assert((NumElts % Sequence.size()) == 0 &&
"Sequence doesn't fit.");
7524 if (Sequence.size() == 1)
7534 if (!Sequence.empty() && Subtarget.hasCDI()) {
7536 unsigned SeqLen = Sequence.size();
7537 bool UpperZeroOrUndef =
7542 if (UpperZeroOrUndef && ((Op0.getOpcode() ==
ISD::BITCAST) ||
7547 : Op0.getOperand(0).getOperand(0);
7550 if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) ||
7551 (EltType == MVT::i32 && MaskVT == MVT::v16i1)) {
7565 unsigned NumUndefElts = UndefElements.
count();
7566 if (!Ld || (NumElts - NumUndefElts) <= 1) {
7567 APInt SplatValue, Undef;
7568 unsigned SplatBitSize;
7571 if (BVOp->
isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
7581 if (SplatBitSize == 32 || SplatBitSize == 64 ||
7582 (SplatBitSize < 32 && Subtarget.
hasAVX2())) {
7589 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
7599 if (SplatBitSize > 64) {
7605 Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
7611 Ops, VVT, MPI, Alignment,
7621 if (!Ld || NumElts - NumUndefElts != 1)
7624 if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
7628 bool ConstSplatVal =
7656  if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
7664  if (ScalarSize == 32 ||
7665  (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
7666  (CVT == MVT::f16 && Subtarget.hasAVX2()) ||
7667  (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
7670  C = CI->getConstantIntValue();
7672  C = CF->getConstantFPValue();
7674  assert(C && "Invalid constant type");
7678  Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
7691  (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
7702  if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
7703  (Subtarget.hasVLX() && ScalarSize == 64)) {
7704  auto *LN = cast<LoadSDNode>(Ld);
7706  SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
7709  LN->getMemoryVT(), LN->getMemOperand());
7717  (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) {
7718  auto *LN = cast<LoadSDNode>(Ld);
7720  SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
7723  LN->getMemoryVT(), LN->getMemOperand());
7728  if (ScalarSize == 16 && Subtarget.hasFP16() && IsGE256)
7743 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
7764 ExtractedFromVec = ShuffleVec;
7772 MVT VT =
Op.getSimpleValueType();
7785 for (
unsigned i = 0; i != NumElems; ++i) {
7786 unsigned Opc =
Op.getOperand(i).getOpcode();
7793 if (InsertIndices.
size() > 1)
7800 SDValue ExtractedFromVec =
Op.getOperand(i).getOperand(0);
7801 SDValue ExtIdx =
Op.getOperand(i).getOperand(1);
7804 if (!isa<ConstantSDNode>(ExtIdx))
7813 VecIn1 = ExtractedFromVec;
7814 else if (VecIn1 != ExtractedFromVec) {
7816 VecIn2 = ExtractedFromVec;
7817 else if (VecIn2 != ExtractedFromVec)
7822 if (ExtractedFromVec == VecIn1)
7824 else if (ExtractedFromVec == VecIn2)
7825 Mask[i] =
Idx + NumElems;
7834 for (
unsigned Idx : InsertIndices)
7844 MVT VT =
Op.getSimpleValueType();
7860 MVT VT =
Op.getSimpleValueType();
7862 "Unexpected type in LowerBUILD_VECTORvXi1!");
7869 bool IsSplat =
true;
7870 bool HasConstElts =
false;
7876 if (
auto *InC = dyn_cast<ConstantSDNode>(In)) {
7877 Immediate |= (InC->getZExtValue() & 0x1) << idx;
7878 HasConstElts =
true;
7884 else if (In !=
Op.getOperand(SplatIdx))
7895 assert(
Cond.getValueType() == MVT::i8 &&
"Unexpected VT!");
7901 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7922 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7939 for (
unsigned InsertIdx : NonConstIdx) {
7941 Op.getOperand(InsertIdx),
7982 unsigned BaseIdx,
unsigned LastIdx,
7984 EVT VT =
N->getValueType(0);
7986 assert(BaseIdx * 2 <= LastIdx &&
"Invalid Indices in input!");
7988 "Invalid Vector in input!");
7991 bool CanFold =
true;
7992 unsigned ExpectedVExtractIdx = BaseIdx;
7993 unsigned NumElts = LastIdx - BaseIdx;
7998 for (
unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
8002 if (
Op->isUndef()) {
8004 if (i * 2 == NumElts)
8005 ExpectedVExtractIdx = BaseIdx;
8006 ExpectedVExtractIdx += 2;
8010 CanFold =
Op->getOpcode() == Opcode &&
Op->hasOneUse();
8031 if (i * 2 < NumElts) {
8043 if (i * 2 == NumElts)
8044 ExpectedVExtractIdx = BaseIdx;
8048 if (I0 == ExpectedVExtractIdx)
8050 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
8057 ExpectedVExtractIdx += 2;
8096  unsigned X86Opcode, bool Mode,
8097  bool isUndefLO, bool isUndefHI) {
8100  "Invalid nodes in input!");
8114  if (!isUndefLO && !V0->isUndef())
8115  LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
8116  if (!isUndefHI && !V1->isUndef())
8117  HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
8121  LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
8124  HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
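// Editor's note: this helper splits each 256-bit input into 128-bit halves
// before emitting the horizontal opcode, since the SSE/AVX horizontal ops
// only combine adjacent pairs inside a 128-bit lane. In the "Mode" form both
// halves of one source feed a result half (LO = hop(V0_LO, V0_HI),
// HI = hop(V1_LO, V1_HI)); otherwise the low halves of both sources build LO
// and the high halves build HI. Undef halves are skipped so no dead
// horizontal ops are created, and the two halves are concatenated back into
// a 256-bit result.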
8138 unsigned &NumExtracts,
8155 unsigned Opc[2] = {0, 0};
8156 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
8160 unsigned Opcode =
Op.getOpcode();
8186 if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
8188 Opc[i % 2] = Opcode;
8225 if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
8265 unsigned ExpectedUses) {
8295 unsigned NumExtracts;
8307 return DAG.
getNode(Opc,
DL, VT, Opnd0, Opnd1, Opnd2);
8320 Mask.push_back(
I + E + 1);
8344 unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
8345 unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
8346 for (
unsigned i = 0; i != Num128BitChunks; ++i) {
8347 for (
unsigned j = 0; j != NumEltsIn128Bits; ++j) {
8359 GenericOpcode =
Op.getOpcode();
8360 switch (GenericOpcode) {
8366 default:
return false;
8377 !isa<ConstantSDNode>(Op1.
getOperand(1)) || !
Op.hasOneUse())
8382 if (j < NumEltsIn64Bits) {
8390 SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
8397 unsigned ExpectedIndex = i * NumEltsIn128Bits +
8398 (j % NumEltsIn64Bits) * 2;
8399 if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
8408 if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
8439 for (
unsigned i = 0; i != NumElts; ++i)
8444 unsigned HalfNumElts = NumElts / 2;
8453 return DAG.
getNode(HOpcode,
DL, VT, V0, V1);
8461 unsigned NumNonUndefs =
8463 if (NumNonUndefs < 2)
8470 if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.
hasSSE3()) ||
8471 ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.
hasSSSE3()) ||
8472 ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.
hasAVX()) ||
8473 ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.
hasAVX2())) {
8486 unsigned Half = NumElts / 2;
8487 unsigned NumUndefsLO = 0;
8488 unsigned NumUndefsHI = 0;
8489 for (
unsigned i = 0, e = Half; i != e; ++i)
8493 for (
unsigned i = Half, e = NumElts; i != e; ++i)
8498 if (VT == MVT::v8i32 || VT == MVT::v16i16) {
8501 bool CanFold =
true;
8522 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
8531 bool isUndefLO = NumUndefsLO == Half;
8532 bool isUndefHI = NumUndefsHI == Half;
8538 if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
8539 VT == MVT::v16i16) {
8558 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
8563 bool isUndefLO = NumUndefsLO == Half;
8564 bool isUndefHI = NumUndefsHI == Half;
8566 isUndefLO, isUndefHI);
8584 MVT VT =
Op->getSimpleValueType(0);
8590 unsigned Opcode =
Op->getOperand(0).getOpcode();
8591 for (
unsigned i = 1; i < NumElems; ++i)
8592 if (Opcode !=
Op->getOperand(i).getOpcode())
8596 bool IsShift =
false;
8610 if (
Op->getSplatValue())
8623 if (!isa<ConstantSDNode>(
RHS))
8640 if (IsShift &&
any_of(RHSElts, [&](
SDValue V) {
return RHSElts[0] != V; }))
8661 MVT VT =
Op.getSimpleValueType();
8671 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
8693 "Illegal variable permute mask size");
8701 SDLoc(IndicesVec), SizeInBits);
8705 IndicesVT, IndicesVec);
8717 Subtarget, DAG,
SDLoc(IndicesVec));
8732  EVT SrcVT = Idx.getValueType();
8742  for (uint64_t i = 0; i != Scale; ++i) {
8743  IndexScale |= Scale << (i * NumDstBits);
8744  IndexOffset |= i << (i * NumDstBits);
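// Editor's note: IndexScale/IndexOffset above implement, as splatted
// constants, the conversion of element indices into byte indices for a
// byte-granular permute such as PSHUFB: every index is multiplied by Scale
// and each byte position within the widened index gets its byte offset
// added. A scalar standalone sketch of the same mapping, with a hypothetical
// name, not part of the original file:

#include <cstdint>
#include <vector>

inline std::vector<uint8_t>
scaleIndicesToBytes(const std::vector<uint8_t> &ElemIdx, unsigned Scale) {
  std::vector<uint8_t> ByteIdx;
  ByteIdx.reserve(ElemIdx.size() * Scale);
  for (uint8_t Idx : ElemIdx)
    for (unsigned B = 0; B != Scale; ++B)
      ByteIdx.push_back(static_cast<uint8_t>(Idx * Scale + B)); // idx*Scale + offset
  return ByteIdx;
}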
8754 unsigned Opcode = 0;
8763 if (Subtarget.hasVLX() && Subtarget.hasBWI())
8767 ShuffleVT = MVT::v16i8;
8772 if (Subtarget.
hasAVX()) {
8774 ShuffleVT = MVT::v4f32;
8777 ShuffleVT = MVT::v16i8;
8782 if (Subtarget.
hasAVX()) {
8786 ShuffleVT = MVT::v2f64;
8792 DAG.getVectorShuffle(VT,
DL, SrcVec, SrcVec, {0, 0}),
8798 if (Subtarget.hasVLX() && Subtarget.hasVBMI())
8800 else if (Subtarget.hasXOP()) {
8809 }
else if (Subtarget.hasAVX()) {
8820 EVT VT =
Idx.getValueType();
8826 SDValue Ops[] = {LoLo, HiHi, IndicesVec};
8832 if (Subtarget.hasVLX() && Subtarget.hasBWI())
8834 else if (Subtarget.hasAVX()) {
8836 IndicesVec = ScaleIndices(IndicesVec, 2);
8839 MVT::v32i8, DAG.
getBitcast(MVT::v32i8, SrcVec),
8840 DAG.
getBitcast(MVT::v32i8, IndicesVec),
DL, DAG, Subtarget));
8845 if (Subtarget.hasAVX2())
8847 else if (Subtarget.hasAVX()) {
8850 {0, 1, 2, 3, 0, 1, 2, 3});
8852 {4, 5, 6, 7, 4, 5, 6, 7});
8853 if (Subtarget.hasXOP())
8869 if (Subtarget.hasAVX512()) {
8870 if (!Subtarget.hasVLX()) {
8872 SrcVec =
widenSubVector(WidenSrcVT, SrcVec,
false, Subtarget, DAG,
8874 IndicesVec =
widenSubVector(MVT::v8i64, IndicesVec,
false, Subtarget,
8875 DAG,
SDLoc(IndicesVec));
8881 }
else if (Subtarget.hasAVX()) {
8889 if (Subtarget.hasXOP())
8904 if (Subtarget.hasVBMI())
8908 if (Subtarget.hasBWI())
8915 if (Subtarget.hasAVX512())
8924 "Illegal variable permute shuffle type");
8928 IndicesVec = ScaleIndices(IndicesVec, Scale);
8931 IndicesVec = DAG.
getBitcast(ShuffleIdxVT, IndicesVec);
8935 ? DAG.
getNode(Opcode,
DL, ShuffleVT, IndicesVec, SrcVec)
8936 : DAG.
getNode(Opcode,
DL, ShuffleVT, SrcVec, IndicesVec);
8959 for (
unsigned Idx = 0, E = V.getNumOperands();
Idx != E; ++
Idx) {
8968 SrcVec =
Op.getOperand(0);
8969 else if (SrcVec !=
Op.getOperand(0))
8971 SDValue ExtractedIndex =
Op->getOperand(1);
8975 ExtractedIndex = ExtractedIndex.
getOperand(0);
8984 else if (IndicesVec != ExtractedIndex.
getOperand(0))
8987 auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.
getOperand(1));
8988 if (!PermIdx || PermIdx->getAPIntValue() !=
Idx)
8992 MVT VT = V.getSimpleValueType();
9000 MVT VT =
Op.getSimpleValueType();
9002 MVT OpEltVT =
Op.getOperand(0).getSimpleValueType();
9010 (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16()))
9021 bool IsAllConstants =
true;
9022 bool OneUseFrozenUndefs =
true;
9024 unsigned NumConstants = NumElems;
9025 for (
unsigned i = 0; i < NumElems; ++i) {
9032 OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->
hasOneUse();
9033 FrozenUndefMask.
setBit(i);
9038 IsAllConstants =
false;
9053 if (OneUseFrozenUndefs && (UndefMask | FrozenUndefMask).isAllOnes())
9057 if ((UndefMask | FrozenUndefMask | ZeroMask).isAllOnes())
9065 if (
unsigned NumFrozenUndefElts = FrozenUndefMask.
popcount();
9066 NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
9069 for (
unsigned i = 0; i < NumElems; ++i) {
9075 if (!FrozenUndefMask[i])
9076 Elts[i] =
Op.getOperand(i);
9078 BlendMask[i] += NumElems;
9093 unsigned UpperElems = NumElems / 2;
9094 APInt UndefOrZeroMask = FrozenUndefMask | UndefMask | ZeroMask;
9095 unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.
countl_one();
9096 if (NumUpperUndefsOrZeros >= UpperElems) {
9098 NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
9099 UpperElems = NumElems - (NumElems / 4);
9101 bool UndefUpper = UndefMask.
countl_one() >= UpperElems;
9105 return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);
9112 return HorizontalOp;
9118 unsigned NumZero = ZeroMask.
popcount();
9119 unsigned NumNonZero = NonZeroMask.
popcount();
9127 if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
9128 FrozenUndefMask.
isZero() &&
9135 Type *EltType =
Op.getValueType().getScalarType().getTypeForEVT(Context);
9139 for (
unsigned i = 0; i != NumElems; ++i) {
9141 if (
auto *
C = dyn_cast<ConstantSDNode>(Elt))
9142 ConstVecOps[i] = ConstantInt::get(Context,
C->getAPIntValue());
9143 else if (
auto *
C = dyn_cast<ConstantFPSDNode>(Elt))
9144 ConstVecOps[i] = ConstantFP::get(Context,
C->getValueAPF());
9147 "Expected one variable element in this vector");
9161 SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
9167 if (InsertC < NumEltsInLow128Bits)
9173 assert(Subtarget.
hasAVX() &&
"Must have AVX with >16-byte vector");
9176 for (
unsigned i = 0; i != NumElts; ++i)
9177 ShuffleMask.
push_back(i == InsertC ? NumElts : i);
9183 if (NumNonZero == 1) {
9195 if (EltVT == MVT::i32 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
9196 EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit()) ||
9197 (EltVT == MVT::i16 && Subtarget.hasFP16())) {
9200 "Expected an SSE value type!");
9209 if (EltVT == MVT::i16 || EltVT == MVT::i8) {
9219 if (NumElems == 2 &&
Idx == 1 &&
9225 VT,
Op.getOperand(1)),
9226 NumBits/2, DAG, *
this, dl);
9237 if (EVTBits == 32) {
9244 if (Values.
size() == 1) {
9245 if (EVTBits == 32) {
9252 if (
Op.getNode()->isOnlyUserOf(Item.
getNode()))
9277 if (Subtarget.
hasAVX2() && EVTBits == 32 && Values.
size() == 2) {
9278 SDValue Ops[4] = {
Op.getOperand(0),
Op.getOperand(1),
9282 for (
unsigned i = 2; i != NumElems; ++i)
9283 if (Ops[i % 2] !=
Op.getOperand(i))
9287 if (CanSplat(
Op, NumElems, Ops)) {
9309 HVT, dl,
Op->ops().slice(NumElems / 2, NumElems /2));
9316 if (EVTBits == 64) {
9317 if (NumNonZero == 1) {
9321 Op.getOperand(
Idx));
9328 if (EVTBits == 8 && NumElems == 16)
9330 NumZero, DAG, Subtarget))
9333 if (EltVT == MVT::i16 && NumElems == 8)
9335 NumZero, DAG, Subtarget))
9339 if (EVTBits == 32 && NumElems == 4)
9344 if (NumElems == 4 && NumZero > 0) {
9346 for (
unsigned i = 0; i < 4; ++i) {
9347 bool isZero = !NonZeroMask[i];
9354 for (
unsigned i = 0; i < 2; ++i) {
9361 Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
9364 Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
9367 Ops[i] =
getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
9377 static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
9378 static_cast<int>(Reverse2 ? NumElems : NumElems+1)
9383 assert(Values.
size() > 1 &&
"Expected non-undef and non-splat vector");
9390 if (Subtarget.
hasSSE41() && EltVT != MVT::f16) {
9392 if (!
Op.getOperand(0).isUndef())
9397 for (
unsigned i = 1; i < NumElems; ++i) {
9398 if (
Op.getOperand(i).isUndef())
continue;
9409 for (
unsigned i = 0; i < NumElems; ++i) {
9410 if (!
Op.getOperand(i).isUndef())
9420 for (
unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
9423 for(
unsigned i = 0; i != Scale; ++i)
9425 for (
unsigned i = 0; i != Scale; ++i)
9426 Mask.push_back(NumElems+i);
9429 for (
unsigned i = 0, e = NumElems / (2 * Scale); i !=
e; ++i)
9441 MVT ResVT =
Op.getSimpleValueType();
9444 ResVT.
is512BitVector()) &&
"Value type must be 256-/512-bit wide");
9447 unsigned NumFreezeUndef = 0;
9448 unsigned NumZero = 0;
9449 unsigned NumNonZero = 0;
9450 unsigned NonZeros = 0;
9451 for (
unsigned i = 0; i != NumOperands; ++i) {
9465 assert(i <
sizeof(NonZeros) * CHAR_BIT);
9472 if (NumNonZero > 2) {
9476 Ops.
slice(0, NumOperands/2));
9478 Ops.
slice(NumOperands/2));
9487 MVT SubVT =
Op.getOperand(0).getSimpleValueType();
9489 for (
unsigned i = 0; i != NumOperands; ++i) {
9490 if ((NonZeros & (1 << i)) == 0)
9508 MVT ResVT =
Op.getSimpleValueType();
9512 "Unexpected number of operands in CONCAT_VECTORS");
9516 for (
unsigned i = 0; i != NumOperands; ++i) {
9520 assert(i <
sizeof(NonZeros) * CHAR_BIT);
9532 if (
isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
9533 Log2_64(NonZeros) != NumOperands - 1) {
9557 if (NumOperands > 2) {
9561 Ops.
slice(0, NumOperands / 2));
9563 Ops.
slice(NumOperands / 2));
9582 MVT VT =
Op.getSimpleValueType();
9616 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
9617 assert(Mask[i] >= -1 &&
"Out of bound mask element!");
9618 if (Mask[i] >= 0 && Mask[i] != i)
9630  unsigned ScalarSizeInBits,
9632  assert(LaneSizeInBits && ScalarSizeInBits &&
9633  (LaneSizeInBits % ScalarSizeInBits) == 0 &&
9634  "Illegal shuffle lane size");
9635  int LaneSize = LaneSizeInBits / ScalarSizeInBits;
9636  int Size = Mask.size();
9637  for (int i = 0; i < Size; ++i)
9638  if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
9653  unsigned ScalarSizeInBits,
9655  assert(LaneSizeInBits && ScalarSizeInBits &&
9656  (LaneSizeInBits % ScalarSizeInBits) == 0 &&
9657  "Illegal shuffle lane size");
9658  int NumElts = Mask.size();
9659  int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits;
9660  int NumLanes = NumElts / NumEltsPerLane;
9662  for (int i = 0; i != NumLanes; ++i) {
9664  for (int j = 0; j != NumEltsPerLane; ++j) {
9665  int M = Mask[(i * NumEltsPerLane) + j];
9668  int Lane = (M % NumElts) / NumEltsPerLane;
9669  if (SrcLane >= 0 && SrcLane != Lane)
9693  RepeatedMask.assign(LaneSize, -1);
9694  int Size = Mask.size();
9695  for (int i = 0; i < Size; ++i) {
9699  if ((Mask[i] % Size) / LaneSize != i / LaneSize)
9705  int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
9706                              : Mask[i] % LaneSize + LaneSize;
9707  if (RepeatedMask[i % LaneSize] < 0)
9709  RepeatedMask[i % LaneSize] = LocalM;
9710  else if (RepeatedMask[i % LaneSize] != LocalM)
9740  unsigned EltSizeInBits,
9743  int LaneSize = LaneSizeInBits / EltSizeInBits;
9745  int Size = Mask.size();
9746  for (int i = 0; i < Size; ++i) {
9756  if ((Mask[i] % Size) / LaneSize != i / LaneSize)
9762  int LaneM = Mask[i] / Size;
9763  int LocalM = (Mask[i] % LaneSize) + (LaneM * LaneSize);
9766  RepeatedMask[i % LaneSize] = LocalM;
9767  else if (RepeatedMask[i % LaneSize] != LocalM)
9780  Mask, RepeatedMask);
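// Editor's note: the "repeated mask" helpers above succeed only when every
// 128-bit lane applies the same in-lane pattern, which is what lets a wide
// shuffle be lowered with a single-lane instruction such as PSHUFD. A
// standalone sketch of the check, using std::vector instead of ArrayRef and
// a hypothetical name, not part of the original file:

#include <vector>

inline bool isRepeatedLaneMask(const std::vector<int> &Mask, int EltsPerLane,
                               std::vector<int> &RepeatedMask) {
  int Size = static_cast<int>(Mask.size());
  RepeatedMask.assign(EltsPerLane, -1);
  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // undef elements match anything
    // The element must come from the matching position of its own lane.
    if ((M % Size) / EltsPerLane != i / EltsPerLane)
      return false;
    int LocalM = (M % EltsPerLane) + (M < Size ? 0 : EltsPerLane);
    if (RepeatedMask[i % EltsPerLane] < 0)
      RepeatedMask[i % EltsPerLane] = LocalM;
    else if (RepeatedMask[i % EltsPerLane] != LocalM)
      return false; // lanes disagree
  }
  return true;
}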
9786  int Idx, int ExpectedIdx) {
9787  assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx &&
9788  ExpectedIdx < MaskSize && "Out of range element index");
9789  if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode())
9792  switch (Op.getOpcode()) {
9804  return (Op == ExpectedOp &&
9805  (int)Op.getValueType().getVectorNumElements() == MaskSize);
9815  if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) {
9816  MVT VT = Op.getSimpleValueType();
9818  if (MaskSize == NumElts) {
9820  int NumEltsPerLane = NumElts / NumLanes;
9821  int NumHalfEltsPerLane = NumEltsPerLane / 2;
9823  (Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane);
9825  (Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane);
9826  return SameLane && SameElt;
9848  int Size = Mask.size();
9849  if (Size != (int)ExpectedMask.size())
9852  for (int i = 0; i < Size; ++i) {
9853  assert(Mask[i] >= -1 && "Out of bound mask element!");
9854  int MaskIdx = Mask[i];
9855  int ExpectedIdx = ExpectedMask[i];
9856  if (0 <= MaskIdx && MaskIdx != ExpectedIdx) {
9859  MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
9860  ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
9882  int Size = Mask.size();
9883  if (Size != (int)ExpectedMask.size())
9887  "Illegal target shuffle mask");
9895  !V1.getValueType().isVector()))
9898  !V2.getValueType().isVector()))
9904  for (int i = 0; i < Size; ++i) {
9905  int MaskIdx = Mask[i];
9906  int ExpectedIdx = ExpectedMask[i];
9916  int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
9917  APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2;
9925  MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
9926  ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
9940 if (VT != MVT::v8i32 && VT != MVT::v8f32)
9951 return IsUnpackwdMask;
9965 for (
unsigned i = 0; i != 4; ++i) {
9980 assert(Mask.size() % 2 == 0 &&
"Expecting even number of elements in mask");
9981 unsigned HalfSize = Mask.size() / 2;
9982 for (
unsigned i = 0; i != HalfSize; ++i) {
9983 if (Mask[i] != Mask[i + HalfSize])
9998   assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
9999   assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
10000  assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
10001  assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
10002  assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
10006  int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
10007  assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask");
10009  int FirstElt = Mask[FirstIndex];
10010  if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }))
10011  return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt;
10014  Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0;
10015  Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2;
10016  Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4;
10017  Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6;
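// Editor's note: the code above builds the classic 8-bit SHUFPS/PSHUFD
// immediate: four 2-bit source selectors, element 0 in bits [1:0] up to
// element 3 in bits [7:6], with undef mask elements defaulting to their own
// position. A standalone sketch of the encoding, ignoring the splat special
// case above, with a hypothetical name, not part of the original file:

#include <cassert>
#include <cstdint>

inline uint8_t encodeV4ShuffleImm(const int (&Mask)[4]) {
  uint8_t Imm = 0;
  for (int i = 0; i != 4; ++i) {
    int M = Mask[i] < 0 ? i : Mask[i]; // undef keeps its own slot
    assert(0 <= M && M < 4 && "2-bit field out of range");
    Imm |= static_cast<uint8_t>(M) << (2 * i);
  }
  return Imm; // e.g. mask <3,2,1,0> -> 0b00'01'10'11 == 0x1B
}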
10029  assert((Mask.size() == 2 || Mask.size() == 4 || Mask.size() == 8) &&
10030         "Unexpected SHUFPD mask size");
10031  assert(all_of(Mask, [](int M) { return -1 <= M && M <= 1; }) &&
10032         "Unexpected SHUFPD mask elements");
10036  int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();
10037  assert(0 <= FirstIndex && FirstIndex < (int)Mask.size() &&
10038         "All undef shuffle mask");
10040  int FirstElt = Mask[FirstIndex];
10041  if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; }) &&
10042      count_if(Mask, [FirstElt](int M) { return M == FirstElt; }) > 1) {
10044  for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10045  Imm |= FirstElt << I;
10052  for (unsigned I = 0, E = Mask.size(); I != E; ++I)
10053  Imm |= (Mask[I] < 0 ? (I & 1) : Mask[I]) << I;
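// Editor's note: unlike the 2-bit-per-element SHUFPS immediate, the SHUFPD
// immediate built above needs only one bit per f64 element, selecting the
// low (0) or high (1) element of that lane's source; undef elements default
// to their own parity (I & 1). Worked example for a v4f64 mask <1, -1, 0, 1>:
// Imm = (1 << 0) | (1 << 1) | (0 << 2) | (1 << 3) = 0xB.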
10072 bool &IsZeroSideLeft) {
10073 int NextElement = -1;
10075 for (
int i = 0, e = Mask.size(); i < e; i++) {
10077 assert(Mask[i] >= -1 &&
"Out of bound mask element!");
10083 if (NextElement < 0) {
10084 NextElement = Mask[i] != 0 ?
VectorType.getVectorNumElements() : 0;
10085 IsZeroSideLeft = NextElement != 0;
10088 if (NextElement != Mask[i])
10101 int Size = Mask.size();
10115 for (
int i = 0; i < NumBytes; ++i) {
10116 int M = Mask[i / NumEltBytes];
10118 PSHUFBMask[i] = DAG.
getUNDEF(MVT::i8);
10121 if (Zeroable[i / NumEltBytes]) {
10122 PSHUFBMask[i] = ZeroMask;
10128 if (V && V != SrcV)
10134 if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
10138 M = M * NumEltBytes + (i % NumEltBytes);
10141 assert(V &&
"Failed to find a source input");
10156 const APInt &Zeroable,
10159 bool IsLeftZeroSide =
true;
10163 unsigned VEXPANDMask = (~Zeroable).getZExtValue();
10168 assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
10169 "Unexpected number of vector elements");
10171 Subtarget, DAG,
DL);
10173 SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
10178 unsigned &UnpackOpcode,
bool IsUnary,
10184 bool Undef1 =
true, Undef2 =
true, Zero1 =
true, Zero2 =
true;
10185 for (
int i = 0; i != NumElts; i += 2) {
10186 int M1 = TargetMask[i + 0];
10187 int M2 = TargetMask[i + 1];
10193 assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
10194 "Zeroable shuffle detected");
10200 (IsUnary ? V1 : V2))) {
10202 V2 = (Undef2 ? DAG.
getUNDEF(VT) : (IsUnary ? V1 : V2));
10203 V1 = (Undef1 ? DAG.
getUNDEF(VT) : V1);
10209 (IsUnary ? V1 : V2))) {
10211 V2 = (Undef2 ? DAG.
getUNDEF(VT) : (IsUnary ? V1 : V2));
10212 V1 = (Undef1 ? DAG.
getUNDEF(VT) : V1);
10217 if (IsUnary && (Zero1 || Zero2)) {
10219 if ((Subtarget.
hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
10223 bool MatchLo =
true, MatchHi =
true;
10224 for (
int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
10225 int M = TargetMask[i];
10228 if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
10232 MatchLo &= (M == Unpckl[i]);
10233 MatchHi &= (M == Unpckh[i]);
10236 if (MatchLo || MatchHi) {
10300 unsigned UnpackOpcode;
10312 DAG.
getUNDEF(MVT::v4f64), {0, 2, 1, 3});
10314 return DAG.
getNode(UnpackOpcode,
DL, VT, V1, V1);
10325 unsigned NumElts = Mask.size();
10327 unsigned MaxScale = 64 / EltSizeInBits;
10329 for (
unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
10330 unsigned SrcEltBits = EltSizeInBits * Scale;
10331 if (SrcEltBits < 32 && !Subtarget.hasBWI())
10333 unsigned NumSrcElts = NumElts / Scale;
10336 unsigned UpperElts = NumElts - NumSrcElts;
10342 if ((NumSrcElts * EltSizeInBits) >= 128) {
10360 MVT SrcVT = Src.getSimpleValueType();
10370 if (NumSrcElts == NumDstElts)
10373 if (NumSrcElts > NumDstElts) {
10379 if ((NumSrcElts * DstEltSizeInBits) >= 128) {
10396 if (DstVT != TruncVT)
10420 const APInt &Zeroable,
10423 assert((VT == MVT::v16i8 || VT == MVT::v8i16) &&
"Unexpected VTRUNC type");
10429 unsigned MaxScale = 64 / EltSizeInBits;
10430 for (
unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
10431 unsigned SrcEltBits = EltSizeInBits * Scale;
10432 unsigned NumSrcElts = NumElts / Scale;
10433 unsigned UpperElts = NumElts - NumSrcElts;
10442 Src.getScalarValueSizeInBits() == SrcEltBits) {
10443 Src = Src.getOperand(0);
10444 }
else if (Subtarget.hasVLX()) {
10457 if (!Subtarget.hasBWI() && Src.getScalarValueSizeInBits() < 32)
10470 const APInt &Zeroable,
10474 "Unexpected VTRUNC type");
10480 unsigned MaxScale = 64 / EltSizeInBits;
10481 for (
unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
10483 unsigned SrcEltBits = EltSizeInBits * Scale;
10484 if (SrcEltBits < 32 && !Subtarget.hasBWI())
10489 unsigned NumHalfSrcElts = NumElts / Scale;
10490 unsigned NumSrcElts = 2 * NumHalfSrcElts;
10497 unsigned UpperElts = NumElts - NumSrcElts;
10498 if (UpperElts > 0 &&
10509 return Lo.getOperand(0) ==
Hi.getOperand(0);
10512 auto *LDLo = cast<LoadSDNode>(
Lo);
10513 auto *LDHi = cast<LoadSDNode>(
Hi);
10515 LDHi, LDLo,
Lo.getValueType().getStoreSize(), 1);
10573 bool IsSingleInput) {
10576 int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
10578 "We should only be called with masks with a power-of-2 size!");
10581 int Offset = MatchEven ? 0 : 1;
10586 bool ViableForN[3] = {
true,
true,
true};
10588 for (
int i = 0, e = Mask.size(); i < e; ++i) {
10594 bool IsAnyViable =
false;
10595 for (
unsigned j = 0; j != std::size(ViableForN); ++j)
10596 if (ViableForN[j]) {
10601 IsAnyViable =
true;
10603 ViableForN[j] =
false;
10610 for (
unsigned j = 0; j != std::size(ViableForN); ++j)
10626 unsigned MaxStages = 1) {
10629 assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 &&
10630 "Illegal maximum compaction");
10633 unsigned NumSrcBits = PackVT.getScalarSizeInBits();
10634 unsigned NumPackedBits = NumSrcBits - BitSize;
10638 unsigned NumBits2 = N2.getScalarValueSizeInBits();
10641 if ((!N1.
isUndef() && !IsZero1 && NumBits1 != NumSrcBits) ||
10642 (!N2.isUndef() && !IsZero2 && NumBits2 != NumSrcBits))
10644 if (Subtarget.
hasSSE41() || BitSize == 8) {
10657 if ((N1.
isUndef() || IsZero1 || IsAllOnes1 ||
10659 (N2.isUndef() || IsZero2 || IsAllOnes2 ||
10671 for (
unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) {
10679 if (MatchPACK(V1, V2, PackVT))
10686 if (MatchPACK(V1, V1, PackVT))
10698 unsigned PackOpcode;
10701 unsigned MaxStages =
Log2_32(64 / EltBits);
10703 Subtarget, MaxStages))
10707 unsigned NumStages =
Log2_32(CurrentEltBits / EltBits);
10710 if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX())
10715 unsigned MaxPackBits = 16;
10716 if (CurrentEltBits > 16 &&
10722 for (
unsigned i = 0; i != NumStages; ++i) {
10723 unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
10724 unsigned NumSrcElts = SizeBits / SrcEltBits;
10732 CurrentEltBits /= 2;
10735 "Failed to lower compaction shuffle");
10745 const APInt &Zeroable,
10752 if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
10758 if (EltVT == MVT::f32 || EltVT == MVT::f64) {
10771 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
10774 if (Mask[i] %
Size != i)
10777 V = Mask[i] <
Size ? V1 : V2;
10778 else if (V != (Mask[i] <
Size ? V1 : V2))
10806 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
10807 if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i +
Size)
10823  const APInt &Zeroable, bool &ForceV1Zero,
10824  bool &ForceV2Zero, uint64_t &BlendMask) {
10825  bool V1IsZeroOrUndef =
10827  bool V2IsZeroOrUndef =
10831  ForceV1Zero = false, ForceV2Zero = false;
10832  assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
10834  int NumElts = Mask.size();
10836  int NumEltsPerLane = NumElts / NumLanes;
10837  assert((NumLanes * NumEltsPerLane) == NumElts && "Value type mismatch");
10841  bool ForceWholeLaneMasks =
10846  for (int Lane = 0; Lane != NumLanes; ++Lane) {
10848  bool LaneV1InUse = false;
10849  bool LaneV2InUse = false;
10851  for (int LaneElt = 0; LaneElt != NumEltsPerLane; ++LaneElt) {
10852  int Elt = (Lane * NumEltsPerLane) + LaneElt;
10856  if (M == Elt || (0 <= M && M < NumElts &&
10859  LaneV1InUse = true;
10862  if (M == (Elt + NumElts) ||
10865  LaneBlendMask |= 1ull << LaneElt;
10866  Mask[Elt] = Elt + NumElts;
10867  LaneV2InUse = true;
10870  if (Zeroable[Elt]) {
10871  if (V1IsZeroOrUndef) {
10872  ForceV1Zero = true;
10874  LaneV1InUse = true;
10877  if (V2IsZeroOrUndef) {
10878  ForceV2Zero = true;
10879  LaneBlendMask |= 1ull << LaneElt;
10880  Mask[Elt] = Elt + NumElts;
10881  LaneV2InUse = true;
10891  if (ForceWholeLaneMasks && LaneV2InUse && !LaneV1InUse)
10892  LaneBlendMask = (1ull << NumEltsPerLane) - 1;
10894  BlendMask |= LaneBlendMask << (Lane * NumEltsPerLane);
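// Editor's note: the lane loop above accumulates one bit per destination
// element for BLENDPS/PBLENDW-style lowering: bit clear selects V1 in place,
// bit set selects V2 in place. A simplified standalone sketch of that core
// test, ignoring the zeroable and whole-lane handling above, with a
// hypothetical name, not part of the original file:

#include <cstdint>
#include <vector>

inline bool matchAsBlendMask(const std::vector<int> &Mask,
                             uint64_t &BlendMask) {
  int NumElts = static_cast<int>(Mask.size());
  BlendMask = 0;
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0 || M == i)
      continue;                 // undef, or element i of V1 stays in place
    if (M == i + NumElts)
      BlendMask |= 1ull << i;   // element i of V2 stays in place
    else
      return false;             // element would move: not a blend
  }
  return true;
}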
10907 const APInt &Zeroable,
10911 bool ForceV1Zero =
false, ForceV2Zero =
false;
10928  assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
10932  assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
10939  assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
10942  case MVT::v16i16: {
10943  assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
10947  assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
10949  for (int i = 0; i < 8; ++i)
10950  if (RepeatedMask[i] >= 8)
10951  BlendMask |= 1ull << i;
10958  uint64_t LoMask = BlendMask & 0xFF;
10959  uint64_t HiMask = (BlendMask >> 8) & 0xFF;
10960  if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
10966  MVT::v16i16, DL, Lo, Hi,
10967  {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
10972  assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
10975  assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
10982 if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
10989 if (Subtarget.hasVLX())
11022 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i)
11023 for (
int j = 0; j < Scale; ++j)
11070 bool ImmBlends =
false) {
11076 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i) {
11080 assert(Mask[i] <
Size * 2 &&
"Shuffle input is out of bounds.");
11082 if (BlendMask[Mask[i] %
Size] < 0)
11083 BlendMask[Mask[i] %
Size] = Mask[i];
11084 else if (BlendMask[Mask[i] %
Size] != Mask[i])
11087 PermuteMask[i] = Mask[i] %
Size;
11109 int NumElts = Mask.size();
11111 int NumLaneElts = NumElts / NumLanes;
11112 int NumHalfLaneElts = NumLaneElts / 2;
11114 bool MatchLo =
true, MatchHi =
true;
11118 for (
int Elt = 0; Elt != NumElts; ++Elt) {
11126 if (M < NumElts && (
Op.isUndef() ||
Op == V1))
11128 else if (NumElts <= M && (
Op.isUndef() ||
Op == V2)) {
11134 bool MatchLoAnyLane =
false, MatchHiAnyLane =
false;
11135 for (
int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
11136 int Lo = Lane, Mid = Lane + NumHalfLaneElts,
Hi = Lane + NumLaneElts;
11139 if (MatchLoAnyLane || MatchHiAnyLane) {
11140 assert((MatchLoAnyLane ^ MatchHiAnyLane) &&
11141 "Failed to match UNPCKLO/UNPCKHI");
11145 MatchLo &= MatchLoAnyLane;
11146 MatchHi &= MatchHiAnyLane;
11147 if (!MatchLo && !MatchHi)
11150 assert((MatchLo ^ MatchHi) &&
"Failed to match UNPCKLO/UNPCKHI");
11156 for (
int Elt = 0; Elt != NumElts; ++Elt) {
11163 bool IsFirstOp = M < NumElts;
11165 NumLaneElts * (NormM / NumLaneElts) + (2 * (NormM % NumHalfLaneElts));
11166 if ((IsFirstOp && V1 == Ops[0]) || (!IsFirstOp && V2 == Ops[0]))
11167 PermuteMask[Elt] = BaseMaskElt;
11168 else if ((IsFirstOp && V1 == Ops[1]) || (!IsFirstOp && V2 == Ops[1]))
11169 PermuteMask[Elt] = BaseMaskElt + 1;
11170 assert(PermuteMask[Elt] != -1 &&
11171 "Input mask element is defined but failed to assign permute mask");
11193 int Size = Mask.size();
11194 assert(Mask.size() >= 2 &&
"Single element masks are invalid.");
11205 bool UnpackLo = NumLoInputs >= NumHiInputs;
11207 auto TryUnpack = [&](
int ScalarSize,
int Scale) {
11211 for (
int i = 0; i <
Size; ++i) {
11216 int UnpackIdx = i / Scale;
11220 if ((UnpackIdx % 2 == 0) != (Mask[i] <
Size))
11226 VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 :
Size / 2)] =
11249 UnpackVT, V1, V2));
11255 for (
int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
11256 if (
SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
11267 if (NumLoInputs == 0 || NumHiInputs == 0) {
11268 assert((NumLoInputs > 0 || NumHiInputs > 0) &&
11269 "We have to have *some* inputs!");
11270 int HalfOffset = NumLoInputs == 0 ?
Size / 2 : 0;
11278 for (
int i = 0; i <
Size; ++i) {
11282 assert(Mask[i] %
Size >= HalfOffset &&
"Found input from wrong half!");
11285 2 * ((Mask[i] %
Size) - HalfOffset) + (Mask[i] <
Size ? 0 : 1);
11314 int NumEltsPerLane = NumElts / NumLanes;
11317 bool Blend1 =
true;
11318 bool Blend2 =
true;
11319 std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
11320 std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
11321 for (
int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
11322 for (
int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
11323 int M = Mask[Lane + Elt];
11327 Blend1 &= (M == (Lane + Elt));
11328 assert(Lane <= M && M < (Lane + NumEltsPerLane) &&
"Out of range mask");
11329 M = M % NumEltsPerLane;
11330 Range1.first = std::min(Range1.first, M);
11331 Range1.second = std::max(Range1.second, M);
11334 Blend2 &= (M == (Lane + Elt));
11335 assert(Lane <= M && M < (Lane + NumEltsPerLane) &&
"Out of range mask");
11336 M = M % NumEltsPerLane;
11337 Range2.first = std::min(Range2.first, M);
11338 Range2.second = std::max(Range2.second, M);
11346 if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
11347 !(0 <= Range2.first && Range2.second < NumEltsPerLane))
11361 for (
int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
11362 for (
int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
11363 int M = Mask[Lane + Elt];
11367 PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
11369 PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
11376 if (Range2.second < Range1.first)
11377 return RotateAndPermute(V1, V2, Range1.first, 0);
11378 if (Range1.second < Range2.first)
11379 return RotateAndPermute(V2, V1, Range2.first, NumElts);
11393 size_t NumUndefs = 0;
11394 std::optional<int> UniqueElt;
11395 for (
int Elt : Mask) {
11400 if (UniqueElt.has_value() && UniqueElt.value() != Elt)
11406 return NumUndefs <= Mask.size() / 2 && UniqueElt.has_value();
11419 int NumElts = Mask.size();
11421 int NumEltsPerLane = NumElts / NumLanes;
11425 bool IsAlternating =
true;
11426 bool V1Zero =
true, V2Zero =
true;
11430 for (
int i = 0; i < NumElts; ++i) {
11432 if (M >= 0 && M < NumElts) {
11435 V1Zero &= Zeroable[i];
11436 IsAlternating &= (i & 1) == 0;
11437 }
else if (M >= NumElts) {
11438 V2Mask[i] = M - NumElts;
11439 FinalMask[i] = i + NumElts;
11440 V2Zero &= Zeroable[i];
11441 IsAlternating &= (i & 1) == 1;
11448 auto canonicalizeBroadcastableInput = [
DL, VT, &Subtarget,
11451 unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
11452 if (!Subtarget.
hasAVX2() && (!Subtarget.
hasAVX() || EltSizeInBits < 32 ||
11458 "Expected to demand only the 0'th element.");
11461 int &InputMaskElt =
I.value();
11462 if (InputMaskElt >= 0)
11463 InputMaskElt =
I.index();
11473 canonicalizeBroadcastableInput(V1, V1Mask);
11474 canonicalizeBroadcastableInput(V2, V2Mask);
11499 DL, VT, V1, V2, Mask, Subtarget, DAG))
11507 DL, VT, V1, V2, Mask, Subtarget, DAG))
11516 V1Mask.
assign(NumElts, -1);
11517 V2Mask.
assign(NumElts, -1);
11518 FinalMask.
assign(NumElts, -1);
11519 for (
int i = 0; i != NumElts; i += NumEltsPerLane)
11520 for (
int j = 0; j != NumEltsPerLane; ++j) {
11521 int M = Mask[i + j];
11522 if (M >= 0 && M < NumElts) {
11523 V1Mask[i + (j / 2)] = M;
11524 FinalMask[i + j] = i + (j / 2);
11525 }
else if (M >= NumElts) {
11526 V2Mask[i + (j / 2)] = M - NumElts;
11527 FinalMask[i + j] = i + (j / 2) + NumElts;
11541 assert(EltSizeInBits < 64 &&
"Can't rotate 64-bit integers");
11544 int MinSubElts = Subtarget.
hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
11545 int MaxSubElts = 64 / EltSizeInBits;
11546 unsigned RotateAmt, NumSubElts;
11548 MaxSubElts, NumSubElts, RotateAmt))
11550 unsigned NumElts = Mask.size();
11565 if (!IsLegal && Subtarget.
hasSSE3())
11578 if ((RotateAmt % 16) == 0)
11581 unsigned ShlAmt = RotateAmt;
11603  int NumElts = Mask.size();
11614  for (int i = 0; i < NumElts; ++i) {
11617  "Unexpected mask index.");
11622  int StartIdx = i - (M % NumElts);
11630  int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
11633  Rotation = CandidateRotation;
11634  else if (Rotation != CandidateRotation)
11639  SDValue MaskV = M < NumElts ? V1 : V2;
11650  else if (TargetV != MaskV)
11657  assert(Rotation != 0 && "Failed to locate a viable rotation!");
11658  assert((Lo || Hi) && "Failed to find a rotated input vector!");
11703  int NumElts = RepeatedMask.size();
11704  int Scale = 16 / NumElts;
11705  return Rotation * Scale;
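// Editor's note: PALIGNR works at byte granularity, so the element rotation
// found on the repeated 128-bit mask is rescaled by the element size before
// use; e.g. rotating a v4i32 lane by one element becomes a 4-byte rotate. A
// trivial standalone sketch, with a hypothetical name, not part of the
// original file:

inline int elementRotationToByteRotation(int Rotation, int NumEltsPerLane) {
  int BytesPerElt = 16 / NumEltsPerLane; // bytes per element in a 128-bit lane
  return Rotation * BytesPerElt;
}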
11716 if (ByteRotation <= 0)
11728 "512-bit PALIGNR requires BWI instructions");
11735 "Rotate-based lowering only supports 128-bit lowering!");
11736 assert(Mask.size() <= 16 &&
11737 "Can shuffle at most 16 bytes in a 128-bit vector!");
11738 assert(ByteVT == MVT::v16i8 &&
11739 "SSE2 rotate lowering only needed for v16i8!");
11742 int LoByteShift = 16 - ByteRotation;
11743 int HiByteShift = ByteRotation;
11767 const APInt &Zeroable,
11771 "Only 32-bit and 64-bit elements are supported!");
11775 &&
"VLX required for 128/256-bit vectors");
11787 unsigned NumElts = Mask.size();
11790 assert((ZeroLo + ZeroHi) < NumElts &&
"Zeroable shuffle detected");
11791 if (!ZeroLo && !ZeroHi)
11795 SDValue Src = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
11796 int Low = Mask[ZeroLo] < (int)NumElts ? 0 : NumElts;
11804 SDValue Src = Mask[0] < (int)NumElts ? V1 : V2;
11805 int Low = Mask[0] < (int)NumElts ? 0 : NumElts;
11818 const APInt &Zeroable,
11828 if (!ZeroLo && !ZeroHi)
11831 unsigned NumElts = Mask.size();
11832 unsigned Len = NumElts - (ZeroLo + ZeroHi);
11842 SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
11851 unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
11856 }
else if (ZeroHi == 0) {
11857 unsigned Shift = Mask[ZeroLo] % NumElts;
11862 }
else if (!Subtarget.
hasSSSE3()) {
11866 unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
11869 Shift += Mask[ZeroLo] % NumElts;
11905  int MaskOffset, const APInt &Zeroable,
11907  int Size = Mask.size();
11908  unsigned SizeInBits = Size * ScalarSizeInBits;
11910  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
11911  for (int i = 0; i < Size; i += Scale)
11912  for (int j = 0; j < Shift; ++j)
11913  if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
11919  auto MatchShift = [&](int Shift, int Scale, bool Left) {
11920  for (int i = 0; i != Size; i += Scale) {
11921  unsigned Pos = Left ? i + Shift : i;
11922  unsigned Low = Left ? i : i + Shift;
11923  unsigned Len = Scale - Shift;
11928  int ShiftEltBits = ScalarSizeInBits * Scale;
11929  bool ByteShift = ShiftEltBits > 64;
11932  int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
11936  Scale = ByteShift ? Scale / 2 : Scale;
11942  return (int)ShiftAmt;
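// Editor's note: MatchShift above looks for groups of Scale elements that
// keep Len = Scale - Shift consecutive source elements and pull Shift zeroes
// in from one side. When the grouped width exceeds 64 bits the shift must be
// emitted as a whole-vector byte shift (PSLLDQ/PSRLDQ), so the amount is
// converted from elements to bytes: with 16-bit elements, a two-element
// shift becomes ShiftAmt = 2 * 16 / 8 = 4 bytes.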
11951  unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
11952  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
11953  for (int Shift = 1; Shift != Scale; ++Shift)
11954  for (bool Left : {true, false})
11955  if (CheckZeros(Shift, Scale, Left)) {
11956  int ShiftAmt = MatchShift(Shift, Scale, Left);
11967  const APInt &Zeroable,
11970  int Size = Mask.size();
11979  Mask, 0, Zeroable, Subtarget);
11982  if (ShiftAmt < 0) {
11984  Mask, Size, Zeroable, Subtarget);
11995  "Illegal integer vector type");
11997  V = DAG.getNode(Opcode, DL, ShiftVT, V,
12007 int Size = Mask.size();
12008 int HalfSize =
Size / 2;
12018 int Len = HalfSize;
12019 for (; Len > 0; --Len)
12020 if (!Zeroable[Len - 1])
12022 assert(Len > 0 &&
"Zeroable shuffle mask");
12027 for (
int i = 0; i != Len; ++i) {
12036 if (i > M || M >= HalfSize)
12039 if (
Idx < 0 || (Src == V &&
Idx == (M - i))) {
12047 if (!Src ||
Idx < 0)
12050 assert((
Idx + Len) <= HalfSize &&
"Illegal extraction mask");
12063 int Size = Mask.size();
12064 int HalfSize =
Size / 2;
12071 for (
int Idx = 0;
Idx != HalfSize; ++
Idx) {
12087 for (
int Hi =
Idx + 1;
Hi <= HalfSize; ++
Hi) {
12089 int Len =
Hi -
Idx;
12103 }
else if ((!
Base || (
Base == V1)) &&
12106 }
else if ((!
Base || (
Base == V2)) &&
12156 assert(Scale > 1 &&
"Need a scale to extend.");
12159 int NumEltsPerLane = 128 / EltBits;
12160 int OffsetLane =
Offset / NumEltsPerLane;
12161 assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
12162 "Only 8, 16, and 32 bit elements can be extended.");
12163 assert(Scale * EltBits <= 64 &&
"Cannot zero extend past 64 bits.");
12164 assert(0 <=
Offset &&
"Extension offset must be positive.");
12166 "Extension offset must be in the first lane or start an upper lane.");
12169 auto SafeOffset = [&](
int Idx) {
12170 return OffsetLane == (
Idx / NumEltsPerLane);
12174 auto ShuffleOffset = [&](
SDValue V) {
12179 for (
int i = 0; i * Scale < NumElements; ++i) {
12180 int SrcIdx = i +
Offset;
12181 ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
12194 NumElements / Scale);
12196 InputV = ShuffleOffset(InputV);
12198 DL, ExtVT, InputV, DAG);
12207 if (AnyExt && EltBits == 32) {
12215 if (AnyExt && EltBits == 16 && Scale > 2) {
12216 int PSHUFDMask[4] = {
Offset / 2, -1,
12221 int PSHUFWMask[4] = {1, -1, -1, -1};
12224 VT, DAG.
getNode(OddEvenOp,
DL, MVT::v8i16,
12231 if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
12232 assert(NumElements == (
int)Mask.size() &&
"Unexpected shuffle mask size!");
12235 int LoIdx =
Offset * EltBits;
12244 int HiIdx = (
Offset + 1) * EltBits;
12256 if (Scale > 4 && EltBits == 8 && Subtarget.
hasSSSE3()) {
12257 assert(NumElements == 16 &&
"Unexpected byte vector width!");
12259 for (
int i = 0; i < 16; ++i) {
12261 if ((i % Scale == 0 && SafeOffset(
Idx))) {
12268 InputV = DAG.
getBitcast(MVT::v16i8, InputV);
12276 int AlignToUnpack =
Offset % (NumElements / Scale);
12277 if (AlignToUnpack) {
12279 for (
int i = AlignToUnpack; i < NumElements; ++i)
12280 ShMask[i - AlignToUnpack] = i;
12282 Offset -= AlignToUnpack;
12288 if (
Offset >= (NumElements / 2)) {
12290 Offset -= (NumElements / 2);
12297 InputV = DAG.
getNode(UnpackLoHi,
DL, InputVT, InputV, Ext);
12301 }
while (Scale > 1);
12322 int NumLanes = Bits / 128;
12324 int NumEltsPerLane = NumElements / NumLanes;
12326 "Exceeds 32-bit integer zero extension limit");
12327 assert((
int)Mask.size() == NumElements &&
"Unexpected shuffle mask size");
12333  bool AnyExt = true;
12336  for (int i = 0; i < NumElements; ++i) {
12340  if (i % Scale != 0) {
12352  SDValue V = M < NumElements ? V1 : V2;
12353  M = M % NumElements;
12356  Offset = M - (i / Scale);
12357  } else if (InputV != V)
12364  (Offset % NumEltsPerLane) == 0))
12369  if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
12372  if ((M % NumElements) != (Offset + (i / Scale)))
12385  if (Offset != 0 && Matches < 2)
12389  InputV, Mask, Subtarget, DAG);
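// Editor's note: the loop above recognises masks that are really a zero (or
// any) extension: every Scale-th element reads consecutive source elements
// while the filler positions are zeroable or undef. A simplified standalone
// sketch for the Offset == 0, single-source case, with a hypothetical name,
// not part of the original file:

#include <vector>

inline bool matchZeroExtendMask(const std::vector<int> &Mask,
                                const std::vector<bool> &Zeroable, int Scale) {
  int NumElts = static_cast<int>(Mask.size());
  for (int i = 0; i < NumElts; ++i) {
    if (i % Scale != 0) {
      if (Mask[i] >= 0 && !Zeroable[i])
        return false;           // filler lanes must be zero or undef
      continue;
    }
    if (Mask[i] >= 0 && Mask[i] != i / Scale)
      return false;             // extended lanes must read 0,1,2,... in order
  }
  return true;
}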
12393 assert(Bits % 64 == 0 &&
12394 "The number of bits in a vector must be divisible by 64 on x86!");
12395 int NumExtElements = Bits / 64;
12399 for (; NumExtElements < NumElements; NumExtElements *= 2) {
12400 assert(NumElements % NumExtElements == 0 &&
12401 "The input vector size must be divisible by the extended size.");
12412 auto CanZExtLowHalf = [&]() {
12413 for (
int i = NumElements / 2; i != NumElements; ++i)
12423 if (
SDValue V = CanZExtLowHalf()) {
12438 MVT VT = V.getSimpleValueType();
12444 MVT NewVT = V.getSimpleValueType();
12465 return V->hasOneUse() &&
12469template<
typename T>
12471 T EltVT = VT.getScalarType();
12472 return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) ||
12473 (EltVT == MVT::f16 && !Subtarget.hasFP16());
12493 find_if(Mask, [&Mask](
int M) {
return M >= (int)Mask.size(); }) -
12496 bool IsV1Zeroable =
true;
12497 for (
int i = 0,
Size = Mask.size(); i <
Size; ++i)
12498 if (i != V2Index && !Zeroable[i]) {
12499 IsV1Zeroable =
false;
12504 if (!IsV1Zeroable) {
12506 V1Mask[V2Index] = -1;
12521 if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
12525 if (!IsV1Zeroable && !(IsV1Constant && V2Index == 0))
12534 if (!IsV1Zeroable) {
12545 }
else if (Mask[V2Index] != (
int)Mask.size() || EltVT == MVT::i8 ||
12546 (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
12552 if (!IsV1Zeroable) {
12555 assert(VT == ExtVT &&
"Cannot change extended type when non-zeroable!");
12562 unsigned MovOpc = 0;
12563 if (EltVT == MVT::f16)
12565 else if (EltVT == MVT::f32)
12567 else if (EltVT == MVT::f64)
12571 return DAG.
getNode(MovOpc,
DL, ExtVT, V1, V2);
12582 if (V2Index != 0) {
12589 V2Shuffle[V2Index] = 0;
12611 "We can only lower integer broadcasts with AVX2!");
12617 assert(V0VT.
isVector() &&
"Unexpected non-vector vector-sized value!");
12627 if (V0EltSize <= EltSize)
12630 assert(((V0EltSize % EltSize) == 0) &&
12631 "Scalar type sizes must all be powers of 2 on x86!");
12634 const unsigned Scale = V0EltSize / EltSize;
12635 const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
12647 if (
const int OffsetIdx = BroadcastIdx % Scale)
12661 assert(Mask.size() == 4 &&
"Unsupported mask size!");
12662 assert(Mask[0] >= -1 && Mask[0] < 8 &&
"Out of bound mask element!");
12663 assert(Mask[1] >= -1 && Mask[1] < 8 &&
"Out of bound mask element!");
12664 assert(Mask[2] >= -1 && Mask[2] < 8 &&
"Out of bound mask element!");
12665 assert(Mask[3] >= -1 && Mask[3] < 8 &&
"Out of bound mask element!");
12669 if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
12671 if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
12683 assert((Input == 0 || Input == 1) &&
"Only two inputs to shuffles.");
12684 int Size = Mask.size();
12685 for (
int i = 0; i <
Size; ++i)
12686 if (Mask[i] >= 0 && Mask[i] /
Size == Input && Mask[i] %
Size != i)
12701 "VPERM* family of shuffles requires 32-bit or 64-bit elements");
12721 if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
12723 else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
12729 if (NumElts == 4 &&
12734 NewMask.
append(NumElts, -1);
12754 if (!((Subtarget.
hasSSE3() && VT == MVT::v2f64) ||
12755 (Subtarget.
hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
12762 unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.
hasAVX2())
12769 if (BroadcastIdx < 0)
12771 assert(BroadcastIdx < (
int)Mask.size() &&
"We only expect to be called with "
12772 "a sorted mask where the broadcast "
12774 int NumActiveElts =
count_if(Mask, [](
int M) {
return M >= 0; });
12780 int BitOffset = BroadcastIdx * NumEltBits;
12783 switch (V.getOpcode()) {
12785 V = V.getOperand(0);
12789 int OpBitWidth = V.getOperand(0).getValueSizeInBits();
12790 int OpIdx = BitOffset / OpBitWidth;
12791 V = V.getOperand(OpIdx);
12792 BitOffset %= OpBitWidth;
12797 unsigned EltBitWidth = V.getScalarValueSizeInBits();
12798 unsigned Idx = V.getConstantOperandVal(1);
12799 unsigned BeginOffset =
Idx * EltBitWidth;
12800 BitOffset += BeginOffset;
12801 V = V.getOperand(0);
12805 SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
12807 int Idx = (int)V.getConstantOperandVal(2);
12808 int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
12809 int BeginOffset =
Idx * EltBitWidth;
12810 int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
12811 if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
12812 BitOffset -= BeginOffset;
12822 assert((BitOffset % NumEltBits) == 0 &&
"Illegal bit-offset");
12823 BroadcastIdx = BitOffset / NumEltBits;
12826 bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;
12835 DL, VT, V, BroadcastIdx, Subtarget, DAG))
12836 return TruncBroadcast;
12842 V = V.getOperand(BroadcastIdx);
12848 cast<LoadSDNode>(V)->isSimple()) {
12858 assert((
int)(
Offset * 8) == BitOffset &&
"Unexpected bit-offset");
12875 assert(SVT == MVT::f64 &&
"Unexpected VT!");
12880 }
else if (!BroadcastFromReg) {
12883 }
else if (BitOffset != 0) {
12891 if (VT == MVT::v4f64 || VT == MVT::v4i64)
12896 if (BitOffset < 128 && NumActiveElts > 1 &&
12897 V.getScalarValueSizeInBits() == NumEltBits) {
12898 assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
12899 "Unexpected bit-offset");
12901 ExtractMask[0] = BitOffset / V.getScalarValueSizeInBits();
12906 if ((BitOffset % 128) != 0)
12909 assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
12910 "Unexpected bit-offset");
12911 assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
12912 "Unexpected vector size");
12913 unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
12921 if (Subtarget.
hasAVX()) {
12929 if (!V.getValueType().isVector()) {
12930 assert(V.getScalarValueSizeInBits() == NumEltBits &&
12931 "Unexpected scalar size");
12940 if (V.getValueSizeInBits() > 128)
12945 unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
12957 unsigned &InsertPSMask,
12958 const APInt &Zeroable,
12961 assert(V2.getSimpleValueType().is128BitVector() &&
"Bad operand type!");
12962 assert(Mask.size() == 4 &&
"Unexpected mask size for v4 shuffle!");
12969 unsigned ZMask = 0;
12970 int VADstIndex = -1;
12971 int VBDstIndex = -1;
12972 bool VAUsedInPlace =
false;
12974 for (
int i = 0; i < 4; ++i) {
12982 if (i == CandidateMask[i]) {
12983 VAUsedInPlace =
true;
12988 if (VADstIndex >= 0 || VBDstIndex >= 0)
12991 if (CandidateMask[i] < 4) {
13001 if (VADstIndex < 0 && VBDstIndex < 0)
13006 unsigned VBSrcIndex = 0;
13007 if (VADstIndex >= 0) {
13010 VBSrcIndex = CandidateMask[VADstIndex];
13011 VBDstIndex = VADstIndex;
13014 VBSrcIndex = CandidateMask[VBDstIndex] - 4;
13019 if (!VAUsedInPlace)
13027 InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
13028 assert((InsertPSMask & ~0xFFu) == 0 &&
"Invalid mask!");
13032 if (matchAsInsertPS(V1, V2, Mask))
13038 if (matchAsInsertPS(V2, V1, CommutedMask))
13048 assert(V2.getSimpleValueType() == MVT::v4f32 &&
"Bad operand type!");
13051 unsigned InsertPSMask = 0;
13072 assert(V2.getSimpleValueType() == MVT::v2f64 &&
"Bad operand type!");
13073 assert(Mask.size() == 2 &&
"Unexpected mask size for v2 shuffle!");
13075 if (V2.isUndef()) {
13078 Mask, Subtarget, DAG))
13083 unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
13085 if (Subtarget.
hasAVX()) {
13098 assert(Mask[0] >= 0 &&
"No undef lanes in multi-input v2 shuffles!");
13099 assert(Mask[1] >= 0 &&
"No undef lanes in multi-input v2 shuffles!");
13100 assert(Mask[0] < 2 &&
"We sort V1 to be the first input.");
13101 assert(Mask[1] >= 2 &&
"We sort V2 to be the second input.");
13110 DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
13114 int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
13115 Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
13117 DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
13133 Zeroable, Subtarget, DAG))
13140 unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
13156 assert(V2.getSimpleValueType() == MVT::v2i64 &&
"Bad operand type!");
13157 assert(Mask.size() == 2 &&
"Unexpected mask size for v2 shuffle!");
13159 if (V2.isUndef()) {
13162 Mask, Subtarget, DAG))
13169 int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),
13170 Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),
13171 Mask[1] < 0 ? -1 : (Mask[1] * 2),
13172 Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};
13178 assert(Mask[0] != -1 &&
"No undef lanes in multi-input v2 shuffles!");
13179 assert(Mask[1] != -1 &&
"No undef lanes in multi-input v2 shuffles!");
13180 assert(Mask[0] < 2 &&
"We sort V1 to be the first input.");
13181 assert(Mask[1] >= 2 &&
"We sort V2 to be the second input.");
13196 DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
13200 int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
13202 DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
13207 bool IsBlendSupported = Subtarget.
hasSSE41();
13208 if (IsBlendSupported)
13210 Zeroable, Subtarget, DAG))
13220 if (Subtarget.hasVLX())
13222 Zeroable, Subtarget, DAG))
13232 if (IsBlendSupported)
13234 Zeroable, Subtarget, DAG);
13254 SDValue LowV = V1, HighV = V2;
13256 int NumV2Elements =
count_if(Mask, [](
int M) {
return M >= 4; });
13258 if (NumV2Elements == 1) {
13259 int V2Index =
find_if(Mask, [](
int M) {
return M >= 4; }) - Mask.begin();
13263 int V2AdjIndex = V2Index ^ 1;
13265 if (Mask[V2AdjIndex] < 0) {
13271 NewMask[V2Index] -= 4;
13275 int V1Index = V2AdjIndex;
13276 int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
13288 NewMask[V1Index] = 2;
13289 NewMask[V2Index] = 0;
13291 }
else if (NumV2Elements == 2) {
13292 if (Mask[0] < 4 && Mask[1] < 4) {
13297 }
else if (Mask[2] < 4 && Mask[3] < 4) {
13312 int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
13313 Mask[2] < 4 ? Mask[2] : Mask[3],
13314 (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
13315 (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
13322 NewMask[0] = Mask[0] < 4 ? 0 : 2;
13323 NewMask[1] = Mask[0] < 4 ? 2 : 0;
13324 NewMask[2] = Mask[2] < 4 ? 1 : 3;
13325 NewMask[3] = Mask[2] < 4 ? 3 : 1;
13327 }
else if (NumV2Elements == 3) {
13348 assert(V2.getSimpleValueType() == MVT::v4f32 &&
"Bad operand type!");
13349 assert(Mask.size() == 4 &&
"Unexpected mask size for v4 shuffle!");
13353 Zeroable, Subtarget, DAG))
13356 int NumV2Elements =
count_if(Mask, [](
int M) {
return M >= 4; });
13358 if (NumV2Elements == 0) {
13361 Mask, Subtarget, DAG))
13372 if (Subtarget.
hasAVX()) {
13396 DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) {
13410 if (NumV2Elements == 1 && Mask[0] >= 4)
13412 DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
13452 assert(V2.getSimpleValueType() == MVT::v4i32 &&
"Bad operand type!");
13453 assert(Mask.size() == 4 &&
"Unexpected mask size for v4 shuffle!");
13459 Zeroable, Subtarget, DAG))
13462 int NumV2Elements =
count_if(Mask, [](
int M) {
return M >= 4; });
13465 if (Subtarget.preferLowerShuffleAsShift()) {
13468 Subtarget, DAG,
true))
13470 if (NumV2Elements == 0)
13476 if (NumV2Elements == 0) {
13478 if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
13480 Mask, Subtarget, DAG))
13489 const int UnpackLoMask[] = {0, 0, 1, 1};
13490 const int UnpackHiMask[] = {2, 2, 3, 3};
13492 Mask = UnpackLoMask;
13494 Mask = UnpackHiMask;
13511 if (NumV2Elements == 1)
13513 DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
13518 bool IsBlendSupported = Subtarget.hasSSE41();
13519 if (IsBlendSupported)
13521 Zeroable, Subtarget, DAG))
13525 Zeroable, Subtarget, DAG))
13535 if (Subtarget.hasVLX())
13536 if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
13537 Zeroable, Subtarget, DAG))
13540 if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
13551 if (IsBlendSupported)
13553 Zeroable, Subtarget, DAG);
13557 Mask, Subtarget, DAG))
13594 assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
13606 for (int i = 0; i != 4; ++i)
13607 HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
13613 copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
13617 copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
13621 int NumHToL = LoInputs.size() - NumLToL;
13623 int NumHToH = HiInputs.size() - NumLToH;
13642 if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
13643 int PSHUFDMask[4] = { -1, -1, -1, -1 };
13645 int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
13648 for (int DWord = 0; DWord != 4; ++DWord) {
13649 int M0 = Mask[2 * DWord + 0];
13650 int M1 = Mask[2 * DWord + 1];
13653 if (M0 < 0 && M1 < 0)
13656 bool Match = false;
13657 for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
13658 auto &DWordPair = DWordPairs[j];
13661 DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
13662 DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
13663 PSHUFDMask[DWord] = DOffset + j;
13669 PSHUFDMask[DWord] = DOffset + DWordPairs.size();
13674 if (DWordPairs.size() <= 2) {
13675 DWordPairs.resize(2, std::make_pair(-1, -1));
13676 int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
13677 DWordPairs[1].first, DWordPairs[1].second};
13678 if ((NumHToL + NumHToH) == 0)
13679 return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
13680 if ((NumLToL + NumLToH) == 0)
13681 return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
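// If every used input lands in a single half, one PSHUFLW/PSHUFHW rearranges
// the words inside that half and the accompanying PSHUFD then places the
// resulting dword pairs, so at most two cheap shuffles are needed.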
13717 int AOffset, int BOffset) {
13719 "Must call this with A having 3 or 1 inputs from the A half.");
13721 "Must call this with B having 1 or 3 inputs from the B half.");
13723 "Must call this with either 3:1 or 1:3 inputs (summing to 4).");
13725 bool ThreeAInputs = AToAInputs.size() == 3;
13731 int ADWord = 0, BDWord = 0;
13732 int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
13733 int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
13734 int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
13735 ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
13736 int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
13737 int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
13738 int TripleNonInputIdx =
13739 TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
13740 TripleDWord = TripleNonInputIdx / 2;
13744 OneInputDWord = (OneInput / 2) ^ 1;
13751 if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
13756 int NumFlippedAToBInputs = llvm::count(AToBInputs, 2 * ADWord) +
13758 int NumFlippedBToBInputs = llvm::count(BToBInputs, 2 * BDWord) +
13760 if ((NumFlippedAToBInputs == 1 &&
13761 (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
13762 (NumFlippedBToBInputs == 1 &&
13763 (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
13768 auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
13770 int FixIdx = PinnedIdx ^ 1;
13771 bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
13775 int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
13776 bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
13777 if (IsFixIdxInput == IsFixFreeIdxInput)
13780 assert(IsFixIdxInput != IsFixFreeIdxInput &&
13781 "We need to be changing the number of flipped inputs!");
13782 int PSHUFHalfMask[] = {0, 1, 2, 3};
13783 std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
13789 for (int &M : Mask)
13790 if (M >= 0 && M == FixIdx)
13792 else if (M >= 0 && M == FixFreeIdx)
13795 if (NumFlippedBToBInputs != 0) {
13797 BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
13798 FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
13800 assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
13801 int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
13802 FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
13807 int PSHUFDMask[] = {0, 1, 2, 3};
13808 PSHUFDMask[ADWord] = BDWord;
13809 PSHUFDMask[BDWord] = ADWord;
13816 for (int &M : Mask)
13817 if (M >= 0 && M / 2 == ADWord)
13818 M = 2 * BDWord + M % 2;
13819 else if (M >= 0 && M / 2 == BDWord)
13820 M = 2 * ADWord + M % 2;
13826 if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
13827 return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
13828 if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
13829 return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
13836 int PSHUFLMask[4] = {-1, -1, -1, -1};
13837 int PSHUFHMask[4] = {-1, -1, -1, -1};
13838 int PSHUFDMask[4] = {-1, -1, -1, -1};
13843 auto fixInPlaceInputs =
13847 if (InPlaceInputs.empty())
13849 if (InPlaceInputs.size() == 1) {
13850 SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
13851 InPlaceInputs[0] - HalfOffset;
13852 PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
13855 if (IncomingInputs.empty()) {
13857 for (int Input : InPlaceInputs) {
13858 SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
13859 PSHUFDMask[Input / 2] = Input / 2;
13864 assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
13865 SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
13866 InPlaceInputs[0] - HalfOffset;
13869 int AdjIndex = InPlaceInputs[0] ^ 1;
13870 SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
13871 std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
13872 PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
13874 fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
13875 fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
13881 auto moveInputsToRightHalf = [&PSHUFDMask](
13886 auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
13887 return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
13889 auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
13891 int LowWord = Word & ~1;
13892 int HighWord = Word | 1;
13893 return isWordClobbered(SourceHalfMask, LowWord) ||
13894 isWordClobbered(SourceHalfMask, HighWord);
13897 if (IncomingInputs.empty())
13900 if (ExistingInputs.empty()) {
13902 for (int Input : IncomingInputs) {
13905 if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
13906 if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
13907 SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
13908 Input - SourceOffset;
13910 for (int &M : HalfMask)
13911 if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
13913 else if (M == Input)
13914 M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
13916 assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
13917 Input - SourceOffset &&
13918 "Previous placement doesn't match!");
13923 Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
13927 if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
13928 PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
13930 assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
13932 "Previous placement doesn't match!");
13938 for (int &M : HalfMask)
13939 if (M >= SourceOffset && M < SourceOffset + 4) {
13940 M = M - SourceOffset + DestOffset;
13941 assert(M >= 0 && "This should never wrap below zero!");
13949 if (IncomingInputs.size() == 1) {
13950 if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
13951 int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
13953 SourceHalfMask[InputFixed - SourceOffset] =
13954 IncomingInputs[0] - SourceOffset;
13955 std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
13957 IncomingInputs[0] = InputFixed;
13959 } else if (IncomingInputs.size() == 2) {
13960 if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
13961 isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
13965 int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
13966 IncomingInputs[1] - SourceOffset};
13971 if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
13972 SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
13973 SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
13974 SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
13975 InputsFixed[1] = InputsFixed[0] ^ 1;
13976 } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
13977 SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
13978 SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
13979 SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
13980 InputsFixed[0] = InputsFixed[1] ^ 1;
13981 } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
13982 SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
13986 SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
13987 SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
13988 InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
13989 InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
13995 for (int i = 0; i < 4; ++i)
13996 assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
13997 "We can't handle any clobbers here!");
13998 assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
13999 "Cannot have adjacent inputs here!");
14001 SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
14002 SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
14006 for (int &M : FinalSourceHalfMask)
14007 if (M == (InputsFixed[0] ^ 1) + SourceOffset)
14008 M = InputsFixed[1] + SourceOffset;
14009 else if (M == InputsFixed[1] + SourceOffset)
14010 M = (InputsFixed[0] ^ 1) + SourceOffset;
14012 InputsFixed[1] = InputsFixed[0] ^ 1;
14016 for (int &M : HalfMask)
14017 if (M == IncomingInputs[0])
14018 M = InputsFixed[0] + SourceOffset;
14019 else if (M == IncomingInputs[1])
14020 M = InputsFixed[1] + SourceOffset;
14022 IncomingInputs[0] = InputsFixed[0] + SourceOffset;
14023 IncomingInputs[1] = InputsFixed[1] + SourceOffset;
14030 int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
14031 assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
14032 PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
14033 for (int &M : HalfMask)
14034 for (int Input : IncomingInputs)
14036 M = FreeDWord * 2 + Input % 2;
14038 moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
14040 moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
14060 "Failed to lift all the high half inputs to the low mask!");
14061 assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
14062 "Failed to lift all the low half inputs to the high mask!");
14070 for (int &M : HiMask)
14086 "Lane crossing shuffle masks not supported");
14089 int Size = Mask.size();
14090 int Scale = NumBytes / Size;
14097 for (int i = 0; i < NumBytes; ++i) {
14098 int M = Mask[i / Scale];
14102 const int ZeroMask = 0x80;
14103 int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
14104 int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
14105 if (Zeroable[i / Scale])
14106 V1Idx = V2Idx = ZeroMask;
14110 V1InUse |= (ZeroMask != V1Idx);
14111 V2InUse |= (ZeroMask != V2Idx);
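// PSHUFB zeroes a destination byte whenever bit 7 of its selector byte is
// set, so 0x80 doubles as the "unused lane" marker: zeroable lanes get it
// directly, and each operand's mask gets it for lanes sourced from the other
// operand, letting the two PSHUFB results be combined with an OR.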
14124 if (V1InUse && V2InUse)
14127 V = V1InUse ? V1 : V2;
14150 assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
14151 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14156 Zeroable, Subtarget, DAG))
14164 int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
14166 if (NumV2Inputs == 0) {
14170 Subtarget, DAG, false))
14175 Mask, Subtarget, DAG))
14204 "All single-input shuffles should be canonicalized to be V1-input "
14214 if (Subtarget.hasSSE4A())
14220 if (NumV2Inputs == 1)
14222 DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
14227 bool IsBlendSupported = Subtarget.hasSSE41();
14228 if (IsBlendSupported)
14230 Zeroable, Subtarget, DAG))
14234 Zeroable, Subtarget, DAG))
14262 Zeroable, Subtarget, DAG))
14267 if ((NumEvenDrops == 1 || (NumEvenDrops == 2 && Subtarget.hasSSE41())) &&
14268 !Subtarget.hasVLX()) {
14270 unsigned PackOpc = 0;
14271 if (NumEvenDrops == 2 && Subtarget.hasAVX2() &&
14282 } else if (Subtarget.hasSSE41()) {
14285 for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
14294 } else if (!Subtarget.hasSSSE3()) {
14307 if (NumEvenDrops == 2) {
14308 Result = DAG.getBitcast(MVT::v4i32, Result);
14309 Result = DAG.getNode(PackOpc, DL, MVT::v8i16, Result, Result);
14317 if (NumOddDrops == 1) {
14318 bool HasSSE41 = Subtarget.hasSSE41();
14326 MVT::v8i16, V1, V2);
14331 Mask, Subtarget, DAG))
14336 if (!IsBlendSupported && Subtarget.hasSSSE3()) {
14337 bool V1InUse, V2InUse;
14339 Zeroable, DAG, V1InUse, V2InUse);
14345 Zeroable, Subtarget, DAG);
14354 assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
14355 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
14356 int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
14358 if (Subtarget.hasFP16()) {
14359 if (NumV2Elements == 0) {
14362 Mask, Subtarget, DAG))
14365 if (NumV2Elements == 1 && Mask[0] >= 8)
14367 DL, MVT::v8f16, V1, V2, Mask, Zeroable, Subtarget, DAG))
14394 MVT ShuffleVT = VT;
14404 for (int &M : AdjustedMask)
14406 M += (Scale - 1) * NumElts;
14419 if (VT != ShuffleVT)
14437 assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
14438 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
14458 Zeroable, Subtarget, DAG))
14471 if (Subtarget.hasSSE4A())
14476 int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
14479 if (NumV2Elements == 0) {
14482 Mask, Subtarget, DAG))
14502 for (int i = 0; i < 16; i += 2)
14503 if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
14508 auto tryToWidenViaDuplication = [&]() -> SDValue {
14509 if (!canWidenViaDuplication(Mask))
14512 copy_if(Mask, std::back_inserter(LoInputs),
14513 [](int M) { return M >= 0 && M < 8; });
14517 copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
14521 bool TargetLo = LoInputs.size() >= HiInputs.size();
14522 ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
14523 ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
14525 int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
14527 for (int I : InPlaceInputs) {
14528 PreDupI16Shuffle[I / 2] = I / 2;
14531 int j = TargetLo ? 0 : 4, je = j + 4;
14532 for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
14535 if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
14538 while (j < je && PreDupI16Shuffle[j] >= 0)
14546 PreDupI16Shuffle[j] = MovingInputs[i] / 2;
14550 LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
14555 DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
14558 bool EvenInUse = false, OddInUse = false;
14559 for (int i = 0; i < 16; i += 2) {
14560 EvenInUse |= (Mask[i + 0] >= 0);
14561 OddInUse |= (Mask[i + 1] >= 0);
14562 if (EvenInUse && OddInUse)
14566 MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8),
14567 OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8));
14569 int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
14570 for (int i = 0; i < 16; ++i)
14571 if (Mask[i] >= 0) {
14572 int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
14573 assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
14574 if (PostDupI16Shuffle[i / 2] < 0)
14575 PostDupI16Shuffle[i / 2] = MappedMask;
14577 assert(PostDupI16Shuffle[i / 2] == MappedMask &&
14578 "Conflicting entries in the original shuffle!");
14583 DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
14585 if (SDValue V = tryToWidenViaDuplication())
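// Rough idea of the widen-via-duplication path: canWidenViaDuplication only
// succeeds when adjacent byte indices agree, so the v16i8 shuffle is really a
// word shuffle of bytes that end up duplicated, and it is lowered as the
// PreDup/PostDup i16 shuffles above around an unpack that duplicates bytes.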
14590 Zeroable, Subtarget, DAG))
14599 Zeroable, Subtarget, DAG))
14603 bool IsSingleInput = V2.isUndef();
14622 if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) {
14623 bool V1InUse = false;
14624 bool V2InUse = false;
14627 DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
14632 if (V1InUse && V2InUse) {
14635 Zeroable, Subtarget, DAG))
14647 DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
14651 if (Subtarget.hasVBMI())
14656 if (Subtarget.hasXOP()) {
14664 DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
14672 if (NumV2Elements == 1)
14674 DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
14687 if (NumEvenDrops) {
14693 assert(NumEvenDrops <= 3 &&
14694 "No support for dropping even elements more than 3 times.");
14696 for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))
14701 if (!IsSingleInput)
14707 IsSingleInput ? V1 : V2);
14708 for (int i = 1; i < NumEvenDrops; ++i) {
14709 Result = DAG.getBitcast(MVT::v8i16, Result);
14716 if (NumOddDrops == 1) {
14720 if (!IsSingleInput)
14725 IsSingleInput ? V1 : V2);
14729 if (NumV2Elements > 0)
14731 Zeroable, Subtarget, DAG);
14738 std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
14739 std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
14740 for (int i = 0; i < 16; ++i)
14742 (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
14748 if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
14749 none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
14756 VHiHalf = DAG.getUNDEF(MVT::v8i16);
14759 for (int &M : LoBlendMask)
14762 for (int &M : HiBlendMask)
14788 const APInt &Zeroable,
14791 if (VT == MVT::v8bf16) {
14828 "Only for 256-bit or wider vector shuffles!");
14830 assert(V2.getSimpleValueType() == VT &&
"Bad operand type!");
14836 int SplitNumElements = NumElements / 2;
14842 auto SplitVector = [&](SDValue V) {
14845 return std::make_pair(DAG.getBitcast(SplitVT, LoV),
14849 SDValue LoV1, HiV1, LoV2, HiV2;
14850 std::tie(LoV1, HiV1) = SplitVector(V1);
14851 std::tie(LoV2, HiV2) = SplitVector(V2);
14854 auto GetHalfBlendPiecesReq = [&](const ArrayRef<int> &HalfMask, bool &UseLoV1,
14855 bool &UseHiV1, bool &UseLoV2,
14857 UseLoV1 = UseHiV1 = UseLoV2 = UseHiV2 = false;
14858 for (int i = 0; i < SplitNumElements; ++i) {
14859 int M = HalfMask[i];
14860 if (M >= NumElements) {
14861 if (M >= NumElements + SplitNumElements)
14865 } else if (M >= 0) {
14866 if (M >= SplitNumElements)
14874 auto CheckHalfBlendUsable = [&](const ArrayRef<int> &HalfMask) -> bool {
14878 bool UseLoV1, UseHiV1, UseLoV2, UseHiV2;
14879 GetHalfBlendPiecesReq(HalfMask, UseLoV1, UseHiV1, UseLoV2, UseHiV2);
14881 return !(UseHiV1 || UseHiV2);
14888 for (int i = 0; i < SplitNumElements; ++i) {
14889 int M = HalfMask[i];
14890 if (M >= NumElements) {
14891 V2BlendMask[i] = M - NumElements;
14892 BlendMask[i] = SplitNumElements + i;
14893 } else if (M >= 0) {
14894 V1BlendMask[i] = M;
14899 bool UseLoV1, UseHiV1, UseLoV2, UseHiV2;
14900 GetHalfBlendPiecesReq(HalfMask, UseLoV1, UseHiV1, UseLoV2, UseHiV2);
14905 assert((!SimpleOnly || (!UseHiV1 && !UseHiV2)) && "Shuffle isn't simple");
14908 if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
14910 if (!UseLoV2 && !UseHiV2)
14912 if (!UseLoV1 && !UseHiV1)
14916 if (UseLoV1 && UseHiV1) {
14920 V1Blend = UseLoV1 ? LoV1 : HiV1;
14921 for (int i = 0; i < SplitNumElements; ++i)
14922 if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
14923 BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
14925 if (UseLoV2 && UseHiV2) {
14929 V2Blend = UseLoV2 ? LoV2 : HiV2;
14930 for (int i = 0; i < SplitNumElements; ++i)
14931 if (BlendMask[i] >= SplitNumElements)
14932 BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
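// Split lowering in a nutshell: the wide shuffle is decomposed into two
// half-width shuffles whose results are blended back together, and the
// UseLo*/UseHi* flags let each half pull from only the 128-bit pieces it
// actually needs (or bail out when only "simple" splits are allowed).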
14937 if (!CheckHalfBlendUsable(LoMask) || !CheckHalfBlendUsable(HiMask))
14955 const APInt &Zeroable,
14958 assert(!V2.isUndef() && "This routine must not be used to lower single-input "
14959 "shuffles as it could then recurse on itself.");
14960 int Size = Mask.size();
14965 auto DoBothBroadcast = [&] {
14966 int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
14969 if (V2BroadcastIdx < 0)
14970 V2BroadcastIdx = M - Size;
14971 else if (M - Size != V2BroadcastIdx)
14973 } else if (M >= 0) {
14974 if (V1BroadcastIdx < 0)
14975 V1BroadcastIdx = M;
14976 else if (M != V1BroadcastIdx)
14981 if (DoBothBroadcast())
14989 int LaneSize = Size / LaneCount;
14991 LaneInputs[0].resize(LaneCount, false);
14992 LaneInputs[1].resize(LaneCount, false);
14993 for (int i = 0; i < Size; ++i)
14995 LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
14996 if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
15012 assert(VT == MVT::v4f64 && "Only for v4f64 shuffles");
15014 int LHSMask[4] = {-1, -1, -1, -1};
15015 int RHSMask[4] = {-1, -1, -1, -1};
15016 int SHUFPDMask[4] = {-1, -1, -1, -1};
15020 for (int i = 0; i != 4; ++i) {
15024 int LaneBase = i & ~1;
15025 auto &LaneMask = (i & 1) ? RHSMask : LHSMask;
15026 LaneMask[LaneBase + (M & 1)] = M;
15027 SHUFPDMask[i] = M & 1;
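// SHUFPD can only choose the low or high double within each 128-bit lane, so
// only (M & 1) survives into the immediate; the cross-lane movement implied
// by the mask is handled by the separate LHS/RHS lane permutes built above.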
15049 int NumEltsPerLane = NumElts / NumLanes;
15050 bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();
15057 auto getSublanePermute = [&](int NumSublanes) -> SDValue {
15058 int NumSublanesPerLane = NumSublanes / NumLanes;
15059 int NumEltsPerSublane = NumElts / NumSublanes;
15067 for (int i = 0; i != NumElts; ++i) {
15072 int SrcSublane = M / NumEltsPerSublane;
15073 int DstLane = i / NumEltsPerLane;
15077 bool Found = false;
15078 int DstSubStart = DstLane * NumSublanesPerLane;
15079 int DstSubEnd = DstSubStart + NumSublanesPerLane;
15080 for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
15081 if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
15085 CrossLaneMaskLarge[DstSublane] = SrcSublane;
15086 int DstSublaneOffset = DstSublane * NumEltsPerSublane;
15087 InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
15088 DemandedCrossLane.setBit(InLaneMask[i]);
15098 if (!CanUseSublanes) {
15103 int NumIdentityLanes = 0;
15104 bool OnlyShuffleLowestLane = true;
15105 for (int i = 0; i != NumLanes; ++i) {
15106 int LaneOffset = i * NumEltsPerLane;
15108 i * NumEltsPerLane))
15109 NumIdentityLanes++;
15110 else if (CrossLaneMask[LaneOffset] != 0)
15111 OnlyShuffleLowestLane = false;
15113 if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
15120 if (CrossLaneMask == Mask || InLaneMask == Mask)
15125 for (int i = 0; i != NumElts; ++i)
15126 if (!DemandedCrossLane[i])
15135 if (SDValue V = getSublanePermute(NumLanes))
15139 if (!CanUseSublanes)
15143 if (SDValue V = getSublanePermute(NumLanes * 2))
15148 if (!Subtarget.hasFastVariableCrossLaneShuffle())
15151 return getSublanePermute(NumLanes * 4);
15157 int Size = Mask.size();
15158 InLaneMask.assign(Mask.begin(), Mask.end());
15159 for (int i = 0; i < Size; ++i) {
15160 int &M = InLaneMask[i];
15163 if (((M % Size) / LaneSize) != (i / LaneSize))
15164 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
15180 int Size = Mask.size();
15181 int LaneSize = Size / 2;
15186 if (VT == MVT::v4f64 &&
15187 !all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
15195 bool LaneCrossing[2] = {false, false};
15196 for (int i = 0; i < Size; ++i)
15197 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
15198 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
15199 AllLanes = LaneCrossing[0] && LaneCrossing[1];
15201 bool LaneUsed[2] = {false, false};
15202 for (int i = 0; i < Size; ++i)
15204 LaneUsed[(Mask[i] % Size) / LaneSize] = true;
15205 AllLanes = LaneUsed[0] && LaneUsed[1];
15210 "This last part of this routine only works on single input shuffles");
15216 "In-lane shuffle mask expected");
15236 const APInt &Zeroable,
15239 if (V2.isUndef()) {
15249 VT, MemVT, Ld, Ofs, DAG))
15264 bool IsLowZero = (Zeroable & 0x3) == 0x3;
15265 bool IsHighZero = (Zeroable & 0xc) == 0xc;
15268 if (WidenedMask[0] == 0 && IsHighZero) {
15288 if (!IsLowZero && !IsHighZero) {
15307 if (Subtarget.hasVLX()) {
15308 if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
15309 unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
15310 ((WidenedMask[1] % 2) << 1);
15330 assert((WidenedMask[0] >= 0 || IsLowZero) &&
15331 (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
15333 unsigned PermMask = 0;
15334 PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
15335 PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);
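// VPERM2F128/VPERM2I128 immediate: each nibble selects one 128-bit half from
// the concatenation of the two sources (values 0-3), and setting bit 3 of a
// nibble (the 0x08/0x80 used here) zeroes that half of the result instead.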
15338 if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00)
15340 if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20)
15357 assert(!V2.isUndef() && "This is only useful with multiple inputs.");
15362 int NumElts = Mask.size();
15370 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15371 int Srcs[2] = {-1, -1};
15373 for (int i = 0; i != NumLaneElts; ++i) {
15374 int M = Mask[(Lane * NumLaneElts) + i];
15381 int LaneSrc = M / NumLaneElts;
15383 if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
15385 else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
15390 Srcs[Src] = LaneSrc;
15391 InLaneMask[i] = (M % NumLaneElts) + Src * NumElts;
15398 LaneSrcs[Lane][0] = Srcs[0];
15399 LaneSrcs[Lane][1] = Srcs[1];
15402 assert(M1.size() == M2.size() && "Unexpected mask size");
15403 for (int i = 0, e = M1.size(); i != e; ++i)
15404 if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
15410 assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
15411 for (int i = 0, e = MergedMask.size(); i != e; ++i) {
15415 assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
15416 "Unexpected mask element");
15421 if (MatchMasks(InLaneMask, RepeatMask)) {
15423 MergeMasks(InLaneMask, RepeatMask);
15428 std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
15431 if (MatchMasks(InLaneMask, RepeatMask)) {
15433 MergeMasks(InLaneMask, RepeatMask);
15442 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15444 if (LaneSrcs[Lane][0] >= 0)
15447 for (int i = 0; i != NumLaneElts; ++i) {
15448 int M = Mask[(Lane * NumLaneElts) + i];
15453 if (RepeatMask[i] < 0)
15454 RepeatMask[i] = M % NumLaneElts;
15456 if (RepeatMask[i] < NumElts) {
15457 if (RepeatMask[i] != M % NumLaneElts)
15459 LaneSrcs[Lane][0] = M / NumLaneElts;
15461 if (RepeatMask[i] != ((M % NumLaneElts) + NumElts))
15463 LaneSrcs[Lane][1] = M / NumLaneElts;
15467 if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
15472 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15473 int Src = LaneSrcs[Lane][0];
15474 for (int i = 0; i != NumLaneElts; ++i) {
15477 M = Src * NumLaneElts + i;
15478 NewMask[Lane * NumLaneElts + i] = M;
15485 if (isa<ShuffleVectorSDNode>(NewV1) &&
15486 cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
15489 for (int Lane = 0; Lane != NumLanes; ++Lane) {
15490 int Src = LaneSrcs[Lane][1];
15491 for (int i = 0; i != NumLaneElts; ++i) {
15494 M = Src * NumLaneElts + i;
15495 NewMask[Lane * NumLaneElts + i] = M;
15502 if (isa<ShuffleVectorSDNode>(NewV2) &&
15503 cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
15506 for (int i = 0; i != NumElts; ++i) {
15511 NewMask[i] = RepeatMask[i % NumLaneElts];
15512 if (NewMask[i] < 0)
15515 NewMask[i] += (i / NumLaneElts) * NumLaneElts;
15528 int &HalfIdx1, int &HalfIdx2) {
15529 assert((Mask.size() == HalfMask.size() * 2) &&
15530 "Expected input mask to be twice as long as output");
15535 if (UndefLower == UndefUpper)
15538 unsigned HalfNumElts = HalfMask.size();
15539 unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0;
15542 for (unsigned i = 0; i != HalfNumElts; ++i) {
15543 int M = Mask[i + MaskIndexOffset];
15551 int HalfIdx = M / HalfNumElts;
15554 int HalfElt = M % HalfNumElts;
15558 if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
15559 HalfMask[i] = HalfElt;
15560 HalfIdx1 = HalfIdx;
15563 if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
15564 HalfMask[i] = HalfElt + HalfNumElts;
15565 HalfIdx2 = HalfIdx;
15580 int HalfIdx2, bool UndefLower,
15589 auto getHalfVector = [&](int HalfIdx) {
15592 SDValue V = (HalfIdx < 2 ? V1 : V2);
15593 HalfIdx = (HalfIdx % 2) * HalfNumElts;
15599 SDValue Half1 = getHalfVector(HalfIdx1);
15600 SDValue Half2 = getHalfVector(HalfIdx2);
15610 unsigned Offset = UndefLower ? HalfNumElts : 0;
15623 "Expected 256-bit or 512-bit vector");
15630 "Completely undef shuffle mask should have been simplified already");
15654 int HalfIdx1, HalfIdx2;
15659 assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
15662 unsigned NumLowerHalves =
15663 (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
15664 unsigned NumUpperHalves =
15665 (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
15666 assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");
15674 if (NumUpperHalves == 0)
15678 if (NumUpperHalves == 1) {
15682 if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
15685 Subtarget.hasFastVariableCrossLaneShuffle()))
15691 if (EltWidth == 64 && V2.isUndef())
15695 if (EltWidth == 8 && HalfIdx1 == 0 && HalfIdx2 == 1)
15707 assert(NumUpperHalves == 2 && "Half vector count went wrong");
15712 if (NumUpperHalves == 0) {
15715 if (Subtarget.hasAVX2() && EltWidth == 64)
15738 int NumLaneElts = NumElts / NumLanes;
15743 for (unsigned BroadcastSize : {16, 32, 64}) {
15752 for (int i = 0; i != NumElts; i += NumBroadcastElts)
15753 for (int j = 0; j != NumBroadcastElts; ++j) {
15754 int M = Mask[i + j];
15757 int &R = RepeatMask[j];
15758 if (0 != ((M % NumElts) / NumLaneElts))
15760 if (0 <= R && R != M)
15768 if (!FindRepeatingBroadcastMask(RepeatMask))
15776 for (int i = 0; i != NumElts; i += NumBroadcastElts)
15777 for (int j = 0; j != NumBroadcastElts; ++j)
15778 BroadcastMask[i + j] = j;
15782 if (BroadcastMask == Mask)
15800 auto ShuffleSubLanes = [&](int SubLaneScale) {
15801 int NumSubLanes = NumLanes * SubLaneScale;
15802 int NumSubLaneElts = NumLaneElts / SubLaneScale;
15807 int TopSrcSubLane = -1;
15813 for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
15818 for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
15819 int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
15822 int Lane = (M % NumElts) / NumLaneElts;
15823 if ((0 <= SrcLane) && (SrcLane != Lane))
15826 int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
15827 SubLaneMask[Elt] = LocalM;
15835 for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
15837 for (int i = 0; i != NumSubLaneElts; ++i) {
15838 if (M1[i] < 0 || M2[i] < 0)
15840 if (M1[i] != M2[i])
15846 auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
15847 if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
15851 for (int i = 0; i != NumSubLaneElts; ++i) {
15852 int M = SubLaneMask[i];
15855 assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
15856 "Unexpected mask element");
15857 RepeatedSubLaneMask[i] = M;
15862 int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
15863 TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
15864 Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
15869 if (Dst2SrcSubLanes[DstSubLane] < 0)
15872 assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
15873 "Unexpected source lane");
15877 for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
15878 int Lane = SubLane / SubLaneScale;
15879 auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
15880 for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
15881 int M = RepeatedSubLaneMask[Elt];
15884 int Idx = (SubLane * NumSubLaneElts) + Elt;
15885 RepeatedMask[Idx] = M + (Lane * NumLaneElts);
15891 for (int i = 0; i != NumElts; i += NumSubLaneElts) {
15892 int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
15893 if (SrcSubLane < 0)
15895 for (int j = 0; j != NumSubLaneElts; ++j)
15896 SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
15901 if (RepeatedMask == Mask || SubLaneMask == Mask)
15915 int MinSubLaneScale = 1, MaxSubLaneScale = 1;
15918 MinSubLaneScale = 2;
15920 (!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 4 : 2;
15922 if (Subtarget.hasBWI() && VT == MVT::v64i8)
15923 MinSubLaneScale = MaxSubLaneScale = 4;
15925 for (int Scale = MinSubLaneScale; Scale <= MaxSubLaneScale; Scale *= 2)
15926 if (SDValue Shuffle = ShuffleSubLanes(Scale))
15933 bool &ForceV1Zero, bool &ForceV2Zero,
15935 const APInt &Zeroable) {
15938 (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
15939 "Unexpected data type for VSHUFPD");
15941 "Illegal shuffle mask");
15943 bool ZeroLane[2] = { true, true };
15944 for (int i = 0; i < NumElts; ++i)
15945 ZeroLane[i & 1] &= Zeroable[i];
15949 bool IsSHUFPD = true;
15950 bool IsCommutable = true;
15952 for (int i = 0; i < NumElts; ++i) {
15957 int Val = (i & 6) + NumElts * (i & 1);
15958 int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
15959 if (Mask[i] < Val || Mask[i] > Val + 1)
15961 if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
15962 IsCommutable = false;
15963 SHUFPDMask[i] = Mask[i] % 2;
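// For VSHUFPD, even result elements must come from V1 and odd ones from V2,
// each restricted to its own 128-bit lane; Val/CommutVal above are the two
// legal index ranges for element i, and the immediate bit is simply the low
// bit of Mask[i] (written as % 2 here).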
15966 if (!IsSHUFPD && !IsCommutable)
15969 if (!IsSHUFPD && IsCommutable)
15972 ForceV1Zero = ZeroLane[0];
15973 ForceV2Zero = ZeroLane[1];
15980 const APInt &Zeroable,
15983 assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
15984 "Unexpected data type for VSHUFPD");
15986 unsigned Immediate = 0;
15987 bool ForceV1Zero = false, ForceV2Zero = false;
16008 const APInt &Zeroable,
16010 assert(VT == MVT::v32i8 && "Unexpected type!");
16017 if (Zeroable.countl_one() < (Mask.size() - 8))
16029 { 0, 1, 2, 3, 16, 17, 18, 19,
16030 4, 5, 6, 7, 20, 21, 22, 23 });
16057 if (VT != MVT::v8f32 && VT != MVT::v8i32 && VT != MVT::v16i16 &&
16061 auto IsInterleavingPattern = [&](ArrayRef<int> Mask, unsigned Begin0,
16063 size_t Size = Mask.size();
16064 assert(Size % 2 == 0 && "Expected even mask size");
16065 for (unsigned I = 0; I < Size; I += 2) {
16066 if (Mask[I] != (int)(Begin0 + I / 2) ||
16067 Mask[I + 1] != (int)(Begin1 + I / 2))
16074 size_t FirstQtr = NumElts / 2;
16075 size_t ThirdQtr = NumElts + NumElts / 2;
16076 bool IsFirstHalf = IsInterleavingPattern(Mask, 0, NumElts);
16077 bool IsSecondHalf = IsInterleavingPattern(Mask, FirstQtr, ThirdQtr);
16078 if (!IsFirstHalf && !IsSecondHalf)
16088 if (Shuffles.size() != 2)
16091 auto *SVN1 = cast<ShuffleVectorSDNode>(Shuffles[0]);
16092 auto *SVN2 = cast<ShuffleVectorSDNode>(Shuffles[1]);
16095 if (IsInterleavingPattern(SVN1->getMask(), 0, NumElts) &&
16096 IsInterleavingPattern(SVN2->getMask(), FirstQtr, ThirdQtr)) {
16097 FirstHalf = Shuffles[0];
16098 SecondHalf = Shuffles[1];
16099 } else if (IsInterleavingPattern(SVN1->getMask(), FirstQtr, ThirdQtr) &&
16100 IsInterleavingPattern(SVN2->getMask(), 0, NumElts)) {
16101 FirstHalf = Shuffles[1];
16102 SecondHalf = Shuffles[0];
16131 assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
16132 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16138 if (V2.isUndef()) {
16141 Mask, Subtarget, DAG))
16151 unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
16152 ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
16165 DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
16170 Mask, DAG, Subtarget))
16183 Zeroable, Subtarget, DAG))
16188 Zeroable, Subtarget, DAG))
16199 !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
16206 if (V1IsInPlace || V2IsInPlace)
16208 Zeroable, Subtarget, DAG);
16213 DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
16220 if (!(Subtarget.hasAVX2() && (V1IsInPlace || V2IsInPlace)))
16222 DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
16226 if (Subtarget.hasVLX())
16228 Zeroable, Subtarget, DAG))
16235 Zeroable, Subtarget, DAG);
16251 assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
16252 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
16253 assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
16260 Zeroable, Subtarget, DAG))
16269 if (Subtarget.preferLowerShuffleAsShift())
16272 Subtarget, DAG, true))
16275 if (V2.isUndef()) {
16302 if (Subtarget.hasVLX()) {
16304 Zeroable, Subtarget, DAG))
16308 Zeroable, Subtarget, DAG))
16326 if (V1IsInPlace || V2IsInPlace)
16328 Zeroable, Subtarget, DAG);
16333 DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
16345 if (!V1IsInPlace && !V2IsInPlace)
16347 DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
16352 Zeroable, Subtarget, DAG);
16364 assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
16365 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16368 Zeroable, Subtarget, DAG))
16386 Zeroable, Subtarget, DAG))
16394 "Repeated masks must be half the mask width!");
16418 DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
16423 if (V2.isUndef()) {
16440 DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
16444 if (Subtarget.hasVLX())
16446 Zeroable, Subtarget, DAG))
16470 Zeroable, Subtarget, DAG);
16486 assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
16487 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
16488 assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
16490 int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
16496 Zeroable, Subtarget, DAG))
16515 Zeroable, Subtarget, DAG))
16524 if (Subtarget.preferLowerShuffleAsShift()) {
16527 Subtarget, DAG, true))
16529 if (NumV2Elements == 0)
16539 bool Is128BitLaneRepeatedShuffle =
16541 if (Is128BitLaneRepeatedShuffle) {
16542 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
16558 if (!Subtarget.preferLowerShuffleAsShift() && NumV2Elements == 0)
16564 if (Subtarget.hasVLX()) {
16566 Zeroable, Subtarget, DAG))
16570 Zeroable, Subtarget, DAG))
16582 DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
16585 if (V2.isUndef()) {
16604 CastV1, CastV2, DAG);
16611 DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
16616 Zeroable, Subtarget, DAG);
16628 assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
16629 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
16630 assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
16636 DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
16645 Zeroable, Subtarget, DAG))
16665 Subtarget, DAG, false))
16676 DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
16679 if (V2.isUndef()) {
16694 DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
16707 DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
16712 Zeroable, Subtarget, DAG))
16716 if (Subtarget.hasBWI())
16722 DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
16727 DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
16751 assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
16752 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
16753 assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
16759 Zeroable, Subtarget, DAG))
16768 Zeroable, Subtarget, DAG))
16805 DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
16817 DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
16825 Zeroable, Subtarget, DAG))
16829 if (Subtarget.hasVBMI())
16835 DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
16840 DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
16846 if (Subtarget.hasVLX())
16848 Mask, Zeroable, DAG))
16875 int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
16877 if (NumV2Elements == 1 && Mask[0] >= NumElts)
16879 DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
16895 if (ElementBits < 32) {
16913 if (VT == MVT::v16f16 || VT == MVT::v16bf16) {
16945 "Unexpected element type size for 128bit shuffle.");
16955 assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");
16958 if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
16959 (Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
16960 unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4;
16971 bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2);
16973 isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) {
16983 bool IsInsert = true;
16985 for (int i = 0; i < 4; ++i) {
16986 assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
16987 if (Widened128Mask[i] < 0)
16991 if (Widened128Mask[i] < 4) {
16992 if (Widened128Mask[i] != i) {
16998 if (V2Index >= 0 || Widened128Mask[i] != 4) {
17005 if (IsInsert && V2Index >= 0) {
17018 Widened128Mask.clear();
17024 int PermMask[4] = {-1, -1, -1, -1};
17026 for (int i = 0; i < 4; ++i) {
17027 assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
17028 if (Widened128Mask[i] < 0)
17031 SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
17038 PermMask[i] = Widened128Mask[i] % 4;
17051 assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
17052 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17054 if (V2.isUndef()) {
17056 if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2))
17062 unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
17063 ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
17064 ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
17065 ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
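// VPERMILPD's immediate carries one bit per double, selecting the low or high
// element within that double's own 128-bit lane, so each bit above is simply
// "does the mask pick the odd element of this lane".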
17077 V2, Subtarget, DAG))
17085 Zeroable, Subtarget, DAG))
17093 Zeroable, Subtarget, DAG))
17105 assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
17106 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17112 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17129 Zeroable, Subtarget, DAG))
17137 Zeroable, Subtarget, DAG))
17141 DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
17147 DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
17152 if (V2.isUndef() &&
17160 Zeroable, Subtarget, DAG))
17172 assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
17173 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
17176 if (Subtarget.preferLowerShuffleAsShift())
17179 Subtarget, DAG, true))
17182 if (V2.isUndef()) {
17204 V2, Subtarget, DAG))
17215 Zeroable, Subtarget, DAG))
17219 if (Subtarget.hasBWI())
17233 Zeroable, Subtarget, DAG))
17245 assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
17246 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
17248 int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
17254 DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
17258 if (Subtarget.preferLowerShuffleAsShift()) {
17261 Subtarget, DAG, true))
17263 if (NumV2Elements == 0)
17273 bool Is128BitLaneRepeatedShuffle =
17275 if (Is128BitLaneRepeatedShuffle) {
17276 assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
17289 Subtarget, DAG, false))
17292 if (!Subtarget.preferLowerShuffleAsShift() && NumV2Elements != 0)
17299 Zeroable, Subtarget, DAG))
17303 if (Subtarget.hasBWI())
17314 CastV1, CastV2, DAG);
17321 DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
17326 Zeroable, Subtarget, DAG))
17330 Zeroable, Subtarget, DAG))
17342 assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
17343 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
17344 assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
17350 DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
17365 Subtarget, DAG, false))
17373 if (V2.isUndef()) {
17385 RepeatedMask, Subtarget, DAG);
17390 Zeroable, Subtarget, DAG))
17394 Zeroable, Subtarget, DAG))
17401 DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
17413 assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
17414 assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
17415 assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
17421 DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
17452 Zeroable, Subtarget, DAG))
17456 Zeroable, Subtarget, DAG))
17462 DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
17466 DL, MVT::v64i8, V1, V2, Mask, DAG, Subtarget))
17470 Zeroable, Subtarget, DAG))
17477 Mask, Subtarget, DAG))
17482 bool V1InUse, V2InUse;
17484 DAG, V1InUse, V2InUse);
17491 DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
17495 if (Subtarget.hasVBMI())
17508 const APInt &Zeroable,
17512 "Cannot lower 512-bit vectors w/ basic ISA!");
17516 int NumElts = Mask.size();
17517 int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
17519 if (NumV2Elements == 1 && Mask[0] >= NumElts)
17521 DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
17534 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
17546 if (VT == MVT::v32f16 || VT == MVT::v32bf16) {
17547 if (!Subtarget.hasBWI())
17589 int NumElts = Mask.size();
17590 for (int i = 0; i != NumElts; ++i) {
17593 "Unexpected mask index.");
17598 if (ShiftAmt < 0) {
17605 if (ShiftAmt != M - i)
17608 assert(ShiftAmt >= 0 && "All undef?");
17622 int MaskOffset, const APInt &Zeroable) {
17623 int Size = Mask.size();
17625 auto CheckZeros = [&](int Shift, bool Left) {
17626 for (int j = 0; j < Shift; ++j)
17627 if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
17633 auto MatchShift = [&](int Shift, bool Left) {
17634 unsigned Pos = Left ? Shift : 0;
17635 unsigned Low = Left ? 0 : Shift;
17636 unsigned Len = Size - Shift;
17640 for (int Shift = 1; Shift != Size; ++Shift)
17641 for (bool Left : {true, false})
17642 if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
17657 const APInt &Zeroable,
17661 "Cannot lower 512-bit vectors w/o basic ISA!");
17663 int NumElts = Mask.size();
17664 int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
17667 int SubvecElts = 0;
17669 for (int i = 0; i != NumElts; ++i) {
17670 if (Mask[i] >= 0) {
17674 Src = Mask[i] / NumElts;
17675 if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i)
17681 assert(SubvecElts != NumElts && "Identity shuffle?");
17684 SubvecElts = llvm::bit_floor<uint32_t>(SubvecElts);
17688 if ((int)Zeroable.countl_one() >= (NumElts - SubvecElts)) {
17689 assert(Src >= 0 && "Expected a source!");
17709 if (ShiftAmt >= 0) {
17720 ShiftAmt += WideElts - NumElts;
17723 Res = DAG.getNode(Opcode, DL, WideVT, Res,
17750 ExtVT = MVT::v2i64;
17753 ExtVT = MVT::v4i32;
17758 ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
17768 assert(Subtarget.hasBWI() && "Expected AVX512BW support");
17776 ExtVT = MVT::v64i8;
17786 if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
17787 (Subtarget.hasDQI() && (NumElems < 32)))
17797 int NumElements = Mask.size();
17799 int NumV1Elements = 0, NumV2Elements = 0;
17803 else if (M < NumElements)
17811 if (NumV2Elements > NumV1Elements)
17814 assert(NumV1Elements > 0 && "No V1 indices");
17816 if (NumV2Elements == 0)
17824 if (NumV1Elements == NumV2Elements) {
17825 int LowV1Elements = 0, LowV2Elements = 0;
17826 for (int M : Mask.slice(0, NumElements / 2))
17827 if (M >= NumElements)
17831 if (LowV2Elements > LowV1Elements)
17833 if (LowV2Elements == LowV1Elements) {
17834 int SumV1Indices = 0, SumV2Indices = 0;
17835 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17836 if (Mask[i] >= NumElements)
17838 else if (Mask[i] >= 0)
17840 if (SumV2Indices < SumV1Indices)
17842 if (SumV2Indices == SumV1Indices) {
17843 int NumV1OddIndices = 0, NumV2OddIndices = 0;
17844 for (int i = 0, Size = Mask.size(); i < Size; ++i)
17845 if (Mask[i] >= NumElements)
17846 NumV2OddIndices += i % 2;
17847 else if (Mask[i] >= 0)
17848 NumV1OddIndices += i % 2;
17849 if (NumV2OddIndices < NumV1OddIndices)
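// Commutation heuristic: keep V1 as the "dominant" operand by commuting when
// V2 supplies more elements, then break ties by which operand feeds the low
// half, then by the smaller sum of used indices, and finally by which operand
// occupies fewer odd positions.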
17863 if (!V.getValueType().isSimple())
17866 MVT VT = V.getSimpleValueType().getScalarType();
17867 if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
17872 if ((VT == MVT::i16 || VT == MVT::i8) &&
17873 V.getSimpleValueType().getSizeInBits() < 512)
17876 auto HasMaskOperation = [&](SDValue V) {
17879 switch (V->getOpcode()) {
17898 if (!V->hasOneUse())
17904 if (HasMaskOperation(V))
17929 MVT VT = Op.getSimpleValueType();
17935 "Can't lower MMX shuffles");
17937 bool V1IsUndef = V1.isUndef();
17938 bool V2IsUndef = V2.isUndef();
17939 if (V1IsUndef && V2IsUndef)
17952 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
17954 for (int &M : NewMask)
17955 if (M >= NumElements)
17961 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
17962 (void)MaskUpperLimit;
17964 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
17965 "Out of bounds shuffle index");
17970 APInt KnownUndef, KnownZero;
17973 APInt Zeroable = KnownUndef | KnownZero;
17999 int NewNumElts = NumElements / 2;
18007 bool UsedZeroVector = false;
18009 "V2's non-undef elements are used?!");
18010 for (int i = 0; i != NewNumElts; ++i)
18012 WidenedMask[i] = i + NewNumElts;
18013 UsedZeroVector = true;
18017 if (UsedZeroVector)
18038 assert(NumElements == (int)Mask.size() &&
18039 "canonicalizeShuffleMaskWithHorizOp "
18040 "shouldn't alter the shuffle mask size");
18069 "Need AVX512 for custom VECTOR_COMPRESS lowering.");
18084 if (NumVecBits != 128 && NumVecBits != 256)
18087 if (NumElementBits == 32 || NumElementBits == 64) {
18088 unsigned NumLargeElements = 512 / NumElementBits;
18096 Subtarget, DAG, DL);
18100 Subtarget, DAG, DL);
18108 if (VecVT == MVT::v8i16 || VecVT == MVT::v8i8 || VecVT == MVT::v16i8 ||
18109 VecVT == MVT::v16i16) {
18114 Passthru = Passthru.isUndef()
18133 MVT VT = Op.getSimpleValueType();
18152 MVT VT = Op.getSimpleValueType();
18174 MVT CondVT = Cond.getSimpleValueType();
18175 unsigned CondEltSize = Cond.getScalarValueSizeInBits();
18176 if (CondEltSize == 1)
18187 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
18200 return DAG.getSelect(dl, VT, Mask, LHS, RHS);
18204 if (CondEltSize != EltSize) {
18221 !Subtarget.hasXOP()) {
18227 if (FreeCond && (FreeLHS || FreeRHS))
18247 case MVT::v16i16: {
18260 MVT VT = Op.getSimpleValueType();
18263 assert(isa<ConstantSDNode>(Idx) && "Constant index expected");
18278 unsigned IdxVal = Idx->getAsZExtVal();
18284 if (VT == MVT::f32) {
18290 if (!Op.hasOneUse())
18295 User->getValueType(0) != MVT::i32))
18302 if (VT == MVT::i32 || VT == MVT::i64)
18316 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
18317 MVT EltVT = Op.getSimpleValueType();
18320 "Unexpected vector type in ExtractBitFromMaskVector");
18328 if (NumElts == 1) {
18340 unsigned IdxVal = IdxC->getZExtValue();
18357 MVT VT = N->getSimpleValueType(0);
18361 switch (User->getOpcode()) {
18367 return DemandedElts;
18369 DemandedElts.setBit(User->getConstantOperandVal(1));
18372 if (!User->getValueType(0).isSimple() ||
18373 !User->getValueType(0).isVector()) {
18375 return DemandedElts;
18383 return DemandedElts;
18386 return DemandedElts;
18390 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
18396 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
18435 unsigned IdxVal = IdxC->getZExtValue();
18449 IdxVal &= ElemsPerChunk - 1;
18456 MVT VT = Op.getSimpleValueType();
18458 if (VT == MVT::i16) {
18463 if (Subtarget.hasFP16())
18483 if (VT == MVT::i8) {
18488 int DWordIdx = IdxVal / 4;
18489 if (DWordIdx == 0 && DemandedElts == (DemandedElts & 15)) {
18493 int ShiftVal = (IdxVal % 4) * 8;
18500 int WordIdx = IdxVal / 2;
18501 if (DemandedElts == (DemandedElts & (3 << (WordIdx * 2)))) {
18505 int ShiftVal = (IdxVal % 2) * 8;
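// Extracting an i8 can reuse the cheaper word/dword extracts: pull out the
// containing 16-bit (or 32-bit) chunk and shift right by 8 when the wanted
// byte is the high byte of its word, instead of a dedicated byte extract.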
18519 Mask[0] = static_cast<int>(IdxVal);
18535 int Mask[2] = { 1, -1 };
18554 if (!isa<ConstantSDNode>(Idx)) {
18573 MVT VT = Op.getSimpleValueType();
18578 if (EltVT == MVT::i1)
18585 auto *N2C = dyn_cast<ConstantSDNode>(N2);
18587 if (EltVT == MVT::bf16) {
18599 if (!(Subtarget.hasBWI() ||
18600 (Subtarget.hasAVX512() && EltSizeInBits >= 32) ||
18601 (Subtarget.hasSSE41() && (EltVT == MVT::f32 || EltVT == MVT::f64))))
18614 for (unsigned I = 0; I != NumElts; ++I)
18619 return DAG.getSelectCC(dl, IdxSplat, Indices, EltSplat, N0,
18623 if (N2C->getAPIntValue().uge(NumElts))
18625 uint64_t IdxVal = N2C->getZExtValue();
18630 if (IsZeroElt || IsAllOnesElt) {
18633 if (IsAllOnesElt &&
18634 ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||
18635 ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {
18639 CstVectorElts[IdxVal] = OnesCst;
18648 for (unsigned i = 0; i != NumElts; ++i)
18649 BlendMask.push_back(i == IdxVal ? i + NumElts : i);
18665 if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
18666 (Subtarget.hasAVX2() && (EltVT == MVT::i32 || EltVT == MVT::i64))) {
18673 unsigned NumEltsIn128 = 128 / EltSizeInBits;
18675 "Vectors will always have power-of-two number of elements.");
18680 if (IdxVal >= NumEltsIn128 &&
18681 ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
18682 (Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
18686 for (unsigned i = 0; i != NumElts; ++i)
18687 BlendMask.push_back(i == IdxVal ? i + NumElts : i);
18696 unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
18708 if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
18709 EltVT == MVT::f16 || EltVT == MVT::i64) {
18716 if (EltVT == MVT::i16 || EltVT == MVT::i8) {
18727 if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
18729 if (VT == MVT::v8i16) {
18733 assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
18738 assert(N1.getValueType() != MVT::i32 && "Unexpected VT");
18741 return DAG.getNode(Opc, dl, VT, N0, N1, N2);
18745 if (EltVT == MVT::f32) {
18775 if (EltVT == MVT::i32 || EltVT == MVT::i64)
18785 MVT OpVT = Op.getSimpleValueType();
18806 "Expected an SSE type!");
18810 if (OpVT == MVT::v4i32 || (OpVT == MVT::v8i16 && Subtarget.hasFP16()))
18823 assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
18830 assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
18831 "Only vXi1 extract_subvectors need custom lowering");
18835 uint64_t IdxVal = Op.getConstantOperandVal(1);
18852 unsigned X86TargetLowering::getGlobalWrapperKind(
18853 const GlobalValue *GV, const unsigned char OpFlags) const {
18887 CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);
18890 DAG.getNode(getGlobalWrapperKind(nullptr, OpFlag), DL, PtrVT, Result);
18912 DAG.getNode(getGlobalWrapperKind(nullptr, OpFlag), DL, PtrVT, Result);
18925 return LowerGlobalOrExternal(Op, DAG, false);
18931 unsigned char OpFlags =
18933 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
18934 int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
18939 DAG.getNode(getGlobalWrapperKind(nullptr, OpFlags), dl, PtrVT, Result);
18953 bool ForCall) const {
18958 const char *ExternalSym = nullptr;
18959 if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
18960 GV = G->getGlobal();
18963 const auto *ES = cast<ExternalSymbolSDNode>(Op);
18964 ExternalSym = ES->getSymbol();
18969 unsigned char OpFlags;
18987 int64_t GlobalOffset = 0;
19000 if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
19003 Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
19028 return LowerGlobalOrExternal(Op, DAG, false);
19032 const EVT PtrVT, unsigned ReturnReg,
19033 unsigned char OperandFlags,
19034 bool LoadGlobalBaseReg = false,
19035 bool LocalDynamic = false) {
19043 if (LocalDynamic && UseTLSDESC) {
19050 "Unexpected TLSDESC DAG");
19054 "Unexpected TLSDESC DAG");
19056 auto *CopyFromRegOp = CallSeqEndOp->getGluedUser();
19058 "Unexpected TLSDESC DAG");
19059 Ret = SDValue(CopyFromRegOp, 0);
19072 if (LoadGlobalBaseReg) {
19078 Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA, InGlue});
19080 Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA});
19128 bool Is64Bit, bool Is64BitLP64) {
19138 unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
19177 unsigned char OperandFlags = 0;
19234 if (Subtarget.is64Bit()) {
19246 PositionIndependent);
19253 unsigned char OpFlag = 0;
19254 unsigned WrapperKind = 0;
19258 bool PIC32 = PositionIndependent && !Subtarget.is64Bit();
19293 unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
19319 SDValue TlsArray = Subtarget.is64Bit()
19334 if (Subtarget.is64Bit())
19365 if (Subtarget.is64Bit() && Subtarget.isTargetELF()) {
19406 "Unexpected opcode!");
19407 bool IsStrict = Op->isStrictFPOpcode();
19408 unsigned OpNo = IsStrict ? 1 : 0;
19410 MVT SrcVT = Src.getSimpleValueType();
19411 MVT VT = Op.getSimpleValueType();
19413 if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
19414 (VT != MVT::f32 && VT != MVT::f64))
19420 unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
19427 {Op.getOperand(0), InVec});
19447 "Unexpected opcode!");
19448 bool IsStrict = Op->isStrictFPOpcode();
19449 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
19450 MVT SrcVT = Src.getSimpleValueType();
19451 MVT VT = Op.getSimpleValueType();
19453 if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
19458 assert(Subtarget.hasFP16() && "Expected FP16");
19462 SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
19463 {Op.getOperand(0), InVec});
19481 if (!Subtarget.
hasSSE2() || FromVT != MVT::v4i32)
19484 return ToVT == MVT::v4f32 || (Subtarget.
hasAVX() && ToVT == MVT::v4f64);
19488 if (!Subtarget.
hasAVX512() || FromVT != MVT::v4i32)
19491 return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
19509 !isa<ConstantSDNode>(Extract.
getOperand(1)))
19530 if (FromVT != Vec128VT)
19554 MVT SrcVT =
X.getSimpleValueType();
19555 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
19560 if (!Subtarget.
hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
19572 unsigned ToIntOpcode =
19574 unsigned ToFPOpcode =
19593 bool IsStrict =
Op->isStrictFPOpcode();
19594 MVT VT =
Op->getSimpleValueType(0);
19595 SDValue Src =
Op->getOperand(IsStrict ? 1 : 0);
19597 if (Subtarget.hasDQI()) {
19598 assert(!Subtarget.hasVLX() &&
"Unexpected features");
19600 assert((Src.getSimpleValueType() == MVT::v2i64 ||
19601 Src.getSimpleValueType() == MVT::v4i64) &&
19602 "Unsupported custom type");
19605 assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
19607 MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
19617 Res = DAG.
getNode(
Op.getOpcode(),
DL, {WideVT, MVT::Other},
19618 {Op->getOperand(0), Src});
19621 Res = DAG.
getNode(
Op.getOpcode(),
DL, WideVT, Src);
19634 if (VT != MVT::v4f32 || IsSigned)
19646 for (
int i = 0; i != 4; ++i) {
19652 {
Op.getOperand(0), Elt});
19653 Chains[i] = SignCvts[i].getValue(1);
19664 {Chain, SignCvt, SignCvt});
19681 bool IsStrict =
Op->isStrictFPOpcode();
19682 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
19684 MVT VT =
Op.getSimpleValueType();
19692 DAG.
getNode(
Op.getOpcode(), dl, {NVT, MVT::Other}, {Chain, Src}),
19695 DAG.
getNode(
Op.getOpcode(), dl, NVT, Src), Rnd);
19700 if (VT == MVT::v4i32 && Subtarget.
hasSSE2() && IsSigned)
19702 if (VT == MVT::v8i32 && Subtarget.
hasAVX() && IsSigned)
19704 if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32))
19707 if (VT == MVT::v16i32)
19709 if (VT == MVT::v8i64 && Subtarget.hasDQI())
19712 if (Subtarget.hasDQI() && Subtarget.hasVLX() &&
19713 (VT == MVT::v2i64 || VT == MVT::v4i64))
19720 bool IsStrict =
Op->isStrictFPOpcode();
19721 unsigned OpNo = IsStrict ? 1 : 0;
19724 MVT SrcVT = Src.getSimpleValueType();
19725 MVT VT =
Op.getSimpleValueType();
19734 return LowerWin64_INT128_TO_FP(
Op, DAG);
19743 if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
19755 if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
19761 assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
19762 "Unknown SINT_TO_FP to lower!");
19768 if (SrcVT == MVT::i32 && UseSSEReg)
19770 if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit())
19779 if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
19788 if (VT == MVT::f128 || !Subtarget.hasX87())
19792 if (SrcVT == MVT::i64 && Subtarget.
hasSSE2() && !Subtarget.is64Bit())
19796 ValueToStore = DAG.
getBitcast(MVT::f64, ValueToStore);
19806 Chain = DAG.
getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment);
19807 std::pair<SDValue, SDValue> Tmp =
19808 BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);
19823 Tys = DAG.
getVTList(MVT::f80, MVT::Other);
19825 Tys = DAG.
getVTList(DstVT, MVT::Other);
19827 SDValue FILDOps[] = {Chain, Pointer};
19831 Chain = Result.getValue(1);
19841 SDValue FSTOps[] = {Chain, Result, StackSlot};
19849 DstVT,
DL, Chain, StackSlot,
19851 Chain = Result.getValue(1);
19854 return { Result, Chain };
19863 bool HasFastHOps = Subtarget.hasFastHorizontalOps();
19864 return !IsSingleSource || IsOptimizingSize || HasFastHOps;
19874 assert(!
Op->isStrictFPOpcode() &&
"Expected non-strict uint_to_fp!");
19891 static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
19899 APInt(64, 0x4330000000000000ULL))));
19902 APInt(64, 0x4530000000000000ULL))));
19916                 MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
19939   unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
19942       llvm::bit_cast<double>(0x4330000000000000ULL), dl, MVT::f64);
19961   if (Op.getNode()->isStrictFPOpcode()) {
19966                       {Chain, Or, Bias});
19973         Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
19975     return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
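// The 0x43300000/0x45300000 words and the bit_cast<double>(0x433...) bias
// above implement the classic exponent-bias unsigned->double conversion.
// Below is a standalone scalar sketch of that trick (not the DAG code
// itself), assuming IEEE-754 doubles and the default round-to-nearest mode:
#include <cstdint>
#include <cstring>

static double bitsToDoubleSketch(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof(D)); // portable bit_cast
  return D;
}

static double u64ToF64Sketch(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  double DLo = bitsToDoubleSketch(0x4330000000000000ULL | Lo); // 2^52 + Lo, exact
  double DHi = bitsToDoubleSketch(0x4530000000000000ULL | Hi); // 2^84 + Hi*2^32, exact
  // Subtract the biases (both differences are exact) and add the halves; the
  // final add rounds once, matching a direct uint64 -> double conversion.
  return (DHi - bitsToDoubleSketch(0x4530000000000000ULL)) +
         (DLo - bitsToDoubleSketch(0x4330000000000000ULL));
}
// The unsigned-i32 path above needs only the low half: OR the value into the
// mantissa of 2^52 and subtract 2^52.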
19989 if (
Op.getSimpleValueType() != MVT::v2f64)
19992 bool IsStrict =
Op->isStrictFPOpcode();
19994 SDValue N0 =
Op.getOperand(IsStrict ? 1 : 0);
19998 if (!Subtarget.hasVLX()) {
20006 {Op.getOperand(0), N0});
20018 {
Op.getOperand(0), N0});
20028 llvm::bit_cast<double>(0x4330000000000000ULL),
DL, MVT::v2f64);
20035 {
Op.getOperand(0),
Or, VBias});
20042 bool IsStrict =
Op->isStrictFPOpcode();
20043 SDValue V =
Op->getOperand(IsStrict ? 1 : 0);
20044 MVT VecIntVT = V.getSimpleValueType();
20045 assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
20046 "Unsupported custom type");
20050 assert(!Subtarget.hasVLX() &&
"Unexpected features");
20051 MVT VT =
Op->getSimpleValueType(0);
20054 if (VT == MVT::v8f64)
20057 assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
20059 MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
20060 MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
20070                     {Op->getOperand(0), V});
20084   if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 &&
20085       Op->getSimpleValueType(0) == MVT::v4f64) {
20105                       {Op.getOperand(0), Or, VBias});
20121   bool Is128 = VecIntVT == MVT::v4i32;
20122   MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
20125   if (VecFloatVT != Op->getSimpleValueType(0))
20146   MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
20187                       {Op.getOperand(0), HighBitcast, VecCstFSub});
20189                      {FHigh.getValue(1), LowBitcast, FHigh});
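// The HighBitcast/LowBitcast/VecCstFSub sequence above is the 16-bit-halves
// form of unsigned vXi32 -> vXf32: each half is small enough to sit exactly
// in a float mantissa. A standalone scalar sketch of one lane, assuming
// IEEE-754 floats and round-to-nearest (constants written as bit patterns):
#include <cstdint>
#include <cstring>

static float u32ToF32Sketch(uint32_t X) {
  auto BitsToFloat = [](uint32_t Bits) {
    float F;
    std::memcpy(&F, &Bits, sizeof(F));
    return F;
  };
  float FLo = BitsToFloat(0x4B000000u | (X & 0xFFFF)); // 2^23 + lo16, exact
  float FHi = BitsToFloat(0x53000000u | (X >> 16));    // 2^39 + hi16*2^16, exact
  // 0x53000080 is 2^39 + 2^23; subtracting it folds both biases into the
  // high half, and the final add rounds once.
  return (FHi - BitsToFloat(0x53000080u)) + FLo;
}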
20199 unsigned OpNo =
Op.getNode()->isStrictFPOpcode() ? 1 : 0;
20219 bool IsStrict =
Op->isStrictFPOpcode();
20220 unsigned OpNo = IsStrict ? 1 : 0;
20224 MVT SrcVT = Src.getSimpleValueType();
20225 MVT DstVT =
Op->getSimpleValueType(0);
20229 if (DstVT == MVT::f128)
20241 return LowerWin64_INT128_TO_FP(
Op, DAG);
20247 (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
20254 if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
20269 if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.
hasSSE2() &&
20274 if (SrcVT == MVT::i32 && Subtarget.
hasSSE2() && DstVT != MVT::f80 &&
20277 if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
20278 (DstVT == MVT::f32 || DstVT == MVT::f64))
20283 int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
20284 Align SlotAlign(8);
20287 if (SrcVT == MVT::i32) {
20290 SDValue Store1 = DAG.
getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
20293 std::pair<SDValue, SDValue> Tmp =
20294 BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG);
20301 assert(SrcVT == MVT::i64 &&
"Unexpected type in UINT_TO_FP");
20307 ValueToStore = DAG.
getBitcast(MVT::f64, ValueToStore);
20310 DAG.
getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign);
20327 APInt FF(64, 0x5F80000000000000ULL);
20330 Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign();
20349 if (Subtarget.
isOSWindows() && DstVT == MVT::f32)
20353 DAG.
getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
20355 if (DstVT == MVT::f80)
20363 if (Subtarget.
isOSWindows() && DstVT == MVT::f32)
20380 bool IsStrict =
Op->isStrictFPOpcode();
20383 EVT DstTy =
Op.getValueType();
20388 if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
20397 bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;
20401 if (!IsSigned && DstTy != MVT::i64) {
20404 assert(DstTy == MVT::i32 &&
"Unexpected FP_TO_UINT");
20408 assert(DstTy.getSimpleVT() <= MVT::i64 &&
20409 DstTy.getSimpleVT() >= MVT::i16 &&
20410 "Unknown FP_TO_INT to lower!");
20415 unsigned MemSize = DstTy.getStoreSize();
20424 if (UnsignedFixup) {
20444     bool LosesInfo = false;
20445     if (TheVT == MVT::f64)
20449     else if (TheVT == MVT::f80)
20454            "FP conversion should have been exact");
20464     Chain = Cmp.getValue(1);
20489                         { Chain, Value, FltOfs });
20490     Chain = Value.getValue(1);
20500     assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
20503     SDValue Ops[] = { Chain, StackSlot };
20506     assert(FLDSize <= MemSize && "Stack slot not big enough");
20510     Chain = Value.getValue(1);
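// The UnsignedFixup path above exists because x86 only has signed
// fist/cvttsd2si: inputs >= 2^63 are biased down by 2^63 before the signed
// convert and the top bit is added back afterwards. A branchy standalone
// sketch of the same idea (the lowering itself uses a compare plus selects):
#include <cstdint>

static uint64_t f64ToU64Sketch(double X) {
  const double TwoPow63 = 9223372036854775808.0; // 2^63, exact in double
  if (X < TwoPow63)
    return static_cast<uint64_t>(static_cast<int64_t>(X)); // in signed range
  // Bias into signed range, convert, then restore the high bit.
  uint64_t Biased = static_cast<uint64_t>(static_cast<int64_t>(X - TwoPow63));
  return Biased + (1ULL << 63);
}
// Out-of-range or NaN inputs are left undefined here, just as fptoui is
// poison for them in IR.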
20533 MVT VT =
Op.getSimpleValueType();
20535 MVT InVT = In.getSimpleValueType();
20536 unsigned Opc =
Op.getOpcode();
20540 "Unexpected extension opcode");
20542 "Expected same number of elements");
20546 "Unexpected element type");
20550 "Unexpected element type");
20554 if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
20555 assert(InVT == MVT::v32i8 &&
"Unexpected VT!");
20579 if (
auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In))
20595 assert((VT == MVT::v16i8 || VT == MVT::v16i16) &&
"Unexpected VT.");
20609 MVT VT =
Op->getSimpleValueType(0);
20611 MVT InVT = In.getSimpleValueType();
20625 if (!Subtarget.hasBWI()) {
20634 MVT WideVT = ExtVT;
20659 return SelectedVal;
20665 MVT SVT = In.getSimpleValueType();
20684 "Unexpected PACK opcode");
20691 EVT SrcVT = In.getValueType();
20694 if (SrcVT == DstVT)
20704 assert(SrcSizeInBits > DstSizeInBits &&
"Illegal truncation");
20712 EVT InVT = MVT::i16, OutVT = MVT::i8;
20721 if (SrcSizeInBits <= 128) {
20738 if (
Hi.isUndef()) {
20745 unsigned SubSizeInBits = SrcSizeInBits / 2;
20747 OutVT =
EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
20768 int Scale = 64 / OutVT.getScalarSizeInBits();
20781 assert(SrcSizeInBits >= 256 &&
"Expected 256-bit vector or greater");
20813 EVT SrcVT = In.getValueType();
20830 EVT SrcVT = In.getValueType();
20837 if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
20838 (DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
20841 assert(NumSrcEltBits > NumDstEltBits &&
"Bad truncation");
20842   unsigned NumStages = Log2_32(NumSrcEltBits / NumDstEltBits);
20847   if ((DstSVT == MVT::i32 && SrcVT.getSizeInBits() <= 128) ||
20848       (DstSVT == MVT::i16 && SrcVT.getSizeInBits() <= (64 * NumStages)) ||
20849       (DstVT == MVT::v2i8 && SrcVT == MVT::v2i64 && Subtarget.hasSSSE3()))
20854   if (SrcVT == MVT::v4i64 && DstVT == MVT::v4i32 &&
20860   if (Subtarget.hasAVX512() && NumStages > 1)
20863   unsigned NumPackedSignBits = std::min<unsigned>(NumDstEltBits, 16);
20864   unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
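// Why the sign-bit/zero-bit counting above lets a truncate become PACKSS or
// PACKUS: a pack instruction saturates, but if every source element already
// fits in the narrow type the saturation never fires and the pack is a plain
// truncation. Standalone sketch of one PACKSSDW lane (assumes C++17):
#include <algorithm>
#include <cstdint>

static int16_t packssLaneSketch(int32_t X) {
  // Signed saturation i32 -> i16, i.e. what PACKSSDW does per element.
  return static_cast<int16_t>(std::clamp<int32_t>(X, INT16_MIN, INT16_MAX));
}
// If X has at least 17 known sign bits (NumSignBits above), the clamp is a
// no-op and packssLaneSketch(X) == static_cast<int16_t>(X); PACKUS plays the
// same role when the upper bits are known zero.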
20885 if (DstSVT == MVT::i32 && NumSignBits != NumSrcEltBits &&
20889 unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
20890 if (MinSignBits < NumSignBits) {
20898 if (In.getOpcode() ==
ISD::SRL && In->hasOneUse())
20900 if (*ShAmt == MinSignBits) {
20916 MVT SrcVT = In.getSimpleValueType();
20919 if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
20920 (DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
20936 unsigned PackOpcode;
20949 MVT SrcVT = In.getSimpleValueType();
20953 if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
20954 (DstSVT == MVT::i8 || DstSVT == MVT::i16) &&
isPowerOf2_32(NumElems) &&
20959 if (Subtarget.
hasSSSE3() && NumElems == 8) {
20960 if (SrcSVT == MVT::i16)
20962 if (SrcSVT == MVT::i32 && (DstSVT == MVT::i8 || !Subtarget.
hasSSE41()))
20981 if (Subtarget.
hasSSE41() || DstSVT == MVT::i8)
20984 if (SrcSVT == MVT::i16 || SrcSVT == MVT::i32)
20988 if (DstSVT == MVT::i16 && SrcSVT == MVT::i64) {
21000 MVT VT =
Op.getSimpleValueType();
21002 MVT InVT = In.getSimpleValueType();
21008 if (Subtarget.hasBWI()) {
21024 "Unexpected vector type.");
21026 assert((NumElts == 8 || NumElts == 16) &&
"Unexpected number of elements");
21038 if (InVT == MVT::v16i8) {
21042 {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
21045 assert(InVT == MVT::v16i16 &&
"Unexpected VT!");
21071 if (Subtarget.hasDQI())
21078 MVT VT =
Op.getSimpleValueType();
21080 MVT InVT =
In.getSimpleValueType();
21082 "Invalid TRUNCATE operation");
21087 if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
21089 assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
21090 "Unexpected subtarget!");
21132 if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
21133 assert(VT == MVT::v32i8 &&
"Unexpected VT!");
21141 if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
21149 if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
21152 static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
21163 static const int ShufMask[] = {0, 2, 4, 6};
21165 DAG.
getBitcast(MVT::v4i32, OpHi), ShufMask);
21168 if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
21172 static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
21173 -1, -1, -1, -1, -1, -1, -1, -1,
21174 16, 17, 20, 21, 24, 25, 28, 29,
21175 -1, -1, -1, -1, -1, -1, -1, -1 };
21180 static const int ShufMask2[] = {0, 2, -1, -1};
21192 if (VT == MVT::v16i8 && InVT == MVT::v16i16)
21203 MVT SrcVT = Src.getSimpleValueType();
21205 assert(DstBits == 32 &&
"expandFP_TO_UINT_SSE - only vXi32 supported");
21224 if (VT == MVT::v8i32 && !Subtarget.
hasAVX2()) {
21237 bool IsStrict =
Op->isStrictFPOpcode();
21240 MVT VT =
Op->getSimpleValueType(0);
21241 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
21243 MVT SrcVT = Src.getSimpleValueType();
21250 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
21251 {Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
21252 {NVT, MVT::Other}, {Chain, Src})});
21253 return DAG.
getNode(
Op.getOpcode(), dl, VT,
21259 if (VT.isVector()) {
21260 if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
21261 MVT ResVT = MVT::v4i32;
21262 MVT TruncVT = MVT::v4i1;
21269 if (!IsSigned && !Subtarget.hasVLX()) {
21272 ResVT = MVT::v8i32;
21273 TruncVT = MVT::v8i1;
21274 Opc =
Op.getOpcode();
21284 Res = DAG.
getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
21287 Res = DAG.
getNode(Opc, dl, ResVT, Src);
21299 if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
21304 if (EleVT != MVT::i64)
21305 ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
21307 if (SrcVT != MVT::v8f16) {
21318 dl, {ResVT, MVT::Other}, {Chain, Src});
21341 if (VT.getVectorElementType() == MVT::i16) {
21344 "Expected f32/f64 vector!");
21349 dl, {NVT, MVT::Other}, {Chain, Src});
21365 if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
21366 assert(!IsSigned &&
"Expected unsigned conversion!");
21372 if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
21373 (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32) &&
21375 assert(!IsSigned &&
"Expected unsigned conversion!");
21376 assert(!Subtarget.hasVLX() &&
"Unexpected features!");
21377 MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
21378 MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
21404 if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
21405 (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32) &&
21407 assert(!Subtarget.hasVLX() &&
"Unexpected features!");
21408 MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
21418 Res = DAG.
getNode(
Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
21422 Res = DAG.
getNode(
Op.getOpcode(), dl, MVT::v8i64, Src);
21433 if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
21434 if (!Subtarget.hasVLX()) {
21443 Tmp = DAG.
getNode(
Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
21451 assert(Subtarget.hasDQI() && Subtarget.hasVLX() &&
"Requires AVX512DQVL");
21457 return DAG.
getNode(Opc, dl, {VT, MVT::Other}, {
Op->getOperand(0), Tmp});
21460 return DAG.
getNode(Opc, dl, VT, Tmp);
21465 if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) ||
21466 (VT == MVT::v4i32 && SrcVT == MVT::v4f64) ||
21467 (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) {
21468 assert(!IsSigned &&
"Expected unsigned conversion!");
21477 bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);
21479 if (!IsSigned && UseSSEReg) {
21486 if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) ||
21487 (VT == MVT::i64 && Subtarget.is64Bit()))) {
21488 unsigned DstBits = VT.getScalarSizeInBits();
21518 if (VT == MVT::i64)
21521     assert(VT == MVT::i32 && "Unexpected VT!");
21526 if (Subtarget.is64Bit()) {
21549 if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
21550 assert(IsSigned &&
"Expected i16 FP_TO_UINT to have been promoted!");
21565 if (UseSSEReg && IsSigned)
21569 if (SrcVT == MVT::f128) {
21576 MakeLibCallOptions CallOptions;
21577 std::pair<SDValue, SDValue> Tmp =
21578 makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
21587 if (
SDValue V = FP_TO_INTHelper(
Op, DAG, IsSigned, Chain)) {
21593 llvm_unreachable(
"Expected FP_TO_INTHelper to handle all remaining cases.");
21599 EVT DstVT =
Op.getSimpleValueType();
21600 MVT SrcVT = Src.getSimpleValueType();
21605 if (SrcVT == MVT::f16)
21612 return LRINT_LLRINTHelper(
Op.getNode(), DAG);
21617 EVT DstVT =
N->getValueType(0);
21619 EVT SrcVT = Src.getValueType();
21621 if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
21634 EVT OtherVT = UseSSE ? SrcVT : DstVT;
21636 int SPFI = cast<FrameIndexSDNode>(
StackPtr.getNode())->getIndex();
21641 assert(DstVT == MVT::i64 &&
"Invalid LRINT/LLRINT to lower!");
21642 Chain = DAG.
getStore(Chain,
DL, Src, StackPtr, MPI);
21649 Chain = Src.getValue(1);
21654 StoreOps, DstVT, MPI, std::nullopt,
21657 return DAG.
getLoad(DstVT,
DL, Chain, StackPtr, MPI);
21674 EVT SrcVT = Src.getValueType();
21675 EVT DstVT =
Node->getValueType(0);
21683 EVT SatVT = cast<VTSDNode>(
Node->getOperand(1))->getVT();
21687 assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
21688 "Expected saturation width smaller than result width");
21691 if (TmpWidth < 32) {
21698 if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
21705 if (SatWidth < TmpWidth)
21710 APInt MinInt, MaxInt;
21735 if (AreExactFloatBounds) {
21736 if (DstVT != TmpVT) {
21744     SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
21758     SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
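// A standalone scalar sketch of the AreExactFloatBounds strategy above, for a
// case where both saturation bounds are exactly representable in the source
// type (e.g. f64 -> i32): map NaN to 0, clamp, then do a plain conversion
// that can no longer go out of range.
#include <cmath>
#include <cstdint>

static int32_t f64ToI32SatSketch(double X) {
  if (std::isnan(X))
    return 0;                          // fptosi.sat returns 0 for NaN
  constexpr double Lo = -2147483648.0; // INT32_MIN, exact in f64
  constexpr double Hi = 2147483647.0;  // INT32_MAX, exact in f64
  if (X < Lo) X = Lo;
  if (X > Hi) X = Hi;
  return static_cast<int32_t>(X);      // truncation toward zero, now safe
}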
21778 if (DstVT != TmpVT) {
21801 if (!IsSigned || DstVT != TmpVT) {
21812 bool IsStrict =
Op->isStrictFPOpcode();
21815 MVT VT =
Op.getSimpleValueType();
21818 MVT SVT =
In.getSimpleValueType();
21822 if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80 &&
21826 if ((SVT == MVT::v8f16 && Subtarget.hasF16C()) ||
21830 if (SVT == MVT::f16) {
21831 if (Subtarget.hasFP16())
21834 if (VT != MVT::f32) {
21839 {MVT::f32, MVT::Other}, {Chain,
In})});
21845 if (!Subtarget.hasF16C()) {
21849 assert(VT == MVT::f32 && SVT == MVT::f16 &&
"unexpected extend libcall");
21860 Entry.IsSExt =
false;
21861 Entry.IsZExt =
true;
21862 Args.push_back(Entry);
21867 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
21905 assert(Subtarget.hasF16C() &&
"Unexpected features!");
21906   if (SVT == MVT::v2f16)
21913                       {Op->getOperand(0), Res});
21915   } else if (VT == MVT::v4f64 || VT == MVT::v8f64) {
21919     assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
21925                       {Op->getOperand(0), Res});
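// Both the scalar f16 extend above (which falls back to a libcall without
// F16C) and the FP_ROUND path that follows prefer the F16C instructions when
// they are available. Roughly what the scalar conversions boil down to, as a
// standalone sketch (assumes <immintrin.h> and compiling with -mf16c; the
// intrinsics map to VCVTPH2PS / VCVTPS2PH):
#include <immintrin.h>

static float extendHalfSketch(unsigned short H) { return _cvtsh_ss(H); }

static unsigned short truncToHalfSketch(float F) {
  // Round-to-nearest in the immediate rounding-control field.
  return _cvtss_sh(F, _MM_FROUND_TO_NEAREST_INT);
}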
21930 bool IsStrict =
Op->isStrictFPOpcode();
21935 MVT VT =
Op.getSimpleValueType();
21936 MVT SVT =
In.getSimpleValueType();
21938 if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80))
21941 if (VT == MVT::f16 && (SVT == MVT::f64 || SVT == MVT::f32) &&
21942 !Subtarget.hasFP16() && (SVT == MVT::f64 || !Subtarget.hasF16C())) {
21954 Entry.IsSExt =
false;
21955 Entry.IsZExt =
true;
21956 Args.push_back(Entry);
21960 : RTLIB::FPROUND_F32_F16),
21962 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
21979 ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
21980 Subtarget.hasAVXNECONVERT()))
21985 if (VT.
getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
21986 if (!Subtarget.hasF16C() || SVT.
getScalarType() != MVT::f32)
22000 {Chain, Res, Rnd});
22022 bool IsStrict =
Op->isStrictFPOpcode();
22023 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
22024 assert(Src.getValueType() == MVT::i16 &&
Op.getValueType() == MVT::f32 &&
22035 {
Op.getOperand(0), Res});
22051 bool IsStrict =
Op->isStrictFPOpcode();
22052 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
22053 assert(Src.getValueType() == MVT::f32 &&
Op.getValueType() == MVT::i16 &&
22086 MVT SVT =
Op.getOperand(0).getSimpleValueType();
22087 if (SVT == MVT::f32 && ((Subtarget.hasBF16() && Subtarget.hasVLX()) ||
22088 Subtarget.hasAVXNECONVERT())) {
22097 MakeLibCallOptions CallOptions;
22100 makeLibCall(DAG, LC, MVT::f16,
Op.getOperand(0), CallOptions,
DL).first;
22116 bool IsFP =
Op.getSimpleValueType().isFloatingPoint();
22117 if (IsFP && !Subtarget.
hasSSE3())
22119 if (!IsFP && !Subtarget.
hasSSSE3())
22125 LHS.getOperand(0) !=
RHS.getOperand(0) ||
22126 !isa<ConstantSDNode>(
LHS.getOperand(1)) ||
22127 !isa<ConstantSDNode>(
RHS.getOperand(1)) ||
22134   switch (Op.getOpcode()) {
22144   unsigned LExtIndex = LHS.getConstantOperandVal(1);
22145   unsigned RExtIndex = RHS.getConstantOperandVal(1);
22146   if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
22150   if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
22154   EVT VecVT = X.getValueType();
22156   unsigned NumLanes = BitWidth / 128;
22159          "Not expecting illegal vector widths here");
22164   unsigned LaneIdx = LExtIndex / NumEltsPerLane;
22166   LExtIndex %= NumEltsPerLane;
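// The extract-index checks above recognise "v[i] + v[i+1]" pairs (in either
// order) so the scalar add can be taken from one lane of a horizontal add.
// With intrinsics, lane 0 of _mm_hadd_ps(V, V) is exactly V[0] + V[1]
// (SSE3; a standalone sketch, not the lowering code itself):
#include <immintrin.h>

static float sumFirstTwoLanesSketch(__m128 V) {
  return _mm_cvtss_f32(_mm_hadd_ps(V, V)); // lane 0 = V[0] + V[1]
}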
22181 assert((
Op.getValueType() == MVT::f32 ||
Op.getValueType() == MVT::f64) &&
22182 "Only expecting float/double");
22193 MVT VT =
Op.getSimpleValueType();
22200 Point5Pred.
next(
true);
22214 "Wrong opcode for lowering FABS or FNEG.");
22226   MVT VT = Op.getSimpleValueType();
22228   bool IsF128 = (VT == MVT::f128);
22231          "Unexpected type in LowerFABSorFNEG");
22241   bool IsFakeVector = !VT.isVector() && !IsF128;
22244   LogicVT = (VT == MVT::f64)   ? MVT::v2f64
22245             : (VT == MVT::f32) ? MVT::v4f32
22263     return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
22268   SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
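// The constant-pool Mask and single logic op above are all FABS/FNEG need:
// FABS clears the IEEE sign bit with AND, FNEG flips it with XOR. Standalone
// scalar sketch of the f32 case (the lowering does the same on a "fake"
// v4f32/v2f64 vector so it can use the SSE logic instructions):
#include <cstdint>
#include <cstring>

static float fabsViaMaskSketch(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits &= 0x7FFFFFFFu; // clear the sign bit
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}

static float fnegViaMaskSketch(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits ^= 0x80000000u; // flip the sign bit
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}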
22279 MVT VT =
Op.getSimpleValueType();
22290 bool IsF128 = (VT == MVT::f128);
22293 "Unexpected type in LowerFCOPYSIGN");
22302 bool IsFakeVector = !VT.
isVector() && !IsF128;
22305 LogicVT = (VT == MVT::f64) ? MVT::v2f64
22306 : (VT == MVT::f32) ? MVT::v4f32
22326 APFloat APF = Op0CN->getValueAPF();
22338 return !IsFakeVector ?
Or
22346 MVT VT =
Op.getSimpleValueType();
22349 assert((OpVT == MVT::f32 || OpVT == MVT::f64) &&
22350 "Unexpected type for FGETSIGN");
22353 MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64);
22368 if (Src.getValueType().getScalarSizeInBits() < 32)
22379 if (Src.getValueType() == MVT::i64 &&
22422template <
typename F>
22424 EVT VecVT,
EVT CmpVT,
bool HasPT,
F SToV) {
22430 if (VecVT != CmpVT)
22439 if (VecVT != CmpVT)
22458 EVT OpVT =
X.getValueType();
22473 auto IsVectorBitCastCheap = [](
SDValue X) {
22475 return isa<ConstantSDNode>(
X) ||
X.getValueType().isVector() ||
22478 if ((!IsVectorBitCastCheap(
X) || !IsVectorBitCastCheap(
Y)) &&
22479 !IsOrXorXorTreeCCZero)
22485 bool NoImplicitFloatOps =
22487 Attribute::NoImplicitFloat);
22488 if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
22489 ((OpSize == 128 && Subtarget.
hasSSE2()) ||
22490 (OpSize == 256 && Subtarget.
hasAVX()) ||
22492 bool HasPT = Subtarget.
hasSSE41();
22497 bool PreferKOT = Subtarget.preferMaskRegisters();
22498 bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;
22500 EVT VecVT = MVT::v16i8;
22501 EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
22502 if (OpSize == 256) {
22503 VecVT = MVT::v32i8;
22504 CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
22506 EVT CastVT = VecVT;
22507 bool NeedsAVX512FCast =
false;
22508 if (OpSize == 512 || NeedZExt) {
22509 if (Subtarget.hasBWI()) {
22510 VecVT = MVT::v64i8;
22511 CmpVT = MVT::v64i1;
22515 VecVT = MVT::v16i32;
22516 CmpVT = MVT::v16i1;
22517 CastVT = OpSize == 512 ? VecVT
22518 : OpSize == 256 ? MVT::v8i32
22520 NeedsAVX512FCast =
true;
22525 bool TmpZext =
false;
22526 EVT TmpCastVT = CastVT;
22530 if (OrigSize < OpSize) {
22531 if (OrigSize == 128) {
22532 TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
22535 }
else if (OrigSize == 256) {
22536 TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
22543 if (!NeedZExt && !TmpZext)
22551 if (IsOrXorXorTreeCCZero) {
22560 if (VecVT != CmpVT) {
22562   } else if (HasPT) {
22569   if (VecVT != CmpVT) {
22570     EVT KRegVT = CmpVT == MVT::v64i1   ? MVT::i64
22571                  : CmpVT == MVT::v32i1 ? MVT::i32
22578       DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, Cmp);
22587   assert(Cmp.getValueType() == MVT::v16i8 &&
22588          "Non 128-bit vector on pre-SSE41 target");
22606 EVT VT = MVT::Other;
22610 assert(
Op.getOpcode() ==
unsigned(BinOp) &&
22611 "Unexpected bit reduction opcode");
22615 for (
unsigned Slot = 0, e = Opnds.
size(); Slot < e; ++Slot) {
22618 if (
I->getOpcode() ==
unsigned(BinOp)) {
22631 auto *
Idx = dyn_cast<ConstantSDNode>(
I->getOperand(1));
22637 if (M == SrcOpMap.
end()) {
22638 VT = Src.getValueType();
22640 if (!SrcOpMap.
empty() && VT != SrcOpMap.
begin()->first.getValueType())
22644 M = SrcOpMap.
insert(std::make_pair(Src, EltCount)).first;
22649 unsigned CIdx =
Idx->getZExtValue();
22650 if (M->second[CIdx])
22652 M->second.setBit(CIdx);
22658 SrcMask->push_back(SrcOpMap[
SrcOp]);
22661 for (
const auto &
I : SrcOpMap)
22662 if (!
I.second.isAllOnes())
22674 EVT VT =
LHS.getValueType();
22677 assert(ScalarSize == 1 &&
"Element Mask vs Vector bitwidth mismatch");
22692 APInt Mask = OriginalMask;
22694 auto MaskBits = [&](
SDValue Src) {
22695 if (Mask.isAllOnes())
22697 EVT SrcVT = Src.getValueType();
22706 if (IntVT != MVT::i64)
22709 MVT::i32, MVT::i32);
22711 MVT::i32, MVT::i32);
22728 bool UsePTEST = Subtarget.
hasSSE41();
22729 if (!UsePTEST && !Mask.isAllOnes() && ScalarSize > 32)
22733 unsigned TestSize = UseKORTEST ? 512 : (Subtarget.
hasAVX() ? 256 : 128);
22737 if (ScalarSize > TestSize) {
22738 if (!Mask.isAllOnes())
22752 VT = Split.first.getValueType();
22756 }
else if (!UsePTEST && !KnownRHS.
isZero()) {
22759 MVT SVT = ScalarSize >= 32 ? MVT::i32 : MVT::i8;
22768 VT = Split.first.getValueType();
22780 VT = Split.first.getValueType();
22806 MVT MaskVT = ScalarSize >= 32 ? MVT::v4i32 : MVT::v16i8;
22827 if (!CmpNull && !CmpAllOnes)
22831 if (!Subtarget.
hasSSE2() || !
Op->hasOneUse())
22839 switch (
Op.getOpcode()) {
22843 Op.getScalarValueSizeInBits());
22848 if (
auto *Cst = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
22849 Mask = Cst->getAPIntValue();
22850 Op =
Op.getOperand(0);
22863 EVT VT = VecIns[0].getValueType();
22865 [VT](
SDValue V) {
return VT == V.getValueType(); }) &&
22866 "Reduction source vector mismatch");
22874 for (
unsigned Slot = 0, e = VecIns.
size(); e - Slot > 1;
22875 Slot += 2, e += 1) {
22886 CC, Mask, Subtarget, DAG, X86CC);
22895 EVT MatchVT =
Match.getValueType();
22899 CC, Mask, Subtarget, DAG, X86CC);
22903 if (Mask.isAllOnes()) {
22904 assert(!
Op.getValueType().isVector() &&
22905 "Illegal vector type for reduction pattern");
22907 if (Src.getValueType().isFixedLengthVector() &&
22908 Src.getValueType().getScalarType() == MVT::i1) {
22914 EVT LHSVT =
LHS.getValueType();
22915 ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
22927 SDValue Inner = Src.getOperand(0);
22929 if (llvm::has_single_bit<uint32_t>(InnerVT.
getSizeInBits())) {
22935 SrcMask, Subtarget, DAG, X86CC);
22981 bool NeedCF =
false;
22982 bool NeedOF =
false;
22995 switch (
Op->getOpcode()) {
23000 if (
Op.getNode()->getFlags().hasNoSignedWrap())
23013 if (
Op.getResNo() != 0 || NeedOF || NeedCF) {
23018 unsigned Opcode = 0;
23019 unsigned NumOperands = 0;
23082 return SDValue(New.getNode(), 1);
23091 return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
23095 assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
23096 CmpVT == MVT::i32 || CmpVT == MVT::i64) &&
"Unexpected VT!");
23102 if (CmpVT == MVT::i16 && !Subtarget.hasFastImm16() &&
23105 auto *COp0 = dyn_cast<ConstantSDNode>(Op0);
23106 auto *COp1 = dyn_cast<ConstantSDNode>(Op1);
23108 if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
23109 (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
23110 unsigned ExtendOp =
23125 Op0 = DAG.
getNode(ExtendOp, dl, CmpVT, Op0);
23126 Op1 = DAG.
getNode(ExtendOp, dl, CmpVT, Op1);
23147 return Add.getValue(1);
23156 return Add.getValue(1);
23170bool X86TargetLowering::optimizeFMulOrFDivAsShiftAddBitcast(
23175 EVT FPVT =
N->getValueType(0);
23190 EVT VT =
Op.getValueType();
23201 return Subtarget.hasFastVectorFSQRT();
23202 return Subtarget.hasFastScalarFSQRT();
23209 int &RefinementSteps,
23210 bool &UseOneConstNR,
23211 bool Reciprocal)
const {
23213   EVT VT = Op.getValueType();
23223   if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
23224       (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) ||
23225       (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) ||
23226       (VT == MVT::v8f32 && Subtarget.hasAVX()) ||
23229     RefinementSteps = 1;
23231     UseOneConstNR = false;
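// The SSE branch above asks for RefinementSteps = 1 because RSQRTSS/RSQRTPS
// only guarantee roughly 12 bits of precision; one Newton-Raphson step,
// r' = r * (1.5 - 0.5 * x * r * r), restores close-to-full float accuracy.
// Standalone scalar sketch (assumes SSE1 and <immintrin.h>):
#include <immintrin.h>

static float rsqrtRefinedSketch(float X) {
  float R = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(X))); // ~12-bit estimate
  return R * (1.5f - 0.5f * X * R * R);                 // one N-R iteration
}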
23235 if (RefinementSteps == 0 && !Reciprocal)
23241 Subtarget.hasFP16()) {
23242 assert(Reciprocal &&
"Don't replace SQRT with RSQRT for half type");
23244 RefinementSteps = 0;
23246 if (VT == MVT::f16) {
23263 int &RefinementSteps)
const {
23265 EVT VT =
Op.getValueType();
23274 if ((VT == MVT::f32 && Subtarget.
hasSSE1()) ||
23275 (VT == MVT::v4f32 && Subtarget.
hasSSE1()) ||
23276 (VT == MVT::v8f32 && Subtarget.
hasAVX()) ||
23285 RefinementSteps = 1;
23293 Subtarget.hasFP16()) {
23295 RefinementSteps = 0;
23297 if (VT == MVT::f16) {
23316unsigned X86TargetLowering::combineRepeatedFPDivisors()
const {
23321 X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
23329          "Unexpected divisor!");
23337   EVT VT = N->getValueType(0);
23339   if (VT != MVT::i16 && VT != MVT::i32 &&
23340       !(Subtarget.is64Bit() && VT == MVT::i64))
23344   if (Divisor == 2 ||
23371   unsigned AndBitWidth = And.getValueSizeInBits();
23392   if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
23396                               Src.getValueType());
23402   if (!Src.getNode())
23407     Src = Src.getOperand(0);
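// A standalone scalar sketch of the bias-then-arithmetic-shift form of signed
// division by 2^K that the BuildSDIVPow2 hook a few lines above and the
// generic expansion are choosing between. Assumes a positive divisor 2^K with
// K in [1, 31] and arithmetic >> on negative values, as on x86: negative
// dividends are biased by 2^K - 1 so the shift rounds toward zero like C.
#include <cstdint>

static int32_t sdivPow2Sketch(int32_t X, unsigned K) {
  uint32_t Bias = static_cast<uint32_t>(X >> 31) >> (32 - K); // 2^K-1 if X < 0
  return (X + static_cast<int32_t>(Bias)) >> K;
}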
23428 SDValue &Op1,
bool &IsAlwaysSignaling) {
23441 switch (SetCCOpcode) {
23447 case ISD::SETGT: Swap =
true; [[fallthrough]];
23451 case ISD::SETGE: Swap =
true; [[fallthrough]];
23469 switch (SetCCOpcode) {
23471 IsAlwaysSignaling =
true;
23481 IsAlwaysSignaling =
false;
23493 "Unsupported VTs!");
23517 MVT VT =
Op.getSimpleValueType();
23519 "Cannot set masked compare for this operation");
23529 return DAG.
getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
23538 auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
23539 if (!BV || !V.getValueType().isSimple())
23542 MVT VT = V.getSimpleValueType();
23547 for (
unsigned i = 0; i < NumElts; ++i) {
23548 auto *Elt = dyn_cast<ConstantSDNode>(BV->
getOperand(i));
23553 const APInt &EltC = Elt->getAPIntValue();
23578 if (VET != MVT::i8 && VET != MVT::i16)
23629 SDValue Op0 =
Op.getOperand(IsStrict ? 1 : 0);
23630 SDValue Op1 =
Op.getOperand(IsStrict ? 2 : 1);
23632 MVT VT =
Op->getSimpleValueType(0);
23639 assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
23640 EltVT == MVT::f64);
23644 if (Subtarget.
hasAVX512() && !Subtarget.hasVLX())
23661 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
23662 {Chain, Op0, Op1, CC});
23681 (!IsStrict || Subtarget.hasVLX() ||
23686 (Num == 32 && (EltVT == MVT::f16 || EltVT == MVT::bf16)));
23698 bool IsAlwaysSignaling;
23700 if (!Subtarget.
hasAVX()) {
23708 if (IsStrict && IsAlwaysSignaling && !IsSignaling)
23712 if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
23714 Opc, dl, {VT, MVT::Other},
23729 unsigned CombineOpc;
23744 Opc, dl, {VT, MVT::Other},
23747 Opc, dl, {VT, MVT::Other},
23757 Cmp = DAG.
getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
23761 Opc, dl, {VT, MVT::Other},
23763 Chain = Cmp.getValue(1);
23772 SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
23774 Opc, dl, {VT, MVT::Other},
23776 Chain = Cmp.getValue(1);
23783 Op.getSimpleValueType().getFixedSizeInBits()) {
23788 Cmp = DAG.
getSetCC(dl,
Op.getSimpleValueType(), Cmp,
23803 assert(!IsStrict &&
"Strict SETCC only handles FP operands.");
23807 "Expected operands with same type!");
23809 "Invalid number of packed elements for source and destination!");
23814 "Value types for source and destination must be the same!");
23821 "Unexpected operand type");
23849 return DAG.
getNode(Opc, dl, VT, Op0, Op1,
23937 bool Invert =
false;
23942 case ISD::SETUGT: Invert =
true; [[fallthrough]];
23944 case ISD::SETULT: Invert =
true; [[fallthrough]];
23954 Result = DAG.
getNOT(dl, Result, VT);
23980 if (VT == MVT::v2i64) {
23991 static const int MaskHi[] = { 1, 1, 3, 3 };
24002 static const int MaskHi[] = { 1, 1, 3, 3 };
24016 static const int MaskLo[] = {0, 0, 2, 2};
24026 : 0x0000000080000000ULL,
24041 static const int MaskHi[] = { 1, 1, 3, 3 };
24042 static const int MaskLo[] = { 0, 0, 2, 2 };
24051 Result = DAG.
getNOT(dl, Result, MVT::v4i32);
24059 assert(Subtarget.
hasSSE2() && !FlipSigns &&
"Don't know how to lower!");
24069 static const int Mask[] = { 1, 0, 3, 2 };
24074 Result = DAG.
getNOT(dl, Result, MVT::v4i32);
24094 Result = DAG.
getNOT(dl, Result, VT);
24112 if (!(Subtarget.
hasAVX512() && VT == MVT::v16i1) &&
24113 !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
24114 !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
24127 bool KTestable =
false;
24128 if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))
24130 if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))
24208 if (VT == MVT::i32 || VT == MVT::i64 || Op0->
hasOneUse()) {
24239 SDValue EFLAGS =
EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
24248 MVT VT =
Op->getSimpleValueType(0);
24252 assert(VT == MVT::i8 &&
"SetCC type must be 8-bit integer");
24254 SDValue Op0 =
Op.getOperand(IsStrict ? 1 : 0);
24255 SDValue Op1 =
Op.getOperand(IsStrict ? 2 : 1);
24258 cast<CondCodeSDNode>(
Op.getOperand(IsStrict ? 3 : 2))->get();
24272 "Unexpected setcc expansion!");
24290 if (
auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
24291 const APInt &Op1Val = Op1C->getAPIntValue();
24296 APInt Op1ValPlusOne = Op1Val + 1;
24308 SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1,
CC, dl, DAG, X86CC);
24313 if (Subtarget.hasAVX10_2()) {
24332 dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
24350 assert(
LHS.getSimpleValueType().isInteger() &&
"SETCCCARRY is integer only.");
24367static std::pair<SDValue, SDValue>
24369 assert(
Op.getResNo() == 0 &&
"Unexpected result number!");
24373 unsigned BaseOp = 0;
24375 switch (
Op.getOpcode()) {
24407 Overflow =
Value.getValue(1);
24410 return std::make_pair(
Value, Overflow);
24424 assert(
Op->getValueType(1) == MVT::i8 &&
"Unexpected VT!");
24430 unsigned Opc =
Op.getOpcode();
24434 if (
Op.getResNo() == 1 &&
24447 SDValue VOp0 = V.getOperand(0);
24449 unsigned Bits = V.getValueSizeInBits();
24455                              unsigned X86CC, const SDLoc &DL,
24459   EVT VT = LHS.getValueType();
24465   auto SplatLSB = [&](EVT SplatVT) {
24469     if (CmpVT.bitsGT(SplatVT))
24471     else if (CmpVT.bitsLT(SplatVT))
24481     return SplatLSB(VT);
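// The SplatLSB helper above turns the 0/1 compare result into an all-zeros or
// all-ones mask (via NEG/SBB) so that, without CMOV, a select can be lowered
// to plain logic ops. Standalone scalar sketch of the same branchless blend:
#include <cstdint>

static uint32_t selectViaMaskSketch(bool Cond, uint32_t A, uint32_t B) {
  uint32_t Mask = 0u - static_cast<uint32_t>(Cond); // all-ones iff Cond
  return (A & Mask) | (B & ~Mask);                  // Cond ? A : B
}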
24485 isa<ConstantSDNode>(
RHS)) {
24493 auto isIdentityPatternZero = [&]() {
24494 switch (
RHS.getOpcode()) {
24500 if (
RHS.getOperand(0) ==
LHS ||
RHS.getOperand(1) ==
LHS) {
24501 Src1 =
RHS.getOperand(
RHS.getOperand(0) ==
LHS ? 1 : 0);
24510 if (
RHS.getOperand(0) ==
LHS) {
24511 Src1 =
RHS.getOperand(1);
24520 auto isIdentityPatternOnes = [&]() {
24521 switch (
LHS.getOpcode()) {
24525 if (
LHS.getOperand(0) ==
RHS ||
LHS.getOperand(1) ==
RHS) {
24526 Src1 =
LHS.getOperand(
LHS.getOperand(0) ==
RHS ? 1 : 0);
24543 if (!Subtarget.
canUseCMOV() && isIdentityPatternZero()) {
24550 if (!Subtarget.
canUseCMOV() && isIdentityPatternOnes()) {
24587 bool AddTest =
true;
24606 VT ==
Cond.getOperand(0).getSimpleValueType() &&
Cond->hasOneUse()) {
24608 bool IsAlwaysSignaling;
24611 CondOp0, CondOp1, IsAlwaysSignaling);
24621 if (SSECC < 8 || Subtarget.
hasAVX()) {
24641 MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
24646 MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
24667 !
isSoftF16(
Cond.getOperand(0).getSimpleValueType(), Subtarget)) {
24673 Op1 =
Op.getOperand(1);
24674 Op2 =
Op.getOperand(2);
24702 if (Subtarget.
canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) &&
24703 ((CondCode ==
X86::COND_NE && MatchFFSMinus1(Op1, Op2)) ||
24707 DL, DAG, Subtarget)) {
24709 }
else if ((VT == MVT::i32 || VT == MVT::i64) &&
isNullConstant(Op2) &&
24710 Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) &&
24723 Shift = DAG.
getNOT(
DL, Shift, VT);
24743 unsigned CondOpcode =
Cond.getOpcode();
24746 CC =
Cond.getOperand(0);
24749 bool IllegalFPCMov =
false;
24752 IllegalFPCMov = !
hasFPCMov(cast<ConstantSDNode>(
CC)->getSExtValue());
24814 if (
Op.getValueType() == MVT::i8 &&
24817 if (
T1.getValueType() == T2.getValueType() &&
24832 if ((
Op.getValueType() == MVT::i8 && Subtarget.
canUseCMOV()) ||
24851 MVT VT =
Op->getSimpleValueType(0);
24853 MVT InVT = In.getSimpleValueType();
24869 MVT WideVT = ExtVT;
24880 if ((Subtarget.hasDQI() && WideEltVT.
getSizeInBits() >= 32) ||
24882 V = DAG.
getNode(
Op.getOpcode(), dl, WideVT, In);
24886 V = DAG.
getSelect(dl, WideVT, In, NegOne, Zero);
24906 MVT InVT = In.getSimpleValueType();
24924 MVT VT =
Op->getSimpleValueType(0);
24925 MVT InVT = In.getSimpleValueType();
24931 if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
24933 if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
24941 unsigned Opc =
Op.getOpcode();
24951 InVT = In.getSimpleValueType();
24961 return DAG.
getNode(
Op.getOpcode(), dl, VT, In);
24968 return DAG.
getNode(ExtOpc, dl, VT, In);
24972 if (Subtarget.
hasAVX()) {
24979 for (
int i = 0; i != HalfNumElts; ++i)
24980 HiMask[i] = HalfNumElts + i;
24997 unsigned Scale = InNumElts / NumElts;
24999 for (
unsigned I = 0;
I != NumElts; ++
I)
25000 ShuffleMask.
append(Scale,
I);
25011 if (InVT != MVT::v4i32) {
25012 MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
25021 for (
unsigned i = 0; i != DestElts; ++i)
25022 Mask[i * Scale + (Scale - 1)] = i;
25032 if (VT == MVT::v2i64) {
25036 SignExt = DAG.
getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
25045 MVT VT =
Op->getSimpleValueType(0);
25047 MVT InVT = In.getSimpleValueType();
25055 "Expected same number of elements");
25059 "Unexpected element type");
25063 "Unexpected element type");
25065 if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
25066 assert(InVT == MVT::v32i8 &&
"Unexpected VT!");
25086 for (
unsigned i = 0; i != NumElems/2; ++i)
25087 ShufMask[i] = i + NumElems/2;
25097 SDValue StoredVal = Store->getValue();
25100 "Expecting 256/512-bit op");
25107 if (!Store->isSimple())
25112 std::tie(Value0, Value1) =
splitVector(StoredVal, DAG,
DL);
25114 SDValue Ptr0 = Store->getBasePtr();
25118 DAG.
getStore(Store->getChain(),
DL, Value0, Ptr0, Store->getPointerInfo(),
25119 Store->getOriginalAlign(),
25120 Store->getMemOperand()->getFlags());
25122 Store->getPointerInfo().getWithOffset(HalfOffset),
25123 Store->getOriginalAlign(),
25124 Store->getMemOperand()->getFlags());
25132 SDValue StoredVal = Store->getValue();
25135 StoredVal = DAG.
getBitcast(StoreVT, StoredVal);
25140 if (!Store->isSimple())
25149 for (
unsigned i = 0; i != NumElems; ++i) {
25150 unsigned Offset = i * ScalarSize;
25156 Store->getPointerInfo().getWithOffset(
Offset),
25157 Store->getOriginalAlign(),
25158 Store->getMemOperand()->getFlags());
25174 assert(NumElts <= 8 &&
"Unexpected VT");
25177 "Expected AVX512F without AVX512DQI");
25181 DAG.
getUNDEF(MVT::v16i1), StoredVal,
25183 StoredVal = DAG.
getBitcast(MVT::i16, StoredVal);
25204 ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
25205 !Subtarget.hasBWI())) {
25218 "Unexpected type action!");
25227 MVT StVT = Subtarget.is64Bit() && StoreVT.
isInteger() ? MVT::i64 : MVT::f64;
25229 StoredVal = DAG.
getBitcast(CastVT, StoredVal);
25253 MVT RegVT =
Op.getSimpleValueType();
25254 assert(RegVT.
isVector() &&
"We only custom lower vector loads.");
25256 "We only custom lower integer vector loads.");
25266 "Expected AVX512F without AVX512DQI");
25288 Opc =
Op.getOpcode();
25292 Op.getOperand(0).hasOneUse() &&
25294 Op.getOperand(1).hasOneUse());
25305 Cond.getOperand(0).getValueType() != MVT::f128 &&
25306 !
isSoftF16(
Cond.getOperand(0).getValueType(), Subtarget)) {
25326 Overflow,
Op->getFlags());
25329 if (
LHS.getSimpleValueType().isInteger()) {
25333 EFLAGS,
Op->getFlags());
25342 if (
Op.getNode()->hasOneUse()) {
25359 CCVal, Cmp,
Op->getFlags());
25362 Cmp,
Op->getFlags());
25372 Cmp,
Op->getFlags());
25375 Cmp,
Op->getFlags());
25382 Cmp,
Op->getFlags());
25393 Overflow,
Op->getFlags());
25400 EVT CondVT =
Cond.getValueType();
25422X86TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
25428 SplitStack || EmitStackProbeCall;
25436 EVT VT =
Node->getValueType(0);
25442 bool Is64Bit = Subtarget.is64Bit();
25449 assert(
SPReg &&
"Target cannot require DYNAMIC_STACKALLOC expansion and"
25450 " not tell us which reg is the stack pointer!");
25457 Chain =
Result.getValue(1);
25463 if (Alignment && *Alignment > StackAlign)
25468 }
else if (SplitStack) {
25473 for (
const auto &
A :
F.args()) {
25474 if (
A.hasNestAttr())
25476 "have nested arguments.");
25482 Chain =
Result.getValue(1);
25514 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
25517 if (!Subtarget.is64Bit() ||
25535 Op.getOperand(0),
DL,
25543 Op.getOperand(0),
DL,
25560 Op.getOperand(0),
DL, RSFIN, FIN,
25567 assert(Subtarget.is64Bit() &&
25568 "LowerVAARG only handles 64-bit va_arg!");
25578 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
25579 unsigned Align =
Op.getConstantOperandVal(3);
25582 EVT ArgVT =
Op.getNode()->getValueType(0);
25590 assert(ArgVT != MVT::f80 &&
"va_arg for f80 not yet implemented");
25595 "Unhandled argument type in LowerVAARG");
25599 if (ArgMode == 2) {
25601 assert(!Subtarget.useSoftFloat() &&
25608 SDValue InstOps[] = {Chain, SrcPtr,
25618 Chain =
VAARG.getValue(1);
25628 assert(Subtarget.is64Bit() &&
"This code only handles 64-bit va_copy!");
25637 const Value *DstSV = cast<SrcValueSDNode>(
Op.getOperand(3))->getValue();
25638 const Value *SrcSV = cast<SrcValueSDNode>(
Op.getOperand(4))->getValue();
25642 Chain,
DL, DstPtr, SrcPtr,
25677 if (VT !=
SrcOp.getSimpleValueType())
25685 if (ShiftAmt >= ElementType.getSizeInBits()) {
25687 ShiftAmt = ElementType.getSizeInBits() - 1;
25693 &&
"Unknown target vector shift-by-constant node");
25729 "Illegal vector splat index");
25732 if (ShAmtIdx != 0) {
25734 Mask[0] = ShAmtIdx;
25751 bool IsMasked =
false;
25760 AmtVT = MVT::v4i32;
25772 {ShAmt.getOperand(1), Mask}))) {
25791 }
else if (Subtarget.
hasSSE41()) {
25793 MVT::v2i64, ShAmt);
25828 assert(MaskVT.
bitsLE(Mask.getSimpleValueType()) &&
"Unexpected mask size!");
25830 if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
25831 assert(MaskVT == MVT::v64i1 &&
"Expected v64i1 mask!");
25832 assert(Subtarget.hasBWI() &&
"Expected AVX512BW target!");
25841 Mask.getSimpleValueType().getSizeInBits());
25857 MVT VT =
Op.getSimpleValueType();
25869 return DAG.
getNode(OpcodeSelect, dl, VT, VMask,
Op, PreservedSrc);
25884 if (
auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
25885 if (MaskConst->getZExtValue() & 0x1)
25888 MVT VT =
Op.getSimpleValueType();
25891 assert(Mask.getValueType() == MVT::i8 &&
"Unexpect type");
25908 "querying registration node size for function without personality");
25917 "can only recover FP for 32-bit MSVC EH personality functions");
25953 if (Subtarget.is64Bit())
25954 return DAG.
getNode(
ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
25961 return DAG.
getNode(
ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
25967   auto isRoundModeCurDirection = [](SDValue Rnd) {
25968     if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
25973   auto isRoundModeSAE = [](SDValue Rnd) {
25974     if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
25975       unsigned RC = C->getZExtValue();
25987   auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) {
25988     if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
25989       RC = C->getZExtValue();
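// Sketch of the classification these three lambdas perform on the rounding
// immediate, assuming the usual X86::STATIC_ROUNDING encoding
// (TO_NEAREST_INT=0, TO_NEG_INF=1, TO_POS_INF=2, TO_ZERO=3, CUR_DIRECTION=4,
// NO_EXC=8):
//   4          -> current MXCSR rounding mode, exceptions honoured
//   8 | 4      -> SAE only: suppress exceptions, keep the current mode
//   8 | {0..3} -> SAE plus an explicit static rounding mode
enum class RoundImmKindSketch { CurDirection, SAEOnly, StaticRC, Invalid };

static RoundImmKindSketch classifyRoundImmSketch(unsigned Imm, unsigned &RC) {
  if (Imm == 4)
    return RoundImmKindSketch::CurDirection;
  if ((Imm & 8) == 0)
    return RoundImmKindSketch::Invalid; // static rounding requires NO_EXC
  unsigned Rest = Imm ^ 8;
  if (Rest == 4)
    return RoundImmKindSketch::SAEOnly;
  if (Rest <= 3) {
    RC = Rest;
    return RoundImmKindSketch::StaticRC;
  }
  return RoundImmKindSketch::Invalid;
}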
26004 unsigned IntNo =
Op.getConstantOperandVal(0);
26005 MVT VT =
Op.getSimpleValueType();
26012 switch(IntrData->
Type) {
26017 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26018 if (IntrWithRoundingModeOpcode != 0) {
26021 if (isRoundModeSAEToX(Rnd, RC))
26022 return DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26025 if (!isRoundModeCurDirection(Rnd))
26035 if (isRoundModeCurDirection(Sae))
26036 Opc = IntrData->
Opc0;
26037 else if (isRoundModeSAE(Sae))
26038 Opc = IntrData->
Opc1;
26042 return DAG.
getNode(Opc, dl,
Op.getValueType(),
Op.getOperand(1));
26050 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26051 if (IntrWithRoundingModeOpcode != 0) {
26054 if (isRoundModeSAEToX(Rnd, RC))
26055 return DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26056 Op.getOperand(1), Src2,
26058 if (!isRoundModeCurDirection(Rnd))
26063 Op.getOperand(1), Src2);
26069 if (isRoundModeCurDirection(Sae))
26070 Opc = IntrData->
Opc0;
26071 else if (isRoundModeSAE(Sae))
26072 Opc = IntrData->
Opc1;
26076 return DAG.
getNode(Opc, dl,
Op.getValueType(),
Op.getOperand(1),
26093 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26094 if (IntrWithRoundingModeOpcode != 0) {
26097 if (isRoundModeSAEToX(Rnd, RC))
26098 return DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26101 if (!isRoundModeCurDirection(Rnd))
26106 {Src1, Src2, Src3});
26116 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
26126 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26127 if (IntrWithRoundingModeOpcode != 0) {
26130 if (isRoundModeSAEToX(Rnd, RC))
26132 DAG.
getNode(IntrWithRoundingModeOpcode, dl,
Op.getValueType(),
26134 Mask, PassThru, Subtarget, DAG);
26135 if (!isRoundModeCurDirection(Rnd))
26139 DAG.
getNode(IntrData->
Opc0, dl, VT, Src), Mask, PassThru,
26149 if (isRoundModeCurDirection(Rnd))
26150 Opc = IntrData->
Opc0;
26151 else if (isRoundModeSAE(Rnd))
26152 Opc = IntrData->
Opc1;
26164 unsigned IntrWithRoundingModeOpcode = IntrData->
Opc1;
26168 bool HasRounding = IntrWithRoundingModeOpcode != 0;
26173 if (isRoundModeSAEToX(Rnd, RC))
26175 DAG.
getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,
26177 Mask, passThru, Subtarget, DAG);
26178 if (!isRoundModeCurDirection(Rnd))
26183 Mask, passThru, Subtarget, DAG);
26187 "Unexpected intrinsic form");
26189 unsigned Opc = IntrData->
Opc0;
26192 if (isRoundModeSAE(Sae))
26193 Opc = IntrWithRoundingModeOpcode;
26194 else if (!isRoundModeCurDirection(Sae))
26199 Mask, passThru, Subtarget, DAG);
26210 if (isRoundModeCurDirection(Rnd))
26211 NewOp = DAG.
getNode(IntrData->
Opc0, dl, VT, Src1, Src2);
26212 else if (isRoundModeSAEToX(Rnd, RC))
26213 NewOp = DAG.
getNode(IntrData->
Opc1, dl, VT, Src1, Src2,
26227 if (isRoundModeCurDirection(Sae))
26228 Opc = IntrData->
Opc0;
26229 else if (isRoundModeSAE(Sae))
26230 Opc = IntrData->
Opc1;
26235 Mask, passThru, Subtarget, DAG);
26243 if (IntrData->
Opc1 != 0) {
26246 if (isRoundModeSAEToX(Rnd, RC))
26247 NewOp = DAG.
getNode(IntrData->
Opc1, dl, VT, Src1, Src2,
26249 else if (!isRoundModeCurDirection(Rnd))
26253 NewOp = DAG.
getNode(IntrData->
Opc0, dl, VT, Src1, Src2);
26262 unsigned Opc = IntrData->
Opc0;
26263 if (IntrData->
Opc1 != 0) {
26265 if (isRoundModeSAE(Sae))
26266 Opc = IntrData->
Opc1;
26267 else if (!isRoundModeCurDirection(Sae))
26272 Mask, PassThru, Subtarget, DAG);
26282 if (isRoundModeCurDirection(Sae))
26283 Opc = IntrData->
Opc0;
26284 else if (isRoundModeSAE(Sae))
26285 Opc = IntrData->
Opc1;
26290 Mask, PassThru, Subtarget, DAG);
26299 unsigned Opc = IntrData->
Opc0;
26300 if (IntrData->
Opc1 != 0) {
26302 if (isRoundModeSAE(Sae))
26303 Opc = IntrData->
Opc1;
26304 else if (!isRoundModeCurDirection(Sae))
26308 Mask, PassThru, Subtarget, DAG);
26319 return DAG.
getNode(IntrData->
Opc0, dl, VT, Src3, Src2, Src1);
26326 return DAG.
getNode(IntrData->
Opc0, dl, VT,Src2, Src1);
26334 MVT VT =
Op.getSimpleValueType();
26344 if (IntrData->
Opc1 != 0) {
26347 if (isRoundModeSAEToX(Rnd, RC))
26348 NewOp = DAG.
getNode(IntrData->
Opc1, dl, VT, Src1, Src2, Src3,
26350 else if (!isRoundModeCurDirection(Rnd))
26354 NewOp = DAG.
getNode(IntrData->
Opc0, dl, VT, Src1, Src2, Src3);
26364 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
26381 MVT MaskVT =
Op.getSimpleValueType();
26387 if (IntrData->
Opc1 != 0) {
26389 if (isRoundModeSAE(Sae))
26390 return DAG.
getNode(IntrData->
Opc1, dl, MaskVT,
Op.getOperand(1),
26391 Op.getOperand(2),
CC, Mask, Sae);
26392 if (!isRoundModeCurDirection(Sae))
26397 {Op.getOperand(1), Op.getOperand(2), CC, Mask});
26406 if (IntrData->
Opc1 != 0) {
26408 if (isRoundModeSAE(Sae))
26410 else if (!isRoundModeCurDirection(Sae))
26414 if (!
Cmp.getNode())
26435 bool HasAVX10_2_COMX =
26440 bool HasAVX10_2_COMX_Ty = (
LHS.getSimpleValueType() != MVT::v8bf16);
26442 auto ComiOpCode = IntrData->
Opc0;
26445 if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26454 if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26463 if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26487 unsigned CondVal =
Op.getConstantOperandVal(3);
26491 if (isRoundModeCurDirection(Sae))
26494 else if (isRoundModeSAE(Sae))
26511 "Unexpected VSHIFT amount type");
26514 if (
auto *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
26516 Op.getSimpleValueType(),
SrcOp,
26517 CShAmt->getZExtValue(), DAG);
26521 SrcOp, ShAmt, 0, Subtarget, DAG);
26525 SDValue DataToCompress =
Op.getOperand(1);
26528 return Op.getOperand(1);
26534 return DAG.
getNode(IntrData->
Opc0, dl, VT, DataToCompress, PassThru,
26548 unsigned Opc = IntrData->
Opc0;
26549 if (IntrData->
Opc1 != 0) {
26551 if (isRoundModeSAE(Sae))
26552 Opc = IntrData->
Opc1;
26553 else if (!isRoundModeCurDirection(Sae))
26557 SDValue FixupImm = DAG.
getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);
26568 uint64_t Round =
Op.getConstantOperandVal(2);
26577 uint64_t Round =
Op.getConstantOperandVal(3);
26587 Op.getValueType());
26589 Op.getOperand(1), Control);
26600 Res = DAG.
getNode(IntrData->
Opc1, dl, VTs,
Op.getOperand(2),
26605 Res = DAG.
getNode(IntrData->
Opc0, dl, VTs,
Op.getOperand(2),
26621 return DAG.
getNode(IntrData->
Opc0, dl,
Op.getValueType(), Src);
26623 MVT SrcVT = Src.getSimpleValueType();
26627 {Src, PassThru, Mask});
26636 return DAG.
getNode(IntrData->
Opc0, dl,
Op.getValueType(), {Src, Src2});
26642 {Src, Src2, PassThru, Mask});
26651 unsigned Opc = IntrData->
Opc0;
26652 bool SAE = Src.getValueType().is512BitVector() &&
26653 (isRoundModeSAEToX(Rnd, RC) || isRoundModeSAE(Rnd));
26660 return DAG.
getNode(Opc, dl,
Op.getValueType(), Src, Rnd);
26665 Opc = IntrData->
Opc1;
26666 MVT SrcVT = Src.getSimpleValueType();
26669 return DAG.
getNode(Opc, dl,
Op.getValueType(), Src, Rnd, PassThru, Mask);
26677 return DAG.
getNode(IntrData->
Opc0, dl,
Op.getValueType(), Src);
26683 return DAG.
getNode(IntrData->
Opc1, dl,
Op.getValueType(), Src, PassThru,
26697 case Intrinsic::x86_avx512_ktestc_b:
26698 case Intrinsic::x86_avx512_ktestc_w:
26699 case Intrinsic::x86_avx512_ktestc_d:
26700 case Intrinsic::x86_avx512_ktestc_q:
26701 case Intrinsic::x86_avx512_ktestz_b:
26702 case Intrinsic::x86_avx512_ktestz_w:
26703 case Intrinsic::x86_avx512_ktestz_d:
26704 case Intrinsic::x86_avx512_ktestz_q:
26705 case Intrinsic::x86_sse41_ptestz:
26706 case Intrinsic::x86_sse41_ptestc:
26707 case Intrinsic::x86_sse41_ptestnzc:
26708 case Intrinsic::x86_avx_ptestz_256:
26709 case Intrinsic::x86_avx_ptestc_256:
26710 case Intrinsic::x86_avx_ptestnzc_256:
26711 case Intrinsic::x86_avx_vtestz_ps:
26712 case Intrinsic::x86_avx_vtestc_ps:
26713 case Intrinsic::x86_avx_vtestnzc_ps:
26714 case Intrinsic::x86_avx_vtestz_pd:
26715 case Intrinsic::x86_avx_vtestc_pd:
26716 case Intrinsic::x86_avx_vtestnzc_pd:
26717 case Intrinsic::x86_avx_vtestz_ps_256:
26718 case Intrinsic::x86_avx_vtestc_ps_256:
26719 case Intrinsic::x86_avx_vtestnzc_ps_256:
26720 case Intrinsic::x86_avx_vtestz_pd_256:
26721 case Intrinsic::x86_avx_vtestc_pd_256:
26722 case Intrinsic::x86_avx_vtestnzc_pd_256: {
26727 case Intrinsic::x86_avx512_ktestc_b:
26728 case Intrinsic::x86_avx512_ktestc_w:
26729 case Intrinsic::x86_avx512_ktestc_d:
26730 case Intrinsic::x86_avx512_ktestc_q:
26735 case Intrinsic::x86_avx512_ktestz_b:
26736 case Intrinsic::x86_avx512_ktestz_w:
26737 case Intrinsic::x86_avx512_ktestz_d:
26738 case Intrinsic::x86_avx512_ktestz_q:
26742 case Intrinsic::x86_avx_vtestz_ps:
26743 case Intrinsic::x86_avx_vtestz_pd:
26744 case Intrinsic::x86_avx_vtestz_ps_256:
26745 case Intrinsic::x86_avx_vtestz_pd_256:
26748 case Intrinsic::x86_sse41_ptestz:
26749 case Intrinsic::x86_avx_ptestz_256:
26753 case Intrinsic::x86_avx_vtestc_ps:
26754 case Intrinsic::x86_avx_vtestc_pd:
26755 case Intrinsic::x86_avx_vtestc_ps_256:
26756 case Intrinsic::x86_avx_vtestc_pd_256:
26759 case Intrinsic::x86_sse41_ptestc:
26760 case Intrinsic::x86_avx_ptestc_256:
26764 case Intrinsic::x86_avx_vtestnzc_ps:
26765 case Intrinsic::x86_avx_vtestnzc_pd:
26766 case Intrinsic::x86_avx_vtestnzc_ps_256:
26767 case Intrinsic::x86_avx_vtestnzc_pd_256:
26770 case Intrinsic::x86_sse41_ptestnzc:
26771 case Intrinsic::x86_avx_ptestnzc_256:
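// The ptestz/ptestc/ptestnzc intrinsics above all map to one PTEST node plus
// a flag read: PTEST sets ZF when (src1 & src2) == 0 and CF when
// (~src1 & src2) == 0, and the lowering simply picks which flag to
// materialise (the vtestps/vtestpd forms do the analogous thing on the sign
// bits only). Standalone 64-bit scalar analogue of those flags:
#include <cstdint>

struct PTestFlagsSketch {
  bool ZF, CF;
};

static PTestFlagsSketch ptestModelSketch(uint64_t Src1, uint64_t Src2) {
  return {(Src1 & Src2) == 0, (~Src1 & Src2) == 0};
}
// testz reads ZF, testc reads CF, and testnzc is "neither": !ZF && !CF.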
26784 case Intrinsic::x86_sse42_pcmpistria128:
26785 case Intrinsic::x86_sse42_pcmpestria128:
26786 case Intrinsic::x86_sse42_pcmpistric128:
26787 case Intrinsic::x86_sse42_pcmpestric128:
26788 case Intrinsic::x86_sse42_pcmpistrio128:
26789 case Intrinsic::x86_sse42_pcmpestrio128:
26790 case Intrinsic::x86_sse42_pcmpistris128:
26791 case Intrinsic::x86_sse42_pcmpestris128:
26792 case Intrinsic::x86_sse42_pcmpistriz128:
26793 case Intrinsic::x86_sse42_pcmpestriz128: {
26798 case Intrinsic::x86_sse42_pcmpistria128:
26802 case Intrinsic::x86_sse42_pcmpestria128:
26806 case Intrinsic::x86_sse42_pcmpistric128:
26810 case Intrinsic::x86_sse42_pcmpestric128:
26814 case Intrinsic::x86_sse42_pcmpistrio128:
26818 case Intrinsic::x86_sse42_pcmpestrio128:
26822 case Intrinsic::x86_sse42_pcmpistris128:
26826 case Intrinsic::x86_sse42_pcmpestris128:
26830 case Intrinsic::x86_sse42_pcmpistriz128:
26834 case Intrinsic::x86_sse42_pcmpestriz128:
26846 case Intrinsic::x86_sse42_pcmpistri128:
26847 case Intrinsic::x86_sse42_pcmpestri128: {
26849 if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
26856 return DAG.
getNode(Opcode, dl, VTs, NewOps);
26859 case Intrinsic::x86_sse42_pcmpistrm128:
26860 case Intrinsic::x86_sse42_pcmpestrm128: {
26862 if (IntNo == Intrinsic::x86_sse42_pcmpistrm128)
26872 case Intrinsic::eh_sjlj_lsda: {
26877 MCSymbol *S = Context.getOrCreateSymbol(
Twine(
"GCC_except_table") +
26879 return DAG.
getNode(getGlobalWrapperKind(
nullptr, 0), dl, VT,
26883 case Intrinsic::x86_seh_lsda: {
26887 auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
26897 case Intrinsic::eh_recoverfp: {
26899 SDValue IncomingFPOp =
Op.getOperand(2);
26901 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->
getGlobal() :
nullptr);
26904 "llvm.eh.recoverfp must take a function as the first argument");
26908 case Intrinsic::localaddress: {
26914 if (
RegInfo->hasBasePointer(MF))
26917 bool CantUseFP =
RegInfo->hasStackRealignment(MF);
26919 Reg =
RegInfo->getPtrSizedStackRegister(MF);
26921 Reg =
RegInfo->getPtrSizedFrameRegister(MF);
26925 case Intrinsic::x86_avx512_vp2intersect_q_512:
26926 case Intrinsic::x86_avx512_vp2intersect_q_256:
26927 case Intrinsic::x86_avx512_vp2intersect_q_128:
26928 case Intrinsic::x86_avx512_vp2intersect_d_512:
26929 case Intrinsic::x86_avx512_vp2intersect_d_256:
26930 case Intrinsic::x86_avx512_vp2intersect_d_128: {
26931 MVT MaskVT =
Op.getSimpleValueType();
26938 Op->getOperand(1),
Op->getOperand(2));
26946 case Intrinsic::x86_mmx_pslli_w:
26947 case Intrinsic::x86_mmx_pslli_d:
26948 case Intrinsic::x86_mmx_pslli_q:
26949 case Intrinsic::x86_mmx_psrli_w:
26950 case Intrinsic::x86_mmx_psrli_d:
26951 case Intrinsic::x86_mmx_psrli_q:
26952 case Intrinsic::x86_mmx_psrai_w:
26953 case Intrinsic::x86_mmx_psrai_d: {
26957 if (
auto *
C = dyn_cast<ConstantSDNode>(ShAmt)) {
26960 unsigned ShiftAmount =
C->getAPIntValue().getLimitedValue(255);
26961 if (ShiftAmount == 0)
26962 return Op.getOperand(1);
26965 Op.getOperand(0),
Op.getOperand(1),
26969 unsigned NewIntrinsic;
26972 case Intrinsic::x86_mmx_pslli_w:
26973 NewIntrinsic = Intrinsic::x86_mmx_psll_w;
26975 case Intrinsic::x86_mmx_pslli_d:
26976 NewIntrinsic = Intrinsic::x86_mmx_psll_d;
26978 case Intrinsic::x86_mmx_pslli_q:
26979 NewIntrinsic = Intrinsic::x86_mmx_psll_q;
26981 case Intrinsic::x86_mmx_psrli_w:
26982 NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
26984 case Intrinsic::x86_mmx_psrli_d:
26985 NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
26987 case Intrinsic::x86_mmx_psrli_q:
26988 NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
26990 case Intrinsic::x86_mmx_psrai_w:
26991 NewIntrinsic = Intrinsic::x86_mmx_psra_w;
26993 case Intrinsic::x86_mmx_psrai_d:
26994 NewIntrinsic = Intrinsic::x86_mmx_psra_d;
27005 Op.getOperand(1), ShAmt);
case Intrinsic::thread_pointer: {
         "Target OS doesn't support __builtin_thread_pointer() yet.");
  auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
  EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
  MVT VT = Op.getSimpleValueType();
  auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
  unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
  if (Mask.getValueType() != MaskVT)
    Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
  auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
  unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
                              Src.getSimpleValueType().getVectorNumElements());
  if (Mask.getValueType() != MaskVT)
    Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
  auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
  MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
  SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
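// The helper below (likely getReadTimeStampCounter; its signature is only
// partially preserved in this excerpt) glues copies out of EDX:EAX / RDX:RAX
// after RDTSC/RDTSCP and, on 64-bit targets, shifts and ORs the two halves
// into a single i64 result.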
                                    unsigned TargetOpcode,
  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
  SDValue N1Ops[] = {Chain, Glue};
  if (Subtarget.is64Bit()) {
  Chain = HI.getValue(1);
  Glue = HI.getValue(2);
  if (Subtarget.is64Bit()) {
  if (Opcode != X86::RDTSCP)
  auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
  auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard);
  if (!Subtarget.is64Bit())
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::swift_async_context_addr: {
    X86FI->setHasSwiftAsyncContext(true);
    int PtrSize = Subtarget.is64Bit() ? 8 : 4;
    if (!X86FI->getSwiftAsyncContextFrameIdx())
      X86FI->setSwiftAsyncContextFrameIdx(
                          PtrSize == 8 ? MVT::i64 : MVT::i32);
                          Op->getOperand(0));
  case llvm::Intrinsic::x86_seh_ehregnode:
  case llvm::Intrinsic::x86_seh_ehguard:
  case llvm::Intrinsic::x86_rdpkru: {
  case llvm::Intrinsic::x86_wrpkru: {
                       Op.getOperand(0), Op.getOperand(2),
  case llvm::Intrinsic::asan_check_memaccess: {
  case llvm::Intrinsic::x86_flags_read_u32:
  case llvm::Intrinsic::x86_flags_read_u64:
  case llvm::Intrinsic::x86_flags_write_u32:
  case llvm::Intrinsic::x86_flags_write_u64: {
  case Intrinsic::x86_lwpins32:
  case Intrinsic::x86_lwpins64:
  case Intrinsic::x86_umwait:
  case Intrinsic::x86_tpause: {
    case Intrinsic::x86_umwait:
    case Intrinsic::x86_tpause:
    case Intrinsic::x86_lwpins32:
    case Intrinsic::x86_lwpins64:
    DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
                Op->getOperand(3), Op->getOperand(4));
case Intrinsic::x86_enqcmd:
case Intrinsic::x86_enqcmds: {
  case Intrinsic::x86_enqcmd:
  case Intrinsic::x86_enqcmds:
case Intrinsic::x86_aesenc128kl:
case Intrinsic::x86_aesdec128kl:
case Intrinsic::x86_aesenc256kl:
case Intrinsic::x86_aesdec256kl: {
  case Intrinsic::x86_aesenc128kl:
  case Intrinsic::x86_aesdec128kl:
  case Intrinsic::x86_aesenc256kl:
  case Intrinsic::x86_aesdec256kl:
      Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT,
      {ZF, Operation.getValue(0), Operation.getValue(2)});
case Intrinsic::x86_aesencwide128kl:
case Intrinsic::x86_aesdecwide128kl:
case Intrinsic::x86_aesencwide256kl:
case Intrinsic::x86_aesdecwide256kl: {
      {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
       MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
  case Intrinsic::x86_aesencwide128kl:
  case Intrinsic::x86_aesdecwide128kl:
  case Intrinsic::x86_aesencwide256kl:
  case Intrinsic::x86_aesdecwide256kl:
      {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
       Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
       Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)},
      {ZF, Operation.getValue(1), Operation.getValue(2),
       Operation.getValue(3), Operation.getValue(4),
       Operation.getValue(5), Operation.getValue(6),
       Operation.getValue(7), Operation.getValue(8),
       Operation.getValue(9)});
case Intrinsic::x86_testui: {
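// The AMX transpose-load intrinsics below (x86_t2rpntlvwz*_internal) are
// mapped 1:1 to their PT2RPNTLVWZ*V pseudo instructions; the pseudos are
// expected to be turned into concrete tile instructions later in the backend.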
case Intrinsic::x86_t2rpntlvwz0rs_internal:
case Intrinsic::x86_t2rpntlvwz0rst1_internal:
case Intrinsic::x86_t2rpntlvwz1rs_internal:
case Intrinsic::x86_t2rpntlvwz1rst1_internal:
case Intrinsic::x86_t2rpntlvwz0_internal:
case Intrinsic::x86_t2rpntlvwz0t1_internal:
case Intrinsic::x86_t2rpntlvwz1_internal:
case Intrinsic::x86_t2rpntlvwz1t1_internal: {
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::x86_t2rpntlvwz0_internal:
    Opc = X86::PT2RPNTLVWZ0V;
  case Intrinsic::x86_t2rpntlvwz0t1_internal:
    Opc = X86::PT2RPNTLVWZ0T1V;
  case Intrinsic::x86_t2rpntlvwz1_internal:
    Opc = X86::PT2RPNTLVWZ1V;
  case Intrinsic::x86_t2rpntlvwz1t1_internal:
    Opc = X86::PT2RPNTLVWZ1T1V;
  case Intrinsic::x86_t2rpntlvwz0rs_internal:
    Opc = X86::PT2RPNTLVWZ0RSV;
  case Intrinsic::x86_t2rpntlvwz0rst1_internal:
    Opc = X86::PT2RPNTLVWZ0RST1V;
  case Intrinsic::x86_t2rpntlvwz1rs_internal:
    Opc = X86::PT2RPNTLVWZ1RSV;
  case Intrinsic::x86_t2rpntlvwz1rst1_internal:
    Opc = X86::PT2RPNTLVWZ1RST1V;
case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
  MVT VT = Op.getSimpleValueType();
  unsigned Opc = IntNo == Intrinsic::x86_atomic_bts_rm ? X86ISD::LBTS_RM
                       {Chain, Op1, Op2}, VT, MMO);
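// Both the register-position (_rm) and immediate forms of the atomic bit-test
// intrinsics are lowered to the corresponding X86ISD::LBTS/LBTC/LBTR (and
// *_RM) memory intrinsic nodes; the immediate form additionally carries the
// operand size.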
case Intrinsic::x86_atomic_bts:
case Intrinsic::x86_atomic_btc:
case Intrinsic::x86_atomic_btr: {
  MVT VT = Op.getSimpleValueType();
  unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS
                       {Chain, Op1, Op2, Size}, VT, MMO);
case Intrinsic::x86_cmpccxadd32:
case Intrinsic::x86_cmpccxadd64: {
case Intrinsic::x86_aadd32:
case Intrinsic::x86_aadd64:
case Intrinsic::x86_aand32:
case Intrinsic::x86_aand64:
case Intrinsic::x86_aor32:
case Intrinsic::x86_aor64:
case Intrinsic::x86_axor32:
case Intrinsic::x86_axor64: {
  case Intrinsic::x86_aadd32:
  case Intrinsic::x86_aadd64:
  case Intrinsic::x86_aand32:
  case Intrinsic::x86_aand64:
  case Intrinsic::x86_aor32:
  case Intrinsic::x86_aor64:
  case Intrinsic::x86_axor32:
  case Intrinsic::x86_axor64:
                       {Chain, Op1, Op2}, VT, MMO);
case Intrinsic::x86_atomic_add_cc:
case Intrinsic::x86_atomic_sub_cc:
case Intrinsic::x86_atomic_or_cc:
case Intrinsic::x86_atomic_and_cc:
case Intrinsic::x86_atomic_xor_cc: {
  case Intrinsic::x86_atomic_add_cc:
  case Intrinsic::x86_atomic_sub_cc:
  case Intrinsic::x86_atomic_or_cc:
  case Intrinsic::x86_atomic_and_cc:
  case Intrinsic::x86_atomic_xor_cc:
                       {Chain, Op1, Op2}, VT, MMO);
switch (IntrData->Type) {
    SDValue(Result.getNode(), 1)};
    SDValue(Result.getNode(), 2));
                       Scale, Chain, Subtarget);
                       Scale, Chain, Subtarget);
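// PREFETCH table entries: the hint operand must be 2 or 3 (asserted below) and
// selects between the two opcodes recorded for the intrinsic (Opc1 for hint 2,
// Opc0 for hint 3).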
  const APInt &HintVal = Op.getConstantOperandAPInt(6);
  assert((HintVal == 2 || HintVal == 3) &&
         "Wrong prefetch hint in intrinsic: should be 2 or 3");
  unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
  SDValue DataToTruncate = Op.getOperand(3);
  assert(MemIntr && "Expected MemIntrinsicSDNode!");
  switch (TruncationOp) {
  unsigned Depth = Op.getConstantOperandVal(0);
  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  EVT VT = Op.getValueType();
  int FrameAddrIndex = FuncInfo->getFAIndex();
  if (!FrameAddrIndex) {
    unsigned SlotSize = RegInfo->getSlotSize();
                                 SlotSize, 0, false);
  unsigned FrameReg =
  unsigned Depth = Op.getConstantOperandVal(0);
  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
          (FrameReg == X86::EBP && VT == MVT::i32)) &&
         "Invalid Frame Register!");
                       .Case("esp", X86::ESP)
                       .Case("rsp", X86::RSP)
                       .Case("ebp", X86::EBP)
                       .Case("rbp", X86::RBP)
                       .Case("r14", X86::R14)
                       .Case("r15", X86::R15)
  if (Reg == X86::EBP || Reg == X86::RBP) {
    if (!TFI.hasFP(MF))
                         " is allocatable: function has no frame pointer");
  assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
         "Invalid Frame Register!");
                                       const Constant *PersonalityFn) const {
                                       const Constant *PersonalityFn) const {
  return X86::NoRegister;
  assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
          (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
         "Invalid Frame Register!");
  Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
  Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
  if (!Subtarget.is64Bit()) {
                     Op.getOperand(0), Op.getOperand(1));
                     Op.getOperand(0), Op.getOperand(1));
  return Op.getOperand(0);
  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  if (Subtarget.is64Bit()) {
    const unsigned char JMP64r = 0xFF;
    const unsigned char MOV64ri = 0xB8;
    const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
    const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
    const unsigned char REX_WB = 0x40 | 0x08 | 0x01;
    unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB;
    OpCode = ((MOV64ri | N86R10) << 8) | REX_WB;
    OpCode = (JMP64r << 8) | REX_WB;
    unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6);
    cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
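// INIT_TRAMPOLINE, 64-bit path above: the trampoline bytes are emitted by
// hand -- REX.WB-prefixed MOV64ri of the target into R11 and the nest value
// into R10, followed by an indirect JMP64r through R11 (ModRM /4,
// register-direct).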
      NestReg = X86::ECX;
      if (!Attrs.isEmpty() && !Func->isVarArg()) {
        unsigned InRegCount = 0;
                                  E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
            InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
        if (InRegCount > 2) {
      NestReg = X86::EAX;
    const unsigned char MOV32ri = 0xB8;
    const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
    const unsigned char JMP = 0xE9;
  MVT VT = Op.getSimpleValueType();
  SDValue Ops[] = {Chain, StackSlot};
      DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI,
  SDValue Chain = Op.getNode()->getOperand(0);
  SDValue Ops[] = {Chain, StackSlot};
  SDValue NewRM = Op.getNode()->getOperand(1);
  if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) {
  SDValue OpsLD[] = {Chain, StackSlot};
  auto *Node = cast<FPStateAccessSDNode>(Op);
  EVT MemVT = Node->getMemoryVT();
  if (Subtarget.hasX87()) {
                         {Chain, Ptr}, MemVT, MMO);
                         {Chain, Ptr}, MemVT, MMO);
  if (Subtarget.hasX87())
                         {Chain, Ptr}, MemVT, MMO);
  auto *Node = cast<FPStateAccessSDNode>(Op);
  EVT MemVT = Node->getMemoryVT();
  SDValue Chain = Op.getNode()->getOperand(0);
  FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
  for (unsigned I = 0; I < 6; ++I)
  FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
  assert((Amt < 8) && "Shift/Rotation amount out of range");
    return 0x8040201008040201ULL;
    return ((0x0102040810204080ULL >> (Amt)) &
            (0x0101010101010101ULL * (0xFF >> (Amt))));
    return ((0x0102040810204080ULL << (Amt)) &
            (0x0101010101010101ULL * ((0xFF << (Amt)) & 0xFF)));
            (0x8080808080808080ULL >> (64 - (8 * Amt))));
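// These 64-bit immediates are 8x8 bit matrices for GF2P8AFFINEQB:
// 0x8040201008040201 is the identity, and the shifted/masked variants above
// encode per-byte SHL/SRL/SRA (and rotates) as affine transforms, so vXi8
// shifts can be performed with a single GFNI instruction.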
28530 MVT VT,
unsigned Amt = 0) {
28532 (VT.
getSizeInBits() % 64) == 0 &&
"Illegal GFNI control type");
28536 uint64_t Bits = (Imm >> (
I % 64)) & 255;
28552 MVT VT =
Op.getSimpleValueType();
28556 assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
28557 "Unsupported element type");
28560 if (NumElems > 16 ||
28566 "Unsupported value type for operation");
28581 MVT VT =
Op.getSimpleValueType();
28587 const int LUT[16] = { 4, 3, 2, 2,
28593 for (
int i = 0; i < NumBytes; ++i)
28609 if (CurrVT.is512BitVector()) {
28627 while (CurrVT != VT) {
28628 int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
28629 int CurrNumElts = CurrVT.getVectorNumElements();
28635 if (CurrVT.is512BitVector()) {
28663 MVT VT =
Op.getSimpleValueType();
28665 if (Subtarget.hasCDI() &&
28678 assert(Subtarget.
hasSSSE3() &&
"Expected SSSE3 support for PSHUFB");
28684 MVT VT =
Op.getSimpleValueType();
28688 unsigned Opc =
Op.getOpcode();
28693 Op =
Op.getOperand(0);
28694 if (VT == MVT::i8) {
28704 PassThru = DAG.
getConstant(NumBits + NumBits - 1, dl, OpVT);
28730 MVT VT =
Op.getSimpleValueType();
28737 "Only scalar CTTZ requires custom lowering");
28749 if (NonZeroSrc || !PassThru.
isUndef())
28761 MVT VT =
Op.getSimpleValueType();
28764 if (VT == MVT::i16 || VT == MVT::i32)
28767 if (VT == MVT::v32i16 || VT == MVT::v64i8)
28770 assert(
Op.getSimpleValueType().is256BitVector() &&
28771 Op.getSimpleValueType().isInteger() &&
28772 "Only handle AVX 256-bit vector integer operation");
28778 MVT VT =
Op.getSimpleValueType();
28780 unsigned Opcode =
Op.getOpcode();
28783 if (VT == MVT::v32i16 || VT == MVT::v64i8 ||
28785 assert(
Op.getSimpleValueType().isInteger() &&
28786 "Only handle AVX vector integer operation");
28792 EVT SetCCResultType =
28804 if (
C &&
C->getAPIntValue().isSignMask()) {
28817 if (SetCCResultType == VT &&
28825 (!VT.
isVector() || VT == MVT::v2i64)) {
28832 SDValue SumDiff = Result.getValue(0);
28833 SDValue Overflow = Result.getValue(1);
28838 Result = DAG.
getSelect(
DL, VT, SumNeg, SatMax, SatMin);
28839 return DAG.
getSelect(
DL, VT, Overflow, Result, SumDiff);
28848 MVT VT =
Op.getSimpleValueType();
28851 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
28863 if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.
hasSSE41()) {
28871 "Only handle AVX 256-bit vector integer operation");
28875 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
28884 MVT VT =
Op.getSimpleValueType();
28891 if (VT == MVT::v32i16 || VT == MVT::v64i8)
28900 MVT VT =
Op.getSimpleValueType();
28907 if (VT == MVT::v32i16 || VT == MVT::v64i8)
28917 EVT VT =
Op.getValueType();
28925 if (Subtarget.hasAVX10_2() && TLI.
isTypeLegal(VT)) {
28929 else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64)
28935 return DAG.
getNode(Opc,
DL, VT,
X,
Y, Imm,
Op->getFlags());
28941 APInt OppositeZero = PreferredZero;
28973 if (
auto *CstOp = dyn_cast<ConstantFPSDNode>(
Op))
28974 return CstOp->getValueAPF().bitcastToAPInt() == Zero;
28975 if (
auto *CstOp = dyn_cast<ConstantSDNode>(
Op))
28976 return CstOp->getAPIntValue() == Zero;
28979 for (
const SDValue &OpVal :
Op->op_values()) {
28980 if (OpVal.isUndef())
28982 auto *CstOp = dyn_cast<ConstantFPSDNode>(OpVal);
28985 if (!CstOp->getValueAPF().isZero())
28987 if (CstOp->getValueAPF().bitcastToAPInt() != Zero)
28998 Op->getFlags().hasNoSignedZeros() ||
29002 if (IgnoreSignedZero || MatchesZero(
Y, PreferredZero) ||
29003 MatchesZero(
X, OppositeZero)) {
29007 }
else if (MatchesZero(
X, PreferredZero) || MatchesZero(
Y, OppositeZero)) {
29010 }
else if (!VT.
isVector() && (VT == MVT::f16 || Subtarget.hasDQI()) &&
29011 (
Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) {
29030 return DAG.
getNode(MinMaxOp,
DL, VT, NewX, NewY,
Op->getFlags());
29033 if (Subtarget.is64Bit() || VT != MVT::f64) {
29061 Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN);
29082 MVT VT =
Op.getSimpleValueType();
29089 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.
useBWIRegs())
29101 if (VT.
bitsGE(MVT::i32)) {
29136 MVT VT =
Op.getSimpleValueType();
29142 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
29150 if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
29153 unsigned NumEltsPerLane = NumElts / NumLanes;
29155 if ((VT == MVT::v16i8 && Subtarget.
hasInt256()) ||
29170 bool BIsBuildVector = isa<BuildVectorSDNode>(
B);
29171 bool IsLoLaneAllZeroOrUndef = BIsBuildVector;
29172 bool IsHiLaneAllZeroOrUndef = BIsBuildVector;
29173 if (BIsBuildVector) {
29175 if ((
Idx % NumEltsPerLane) >= (NumEltsPerLane / 2))
29181 if (!(IsLoLaneAllZeroOrUndef || IsHiLaneAllZeroOrUndef)) {
29206 for (
unsigned i = 0; i != NumElts; i += 16) {
29207 for (
unsigned j = 0; j != 8; ++j) {
29225 return getPack(DAG, Subtarget, dl, VT, RLo, RHi);
29229 if (VT == MVT::v4i32) {
29231 "Should not custom lower when pmulld is available!");
29234 static const int UnpackMask[] = {1, 1, 3, 3};
29252 static const int ShufMask[] = { 0, 4, 2, 6 };
29256 assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
29257 "Only know how to lower V2I64/V4I64/V8I64 multiply");
29258 assert(!Subtarget.hasDQI() &&
"DQI should use MULLQ");
29284 if (!ALoIsZero && !BLoIsZero)
29288 if (!ALoIsZero && !BHiIsZero) {
29294 if (!AHiIsZero && !BLoIsZero) {
29306 MVT VT,
bool IsSigned,
29340 for (
unsigned i = 0; i != NumElts; i += 16) {
29341 for (
unsigned j = 0; j != 8; ++j) {
29342 SDValue LoOp =
B.getOperand(i + j);
29343 SDValue HiOp =
B.getOperand(i + j + 8);
29364 }
else if (IsSigned) {
29379 *
Low =
getPack(DAG, Subtarget, dl, VT, RLo, RHi);
29381 return getPack(DAG, Subtarget, dl, VT, RLo, RHi,
true);
29387 MVT VT =
Op.getSimpleValueType();
29397 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
29400 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
29402 (VT == MVT::v8i32 && Subtarget.
hasInt256()) ||
29403 (VT == MVT::v16i32 && Subtarget.
hasAVX512()));
29417 const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
29418 9, -1, 11, -1, 13, -1, 15, -1};
29444 for (
int i = 0; i != (int)NumElts; ++i)
29445 ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
29451 if (IsSigned && !Subtarget.
hasSSE41()) {
29466 assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.
hasInt256()) ||
29467 (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
29468 "Unsupported vector type");
29476 if ((VT == MVT::v16i8 && Subtarget.
hasInt256()) ||
29493 MVT VT =
Op.getSimpleValueType();
29503 EVT OvfVT =
Op->getValueType(1);
29505 if ((VT == MVT::v32i8 && !Subtarget.
hasInt256()) ||
29506 (VT == MVT::v64i8 && !Subtarget.hasBWI())) {
29515 EVT LoOvfVT, HiOvfVT;
29536 if ((VT == MVT::v16i8 && Subtarget.
hasInt256()) ||
29561 if (!Subtarget.hasBWI()) {
29582 if (!Subtarget.hasBWI()) {
29624 EVT VT =
Op.getValueType();
29626 "Unexpected return type for lowering");
29628 if (isa<ConstantSDNode>(
Op->getOperand(1))) {
29636 switch (
Op->getOpcode()) {
29652 EVT ArgVT =
Op->getOperand(i).getValueType();
29654 "Unexpected argument type for lowering");
29656 int SPFI = cast<FrameIndexSDNode>(
StackPtr.getNode())->getIndex();
29661 DAG.
getStore(InChain, dl,
Op->getOperand(i), StackPtr, MPI,
Align(16));
29663 Entry.IsSExt =
false;
29664 Entry.IsZExt =
false;
29665 Args.push_back(Entry);
29672 CLI.setDebugLoc(dl)
29676 static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.
getContext()), Callee,
29690 EVT VT =
Op.getValueType();
29691 bool IsStrict =
Op->isStrictFPOpcode();
29693 SDValue Arg =
Op.getOperand(IsStrict ? 1 : 0);
29697 "Unexpected return type for lowering");
29705 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected request for libcall!");
29708 MakeLibCallOptions CallOptions;
29714 std::tie(Result, Chain) =
29715 makeLibCall(DAG, LC, MVT::v2i64, Arg, CallOptions, dl, Chain);
29723 EVT VT =
Op.getValueType();
29724 bool IsStrict =
Op->isStrictFPOpcode();
29726 SDValue Arg =
Op.getOperand(IsStrict ? 1 : 0);
29730 "Unexpected argument type for lowering");
29738 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected request for libcall!");
29741 MakeLibCallOptions CallOptions;
29746 int SPFI = cast<FrameIndexSDNode>(
StackPtr.getNode())->getIndex();
29749 Chain = DAG.
getStore(Chain, dl, Arg, StackPtr, MPI,
Align(16));
29752 std::tie(Result, Chain) =
29753 makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);
29762 "Unexpected shift opcode");
29780 bool AShift = LShift && (Subtarget.
hasAVX512() ||
29781 (VT != MVT::v2i64 && VT != MVT::v4i64));
29782 return (Opcode ==
ISD::SRA) ? AShift : LShift;
29798 "Unexpected shift opcode");
29818 bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
29819 return (Opcode ==
ISD::SRA) ? AShift : LShift;
29824 MVT VT =
Op.getSimpleValueType();
29831 auto ArithmeticShiftRight64 = [&](
uint64_t ShiftAmt) {
29832 assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
"Unexpected SRA type");
29837 if (ShiftAmt == 63 && Subtarget.
hasSSE42()) {
29839 "Unsupported PCMPGT op");
29843 if (ShiftAmt >= 32) {
29848 ShiftAmt - 32, DAG);
29849 if (VT == MVT::v2i64)
29851 if (VT == MVT::v4i64)
29853 {9, 1, 11, 3, 13, 5, 15, 7});
29861 if (VT == MVT::v2i64)
29863 if (VT == MVT::v4i64)
29865 {8, 1, 10, 3, 12, 5, 14, 7});
29871 APInt APIntShiftAmt;
29876 if (APIntShiftAmt.
uge(EltSizeInBits))
29885 if (
Op.getOpcode() ==
ISD::SHL && ShiftAmt == 1) {
29899 if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
29900 (Subtarget.
hasInt256() && VT == MVT::v4i64)) &&
29902 return ArithmeticShiftRight64(ShiftAmt);
29909 Mask = DAG.
getNode(
Op.getOpcode(), dl, VT, Mask, Amt);
29913 if (VT == MVT::v16i8 || (Subtarget.
hasInt256() && VT == MVT::v32i8) ||
29914 (Subtarget.hasBWI() && VT == MVT::v64i8)) {
29919 if (
Op.getOpcode() ==
ISD::SHL && ShiftAmt == 1) {
29930 if (
Op.getOpcode() ==
ISD::SRA && ShiftAmt == 7) {
29933 assert(VT == MVT::v64i8 &&
"Unexpected element type!");
29941 if (VT == MVT::v16i8 && Subtarget.hasXOP())
29944 if (Subtarget.hasGFNI()) {
29985 MVT VT =
Op.getSimpleValueType();
29989 unsigned Opcode =
Op.getOpcode();
29992 int BaseShAmtIdx = -1;
30001 VT == MVT::v64i8) &&
30002 !Subtarget.hasXOP()) {
30013 BaseShAmt, BaseShAmtIdx, Subtarget, DAG);
30023 BaseShAmtIdx, Subtarget, DAG);
30033 BaseShAmtIdx, Subtarget, DAG);
30051 if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
30052 (Subtarget.
hasInt256() && VT == MVT::v16i16) ||
30053 (Subtarget.
hasAVX512() && VT == MVT::v32i16) ||
30054 (!Subtarget.
hasAVX512() && VT == MVT::v16i8) ||
30055 (Subtarget.
hasInt256() && VT == MVT::v32i8) ||
30056 (Subtarget.hasBWI() && VT == MVT::v64i8)))
30066 APInt One(SVTBits, 1);
30068 for (
unsigned I = 0;
I != NumElems; ++
I) {
30069 if (UndefElts[
I] || EltBits[
I].uge(SVTBits))
30071 uint64_t ShAmt = EltBits[
I].getZExtValue();
30079 if (VT == MVT::v4i32) {
30088 if (VT == MVT::v8i16 && !Subtarget.
hasAVX2()) {
30104 MVT VT =
Op.getSimpleValueType();
30112 unsigned Opc =
Op.getOpcode();
30116 assert(VT.
isVector() &&
"Custom lowering only for vector shifts!");
30117 assert(Subtarget.
hasSSE2() &&
"Only custom lower when we have SSE2!");
30131 if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) ||
30132 (VT == MVT::v4i64 && Subtarget.
hasInt256())) &&
30144 if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
30145 VT == MVT::v8i16 || VT == MVT::v16i8)) {
30156 if (VT == MVT::v2i64 && Opc !=
ISD::SRA) {
30168 for (
unsigned I = 0;
I != NumElts; ++
I) {
30170 if (
A.isUndef() ||
A->getAsAPIntVal().uge(EltSizeInBits))
30172 unsigned CstAmt =
A->getAsAPIntVal().getZExtValue();
30173 if (UniqueCstAmt.
count(CstAmt)) {
30174 UniqueCstAmt[CstAmt].setBit(
I);
30179 assert(!UniqueCstAmt.
empty() &&
"Illegal constant shift amounts");
30192 if (UniqueCstAmt.
size() == 2 &&
30193 (VT == MVT::v8i16 || VT == MVT::v4i32 ||
30194 (VT == MVT::v16i16 && Subtarget.
hasInt256()))) {
30195 unsigned AmtA = UniqueCstAmt.
begin()->first;
30196 unsigned AmtB = std::next(UniqueCstAmt.
begin())->first;
30197 const APInt &MaskA = UniqueCstAmt.
begin()->second;
30198 const APInt &MaskB = std::next(UniqueCstAmt.
begin())->second;
30200 for (
unsigned I = 0;
I != NumElts; ++
I) {
30202 ShuffleMask[
I] =
I;
30204 ShuffleMask[
I] =
I + NumElts;
30208 if ((VT != MVT::v16i16 ||
30223 (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
30224 VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16) &&
30225 !Subtarget.hasXOP()) {
30235 int WideEltSizeInBits = EltSizeInBits;
30236 while (WideEltSizeInBits < 32) {
30239 if (WideEltSizeInBits >= 16 && !Subtarget.
hasAVX2()) {
30242 TmpAmtWideElts.
resize(AmtWideElts.
size() / 2);
30243 bool SameShifts =
true;
30244 for (
unsigned SrcI = 0, E = AmtWideElts.
size(); SrcI != E; SrcI += 2) {
30245 unsigned DstI = SrcI / 2;
30247 if (AmtWideElts[SrcI].
isUndef() && AmtWideElts[SrcI + 1].
isUndef()) {
30248 TmpAmtWideElts[DstI] = AmtWideElts[SrcI];
30253 if (AmtWideElts[SrcI].
isUndef()) {
30254 TmpAmtWideElts[DstI] = AmtWideElts[SrcI + 1];
30259 if (AmtWideElts[SrcI + 1].
isUndef()) {
30260 TmpAmtWideElts[DstI] = AmtWideElts[SrcI];
30264 if (AmtWideElts[SrcI].
getNode()->getAsAPIntVal() ==
30265 AmtWideElts[SrcI + 1].
getNode()->getAsAPIntVal()) {
30266 TmpAmtWideElts[DstI] = AmtWideElts[SrcI];
30271 SameShifts =
false;
30277 WideEltSizeInBits *= 2;
30278 std::swap(TmpAmtWideElts, AmtWideElts);
30280 APInt APIntShiftAmt;
30284 if (WideEltSizeInBits * AmtWideElts.
size() >= 512 &&
30285 WideEltSizeInBits < 32 && !Subtarget.hasBWI()) {
30286 Profitable =
false;
30290 if (WideEltSizeInBits * AmtWideElts.
size() >= 512 && IsConstantSplat) {
30291 Profitable =
false;
30296 if (EltSizeInBits == 8 && Subtarget.hasGFNI()) {
30297 Profitable =
false;
30303 if (WideEltSizeInBits * AmtWideElts.
size() >= 256 &&
30304 (WideEltSizeInBits < 32 || IsConstantSplat) && !Subtarget.
hasAVX2()) {
30305 Profitable =
false;
30307 unsigned WideNumElts = AmtWideElts.
size();
30309 if (Profitable && WideNumElts != NumElts) {
30320 SDValue ShiftedR = DAG.
getNode(LogicalOpc, dl, WideVT, RWide, AmtWide);
30332 SDValue Mask = DAG.
getNode(LogicalOpc, dl, VT, SplatFullMask, Amt);
30367 SDValue SignBitMask = DAG.
getNode(LogicalOpc, dl, VT, SplatHighBit, Amt);
30372 return Subtraction;
30379 if (Opc ==
ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() ||
30386 if (Opc ==
ISD::SRL && ConstantAmt &&
30387 (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.
hasInt256()))) {
30394 return DAG.
getSelect(dl, VT, ZAmt, R, Res);
30402 if (Opc ==
ISD::SRA && ConstantAmt &&
30403 (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.
hasInt256())) &&
30404 ((Subtarget.
hasSSE41() && !Subtarget.hasXOP() &&
30417 Res = DAG.
getSelect(dl, VT, Amt0, R, Res);
30418 return DAG.
getSelect(dl, VT, Amt1, Sra1, Res);
30427 if (VT == MVT::v4i32) {
30428 SDValue Amt0, Amt1, Amt2, Amt3;
30439 if (Subtarget.
hasAVX()) {
30448 {4, 5, 6, 7, -1, -1, -1, -1});
30458 unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
30478 if ((VT == MVT::v16i8 && Subtarget.
hasSSSE3()) ||
30479 (VT == MVT::v32i8 && Subtarget.
hasInt256()) ||
30480 (VT == MVT::v64i8 && Subtarget.hasBWI())) {
30482 unsigned NumEltsPerLane = NumElts / NumLanes;
30484 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
30485 unsigned LoElt = Lane * NumEltsPerLane;
30491 for (
unsigned I = 0;
I != 8; ++
I) {
30493 LUT.push_back(LaneSplat.
shl(
I));
30501 if (
LUT.size() == NumElts) {
30512 if ((Subtarget.
hasInt256() && VT == MVT::v8i16) ||
30516 (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) {
30517 assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) &&
30518 "Unexpected vector type");
30519 MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
30522 R = DAG.
getNode(ExtOpc, dl, ExtVT, R);
30525 DAG.
getNode(Opc, dl, ExtVT, R, Amt));
30531 (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.
hasInt256()) ||
30532 (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
30533 !Subtarget.hasXOP()) {
30544 "Constant build vector expected");
30546 if (VT == MVT::v16i8 && Subtarget.
hasInt256()) {
30555 for (
unsigned i = 0; i != NumElts; i += 16) {
30556 for (
int j = 0; j != 8; ++j) {
30567 LoR = DAG.
getNode(X86OpcI, dl, VT16, LoR, Cst8);
30568 HiR = DAG.
getNode(X86OpcI, dl, VT16, HiR, Cst8);
30576 if (VT == MVT::v16i8 ||
30577 (VT == MVT::v32i8 && Subtarget.
hasInt256() && !Subtarget.hasXOP()) ||
30578 (VT == MVT::v64i8 && Subtarget.hasBWI())) {
30593 }
else if (Subtarget.
hasSSE41()) {
30620 R = SignBitSelect(VT, Amt, M, R);
30627 R = SignBitSelect(VT, Amt, M, R);
30634 R = SignBitSelect(VT, Amt, M, R);
30654 RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
30655 RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
30664 RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
30665 RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
30674 RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
30675 RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
30685 if (Subtarget.
hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
30686 MVT ExtVT = MVT::v8i32;
30703 if (VT == MVT::v8i16) {
30706 bool UseSSE41 = Subtarget.
hasSSE41() &&
30742 R = SignBitSelect(Amt, M, R);
30749 R = SignBitSelect(Amt, M, R);
30756 R = SignBitSelect(Amt, M, R);
30763 R = SignBitSelect(Amt, M, R);
30771 if (VT == MVT::v32i16 || VT == MVT::v64i8)
30779 MVT VT =
Op.getSimpleValueType();
30781 "Unexpected funnel shift opcode!");
30791 APInt APIntShiftAmt;
30795 if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
30800 uint64_t ShiftAmt = APIntShiftAmt.
urem(EltSizeInBits);
30803 {Op0, Op1, Imm}, DAG, Subtarget);
30806 {Op0, Op1, Amt}, DAG, Subtarget);
30808 assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
30809 VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
30810 VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
30811 "Unexpected funnel shift type!");
30819 uint64_t ShiftAmt = APIntShiftAmt.
urem(EltSizeInBits);
30820 uint64_t ShXAmt = IsFSHR ? (EltSizeInBits - ShiftAmt) : ShiftAmt;
30821 uint64_t ShYAmt = IsFSHR ? ShiftAmt : (EltSizeInBits - ShiftAmt);
30822 assert((ShXAmt + ShYAmt) == EltSizeInBits &&
"Illegal funnel shift");
30825 if (EltSizeInBits == 8 &&
30826 (Subtarget.hasXOP() ||
30859 if (IsCst && EltSizeInBits == 16)
30868 if ((VT.
is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) ||
30871 EltSizeInBits < 32)) {
30873 Op = DAG.
getNode(
Op.getOpcode(),
DL, VT, Op0, Op1, AmtMod);
30879 int ScalarAmtIdx = -1;
30882 if (EltSizeInBits == 16)
30888 ScalarAmtIdx, Subtarget, DAG);
30890 ScalarAmtIdx, Subtarget, DAG);
30896 std::min<unsigned>(EltSizeInBits * 2, Subtarget.hasBWI() ? 16 : 32));
30912 EltSizeInBits, DAG);
30914 Res = DAG.
getNode(ShiftOpc,
DL, WideVT, Res, AmtMod);
30917 EltSizeInBits, DAG);
30922 if (((IsCst || !Subtarget.
hasAVX512()) && !IsFSHR && EltSizeInBits <= 16) ||
30938 (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
30939 "Unexpected funnel shift type!");
30943 bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
30947 if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
30948 !isa<ConstantSDNode>(Amt)) {
30965 if (VT == MVT::i8 || ExpandFunnel)
30969 if (VT == MVT::i16) {
30973 return DAG.
getNode(FSHOp,
DL, VT, Op0, Op1, Amt);
30981 MVT VT =
Op.getSimpleValueType();
30982 assert(VT.
isVector() &&
"Custom lowering only for vector rotates!");
30987 unsigned Opcode =
Op.getOpcode();
30993 APInt CstSplatValue;
30997 if (IsCstSplat && CstSplatValue.
urem(EltSizeInBits) == 0)
31001 if ((Subtarget.hasVLX() ||
31002 (Subtarget.
hasAVX512() && Subtarget.hasEVEX512())) &&
31003 32 <= EltSizeInBits) {
31017 if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
31019 return DAG.
getNode(FunnelOpc,
DL, VT, R, R, Amt);
31031 if (Subtarget.hasXOP())
31037 if (IsCstSplat && Subtarget.hasGFNI() && VT.
getScalarType() == MVT::i8 &&
31052 if (Subtarget.hasXOP()) {
31053 assert(IsROTL &&
"Only ROTL expected");
31072 uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
31073 uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
31086 (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
31087 ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
31089 ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.
useBWIRegs())) &&
31090 "Only vXi32/vXi16/vXi8 vector rotates supported");
31101 if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
31102 int BaseRotAmtIdx = -1;
31104 if (EltSizeInBits == 16 && Subtarget.
hasSSE41()) {
31106 return DAG.
getNode(FunnelOpc,
DL, VT, R, R, Amt);
31112 BaseRotAmtIdx, Subtarget, DAG);
31114 BaseRotAmtIdx, Subtarget, DAG);
31126 if (!(ConstantAmt && EltSizeInBits != 8) &&
31141 if (EltSizeInBits == 8) {
31159 R = DAG.
getNode(ShiftOpc,
DL, WideVT, R, Amt);
31206 R = SignBitSelect(VT, Amt, M, R);
31216 R = SignBitSelect(VT, Amt, M, R);
31226 return SignBitSelect(VT, Amt, M, R);
31235 if (IsSplatAmt || LegalVarShifts || (Subtarget.
hasAVX2() && !ConstantAmt)) {
31253 assert(IsROTL &&
"Only ROTL supported");
31262 if (EltSizeInBits == 16) {
31271 assert(VT == MVT::v4i32 &&
"Only v4i32 vector rotate expected");
31272 static const int OddMask[] = {1, 1, 3, 3};
31294bool X86TargetLowering::needsCmpXchgNb(
Type *MemType)
const {
31299 if (OpWidth == 128)
31306X86TargetLowering::shouldExpandAtomicStoreInIR(
StoreInst *SI)
const {
31307 Type *MemType =
SI->getValueOperand()->getType();
31309 if (!
SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
31310 !Subtarget.useSoftFloat()) {
31312 (Subtarget.
hasSSE1() || Subtarget.hasX87()))
31326X86TargetLowering::shouldExpandAtomicLoadInIR(
LoadInst *LI)
const {
31330 !Subtarget.useSoftFloat()) {
31335 (Subtarget.
hasSSE1() || Subtarget.hasX87()))
31359 if (
auto *
C = dyn_cast<ConstantInt>(V)) {
31369 if (
auto *
I = dyn_cast<Instruction>(V)) {
31376 I = dyn_cast<Instruction>(PeekI);
31386 if (
I->getOpcode() == Instruction::Shl) {
31395 auto *ShiftVal = dyn_cast<ConstantInt>(
I->getOperand(0));
31398 if (ShiftVal->equalsInt(1))
31404 Value *BitV =
I->getOperand(1);
31408 uint64_t ShiftMask =
I->getType()->getPrimitiveSizeInBits() - 1;
31412 return {BitV, BTK};
31419X86TargetLowering::shouldExpandLogicAtomicRMWInIR(
AtomicRMWInst *AI)
const {
31442 I->getOpcode() != Instruction::And ||
31447 unsigned OtherIdx =
I->getOperand(0) == AI ? 1 : 0;
31450 if (AI ==
I->getOperand(OtherIdx))
31456 auto *C2 = dyn_cast<ConstantInt>(
I->getOperand(OtherIdx));
31461 return ~C1->getValue() == C2->getValue()
31475 assert(BitChange.first !=
nullptr && BitTested.first !=
nullptr);
31478 if (BitChange.first != BitTested.first)
31494void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(
AtomicRMWInst *AI)
const {
31496 Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
31503 IID_C = Intrinsic::x86_atomic_bts;
31504 IID_I = Intrinsic::x86_atomic_bts_rm;
31507 IID_C = Intrinsic::x86_atomic_btc;
31508 IID_I = Intrinsic::x86_atomic_btc_rm;
31511 IID_C = Intrinsic::x86_atomic_btr;
31512 IID_I = Intrinsic::x86_atomic_btr_rm;
31521 assert(BitTested.first !=
nullptr);
31524 auto *
C = cast<ConstantInt>(
I->getOperand(
I->getOperand(0) == AI ? 1 : 0));
31528 {Addr, Builder.getInt8(Imm)});
31537 unsigned ShiftBits =
SI->getType()->getPrimitiveSizeInBits();
31539 Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));
31546 Result = Builder.CreateIntrinsic(IID_I, AI->
getType(), {Addr, BitPos});
31551 for (
auto It =
I->user_begin(); It !=
I->user_end(); ++It) {
31552 if (
auto *ICmp = dyn_cast<ICmpInst>(*It)) {
31553 if (ICmp->isEquality()) {
31554 auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));
31555 auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));
31557 assert(C0 ==
nullptr || C1 ==
nullptr);
31558 if ((C0 ? C0 : C1)->
isZero())
31563 Result = Builder.CreateShl(Result, BitPos);
31568 I->replaceAllUsesWith(Result);
31569 I->eraseFromParent();
31586 if (
match(
I->user_back(),
31589 if (
match(
I->user_back(),
31599 if (
match(
I->user_back(),
31602 if (
match(
I->user_back(),
31615 if (
match(
I->user_back(),
31624 if (
match(
I->user_back(),
31627 if (
match(
I->user_back(),
31637void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
31640 Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
31647 ICI = cast<ICmpInst>(TempI->
user_back());
31672 IID = Intrinsic::x86_atomic_add_cc;
31675 IID = Intrinsic::x86_atomic_sub_cc;
31678 IID = Intrinsic::x86_atomic_or_cc;
31681 IID = Intrinsic::x86_atomic_and_cc;
31684 IID = Intrinsic::x86_atomic_xor_cc;
31691 {Addr, AI->getValOperand(), Builder.getInt32((unsigned)CC)});
31701X86TargetLowering::shouldExpandAtomicRMWInIR(
AtomicRMWInst *AI)
const {
31702 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
31727 return shouldExpandLogicAtomicRMWInIR(AI);
31749X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(
AtomicRMWInst *AI)
const {
31750 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
31767 Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
31768 Module *
M = Builder.GetInsertBlock()->getParent()->getParent();
31804 Builder.CreateCall(MFence, {});
31807 LoadInst *Loaded = Builder.CreateAlignedLoad(
31850 if (Subtarget.is64Bit()) {
31905 MVT T =
Op.getSimpleValueType();
31909 switch(
T.SimpleTy) {
31911 case MVT::i8: Reg = X86::AL;
size = 1;
break;
31912 case MVT::i16: Reg = X86::AX;
size = 2;
break;
31913 case MVT::i32: Reg = X86::EAX;
size = 4;
break;
31915 assert(Subtarget.is64Bit() &&
"Node not type legal!");
31916 Reg = X86::RAX;
size = 8;
31944 MVT InVT = V.getSimpleValueType();
31946 if (InVT == MVT::v64i8) {
31957 if (InVT == MVT::v32i8 && !Subtarget.
hasInt256()) {
31973 MVT SrcVT = Src.getSimpleValueType();
31974 MVT DstVT =
Op.getSimpleValueType();
31978 if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
31979 assert(!Subtarget.is64Bit() &&
"Expected 32-bit mode");
31980 assert(Subtarget.hasBWI() &&
"Expected BWI target");
31990 if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.
isScalarInteger()) {
31991 assert(!Subtarget.
hasAVX512() &&
"Should use K-registers with AVX512");
31992 MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8;
31999 assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
32000 SrcVT == MVT::i64) &&
"Unexpected VT!");
32003 if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
32004 !(DstVT == MVT::x86mmx && SrcVT.
isVector()))
32017 assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
32018 "Unexpected source type in LowerBITCAST");
32022 MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
32025 if (DstVT == MVT::x86mmx)
32042 MVT ByteVecVT = V.getSimpleValueType();
32045 "Expected value to have byte element type.");
32046 assert(EltVT != MVT::i8 &&
32047 "Horizontal byte sum only makes sense for wider elements!");
32053 if (EltVT == MVT::i64) {
32060 if (EltVT == MVT::i32) {
32089 assert(EltVT == MVT::i16 &&
"Unknown how to handle type");
  MVT VT = Op.getSimpleValueType();
  assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");
  const int LUT[16] = { 0, 1, 1, 2,
  for (int i = 0; i < NumElts; ++i)
  MVT VT = Op.getSimpleValueType();
         "Unknown CTPOP type to handle");
32158 if (Subtarget.hasVPOPCNTDQ()) {
32195 MVT VT =
N.getSimpleValueType();
32210 unsigned ShiftedActiveBits = Known.
getBitWidth() - (LZ + TZ);
32213 if (ShiftedActiveBits <= 2) {
32214 if (ActiveBits > 2)
32225 if (ShiftedActiveBits <= 3) {
32226 if (ActiveBits > 3)
32240 if (ShiftedActiveBits <= 4 &&
32243 if (ActiveBits > 4)
32257 if (ShiftedActiveBits <= 8) {
32259 if (ActiveBits > 8)
32278 "We only do custom lowering for vector population count.");
32283 MVT VT =
Op.getSimpleValueType();
32305 "Only 128-bit vector bitreverse lowering supported.");
32312 for (
int i = 0; i != NumElts; ++i) {
32313 for (
int j = ScalarSizeInBytes - 1; j >= 0; --j) {
32314 int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
32315 int PermuteByte = SourceByte | (2 << 5);
32329 MVT VT =
Op.getSimpleValueType();
32334 assert(Subtarget.
hasSSSE3() &&
"SSSE3 required for BITREVERSE");
32350 (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&
32351 "Only tested for i8/i16/i32/i64");
32373 "Only byte vector BITREVERSE supported");
  if (Subtarget.hasGFNI()) {
  const int LoLUT[16] = {
      0x00, 0x80, 0x40, 0xC0,
      0x20, 0xA0, 0x60, 0xE0,
      0x10, 0x90, 0x50, 0xD0,
      0x30, 0xB0, 0x70, 0xF0};
  const int HiLUT[16] = {
      0x00, 0x08, 0x04, 0x0C,
      0x02, 0x0A, 0x06, 0x0E,
      0x01, 0x09, 0x05, 0x0D,
      0x03, 0x0B, 0x07, 0x0F};
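// Byte BITREVERSE without GFNI: LoLUT maps a low nibble to its bit-reversed
// value placed in the high nibble, HiLUT maps a high nibble to its reversal in
// the low nibble; two PSHUFB lookups ORed together reverse each byte.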
  for (unsigned i = 0; i < NumElts; ++i) {
  MVT VT = Op.getSimpleValueType();
  if (VT == MVT::i8 ||
  if (Subtarget.hasPOPCNT())
  if (VT == MVT::i64) {
  if (VT != MVT::i16) {
32473 unsigned NewOpc = 0;
32474 switch (
N->getOpcode()) {
32498 {N->getOperand(0), N->getOperand(1), N->getOperand(2)},
32499 N->getSimpleValueType(0), MMO);
32509 unsigned Opc =
N->getOpcode();
32510 MVT VT =
N->getSimpleValueType(0);
32516 if (
N->hasAnyUseOfValue(0)) {
32527 "Used AtomicRMW ops other than Add should have been expanded!");
32550 assert(!
N->hasAnyUseOfValue(0));
32557 assert(!
N->hasAnyUseOfValue(0));
32565 assert(!
N->hasAnyUseOfValue(0));
32573 auto *
Node = cast<AtomicSDNode>(
Op.getNode());
32575 EVT VT =
Node->getMemoryVT();
32583 if (!IsSeqCst && IsTypeLegal)
32586 if (!IsTypeLegal && !Subtarget.useSoftFloat() &&
32588 Attribute::NoImplicitFloat)) {
32592 if (VT == MVT::i128 && Subtarget.is64Bit() && Subtarget.
hasAVX()) {
32595 Node->getMemOperand());
32600 if (VT == MVT::i64) {
32604 MVT StVT = Subtarget.
hasSSE2() ? MVT::v2i64 : MVT::v4f32;
32609 MVT::i64,
Node->getMemOperand());
32610 }
else if (Subtarget.hasX87()) {
32614 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
32620 SDValue LdOps[] = {Chain, StackPtr};
32624 Chain =
Value.getValue(1);
32630 StoreOps, MVT::i64,
Node->getMemOperand());
32648 Node->getOperand(0),
Node->getOperand(2),
32649 Node->getOperand(1),
Node->getMemOperand());
32655 MVT VT =
N->getSimpleValueType(0);
32656 unsigned Opc =
Op.getOpcode();
32673 Op.getOperand(0),
Op.getOperand(1),
32679 if (
N->getValueType(1) == MVT::i1)
32702 Entry.IsSExt =
false;
32703 Entry.IsZExt =
false;
32704 Args.push_back(Entry);
32706 bool isF64 = ArgVT == MVT::f64;
32711 RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
32724 std::pair<SDValue, SDValue> CallResult = TLI.
LowerCallTo(CLI);
32728 return CallResult.first;
32744 bool FillWithZeroes =
false) {
32754 "input and widen element type must match");
32758 assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
32759 "Unexpected request for vector widening");
32777 Ops.
append(WidenNumElts - InNumElts, FillVal);
32789 "MGATHER/MSCATTER are supported on AVX-512 arch only");
32793 MVT VT = Src.getSimpleValueType();
32801 SDValue BasePtr =
N->getBasePtr();
32803 if (VT == MVT::v2f32 || VT == MVT::v2i32) {
32804 assert(Mask.getValueType() == MVT::v2i1 &&
"Unexpected mask type");
32806 if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
32811 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
32813 N->getMemoryVT(),
N->getMemOperand());
32818 MVT IndexVT = Index.getSimpleValueType();
32822 if (IndexVT == MVT::v2i32)
32828 !Index.getSimpleValueType().is512BitVector()) {
32844 SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
32846 N->getMemoryVT(),
N->getMemOperand());
32853 MVT VT =
Op.getSimpleValueType();
32856 MVT MaskVT = Mask.getSimpleValueType();
32857 SDValue PassThru =
N->getPassThru();
32867 VT, dl,
N->getChain(),
N->getBasePtr(),
N->getOffset(), Mask,
32869 N->getMemOperand(),
N->getAddressingMode(),
N->getExtensionType(),
32870 N->isExpandingLoad());
32877 "Expanding masked load is supported on AVX-512 target only!");
32880 "Expanding masked load is supported for 32 and 64-bit types only!");
32883 "Cannot lower masked load op.");
32886 (Subtarget.hasBWI() &&
32887 (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
32888 "Unsupported masked load op.");
32897 assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
32898 "Unexpected mask type");
32904 WideDataVT, dl,
N->getChain(),
N->getBasePtr(),
N->getOffset(), Mask,
32905 PassThru,
N->getMemoryVT(),
N->getMemOperand(),
N->getAddressingMode(),
32906 N->getExtensionType(),
N->isExpandingLoad());
32918 SDValue DataToStore =
N->getValue();
32925 "Expanding masked load is supported on AVX-512 target only!");
32928 "Expanding masked load is supported for 32 and 64-bit types only!");
32931 "Cannot lower masked store op.");
32934 (Subtarget.hasBWI() &&
32935 (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
32936 "Unsupported masked store op.");
32944 assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
32945 "Unexpected mask type");
32949 DataToStore =
ExtendToType(DataToStore, WideDataVT, DAG);
32951 return DAG.
getMaskedStore(
N->getChain(), dl, DataToStore,
N->getBasePtr(),
32952 N->getOffset(), Mask,
N->getMemoryVT(),
32953 N->getMemOperand(),
N->getAddressingMode(),
32954 N->isTruncatingStore(),
N->isCompressingStore());
32960 "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");
32964 MVT VT =
Op.getSimpleValueType();
32967 SDValue PassThru =
N->getPassThru();
32968 MVT IndexVT = Index.getSimpleValueType();
32973 if (IndexVT == MVT::v2i32)
33000 SDValue Ops[] = {
N->getChain(), PassThru, Mask,
N->getBasePtr(), Index,
33004 N->getMemOperand());
33013 MVT DstVT =
Op.getSimpleValueType();
33016 unsigned SrcAS =
N->getSrcAddressSpace();
33018 assert(SrcAS !=
N->getDestAddressSpace() &&
33019 "addrspacecast must be between different address spaces");
33023 }
else if (DstVT == MVT::i64) {
33025 }
else if (DstVT == MVT::i32) {
33043 if (
Op->getGluedNode())
33053 EVT VT =
Op.getValueType();
33066 unsigned IsData =
Op.getConstantOperandVal(4);
33070 if (!IsData && !Subtarget.hasPREFETCHI())
33071 return Op.getOperand(0);
33078 SDValue Operand =
N->getOperand(0);
33090 {Chain, Operand, One});
33096 const APInt Operand(32, OpNo);
33098 std::string Str(
" $");
33100 std::string OpNoStr1(Str + OpNoStr);
33101 std::string OpNoStr2(Str +
"{" + OpNoStr +
":");
33104 for (
auto &AsmStr : AsmStrs) {
33107 if (AsmStr.ends_with(OpNoStr1))
33108 I = AsmStr.size() - OpNoStr1.size();
33112 I = AsmStr.find(OpNoStr1 +
",");
33114 I = AsmStr.find(OpNoStr2);
33119 assert(
I > 0 &&
"Unexpected inline asm string!");
33127 auto TmpStr = AsmStr.substr(0,
I);
33128 I = TmpStr.rfind(
':');
33130 TmpStr = TmpStr.substr(
I + 1);
33131 return TmpStr.take_while(llvm::isAlpha);
33156 return SDValue(CmpZero.getNode(), 1);
33174 SDValue Ops[] = {Chain,
Ptr, ScalarPassThru, COND_NE, Flags};
33192 SDValue Ops[] = {Chain, ScalarVal,
Ptr, COND_NE, Flags};
33198 switch (
Op.getOpcode()) {
33288 return LowerFRAME_TO_ARGS_OFFSET(
Op, DAG);
33294 return lowerEH_SJLJ_SETUP_DISPATCH(
Op, DAG);
33365 unsigned Opc =
N->getOpcode();
33369 dbgs() <<
"ReplaceNodeResults: ";
33372 llvm_unreachable(
"Do not know how to custom type legalize this operation!");
33374 EVT VT =
N->getValueType(0);
33386 EVT VT =
N->getValueType(0);
33392 {
N->getOperand(0),
Lo});
33394 {
N->getOperand(0),
Hi});
33396 Lo.getValue(1),
Hi.getValue(1));
33406 assert(
N->getValueType(0) == MVT::i64 &&
"Unexpected VT!");
33412 if ((LZ + TZ) >= 32) {
33422 bool NoImplicitFloatOps =
33424 Attribute::NoImplicitFloat);
33425 if (
isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
33440 EVT VT =
N->getValueType(0);
33452 ConcatOps[0] = Res;
33459 EVT VT =
N->getValueType(0);
33461 VT == MVT::v2i32 &&
"Unexpected VT!");
33498 EVT VT =
N->getValueType(0);
33499 EVT InVT =
N->getOperand(0).getValueType();
33501 "Expected a VT that divides into 128 bits.");
33503 "Unexpected type action!");
33514 Ops[0] =
N->getOperand(0);
33516 Ops[0] =
N->getOperand(1);
33530 EVT VT =
N->getValueType(0);
33531 assert(VT == MVT::v2f32 &&
"Unexpected type (!= v2f32) on FMIN/FMAX.");
33535 N->getOperand(IsStrict ? 1 : 0), UNDEF);
33537 N->getOperand(IsStrict ? 2 : 1), UNDEF);
33540 Res = DAG.
getNode(Opc, dl, {MVT::v4f32, MVT::Other},
33541 {
N->getOperand(0),
LHS,
RHS});
33553 EVT VT =
N->getValueType(0);
33556 "Unexpected type action!");
33564 Ops0[0] =
N->getOperand(0);
33579 MVT VT =
N->getSimpleValueType(0);
33588 EVT InVT = In.getValueType();
33596 unsigned PackOpcode;
33600 dl, DAG, Subtarget)) {
33613 for (
unsigned I = 0;
I < MinElts; ++
I)
33614 TruncMask[
I] = Scale *
I;
33617 "Illegal vector type in truncation");
33630 if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
33635 if (InVT == MVT::v4i64 && VT == MVT::v4i8 &&
isTypeLegal(MVT::v8i64)) {
33642 if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 &&
33653 { 0, 1, 2, 3, 16, 17, 18, 19,
33654 -1, -1, -1, -1, -1, -1, -1, -1 });
33661 if ((InEltVT == MVT::i16 || InEltVT == MVT::i32 || InEltVT == MVT::i64) &&
33662 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32) &&
33665 !(MinElts <= 4 && InEltVT == MVT::i64 && EltVT == MVT::i8)))) {
33677 assert(
N->getValueType(0) == MVT::v8i8 &&
33678 "Do not know how to legalize this Node");
33682 EVT VT =
N->getValueType(0);
33684 EVT InVT = In.getValueType();
33685 if (!Subtarget.
hasSSE41() && VT == MVT::v4i64 &&
33686 (InVT == MVT::v4i16 || InVT == MVT::v4i8)){
33688 "Unexpected type action!");
33714 if (VT == MVT::v16i32 || VT == MVT::v8i64) {
33726 In = DAG.
getNode(Opc, dl, InVT, In);
33739 unsigned HalfNumElts = NumElts / 2;
33741 for (
unsigned i = 0; i != HalfNumElts; ++i)
33742 ShufMask[i] = i + HalfNumElts;
33754 if (!Subtarget.hasAVX10_2())
33758 EVT VT =
N->getValueType(0);
33760 EVT OpVT =
Op.getValueType();
33763 if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
33776 bool IsStrict =
N->isStrictFPOpcode();
33778 EVT VT =
N->getValueType(0);
33779 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
33781 EVT SrcVT = Src.getValueType();
33788 DAG.
getNode(Opc, dl, {VT, MVT::Other},
33790 {NVT, MVT::Other}, {Chain, Src})});
33803 if (VT.
isVector() && Subtarget.hasFP16() &&
33806 EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
33808 if (SrcVT != MVT::v8f16) {
33819 DAG.
getNode(Opc, dl, {ResVT, MVT::Other}, {
N->getOperand(0), Src});
33823 Res = DAG.
getNode(Opc, dl, ResVT, Src);
33835 ConcatOps[0] = Res;
33848 "Unexpected type action!");
33858 {
N->getOperand(0), Src});
33865 if (PromoteVT == MVT::v2i32)
33873 if (PromoteVT == MVT::v2i32)
33885 ConcatOps[0] = Res;
33894 if (VT == MVT::v2i32) {
33896 "Strict unsigned conversion requires AVX512");
33899 "Unexpected type action!");
33900 if (Src.getValueType() == MVT::v2f64) {
33901 if (!IsSigned && !Subtarget.
hasAVX512()) {
33914 if (!IsSigned && !Subtarget.hasVLX()) {
33924 Opc =
N->getOpcode();
33929 Res = DAG.
getNode(Opc, dl, {MVT::v4i32, MVT::Other},
33930 {
N->getOperand(0), Src});
33933 Res = DAG.
getNode(Opc, dl, MVT::v4i32, Src);
33943 if (Src.getValueType() == MVT::v2f32 && IsStrict) {
33947 {
N->getOperand(0), Src});
33958 assert(!VT.
isVector() &&
"Vectors should have been handled above!");
33960 if ((Subtarget.hasDQI() && VT == MVT::i64 &&
33961 (SrcVT == MVT::f32 || SrcVT == MVT::f64)) ||
33962 (Subtarget.hasFP16() && SrcVT == MVT::f16)) {
33963 assert(!Subtarget.is64Bit() &&
"i64 should be legal");
33964 unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
33970 if (NumElts != SrcElts) {
34004 if (
SDValue V = FP_TO_INTHelper(
SDValue(
N, 0), DAG, IsSigned, Chain)) {
34013 if (
SDValue V = LRINT_LLRINTHelper(
N, DAG))
34022 bool IsStrict =
N->isStrictFPOpcode();
34024 EVT VT =
N->getValueType(0);
34025 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
34027 Subtarget.hasVLX()) {
34028 if (Src.getValueType().getVectorElementType() == MVT::i16)
34031 if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
34039 {
N->getOperand(0), Src});
34048 if (VT != MVT::v2f32)
34050 EVT SrcVT = Src.getValueType();
34051 if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
34056 {
N->getOperand(0), Src});
34065 if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
34075 for (
int i = 0; i != 2; ++i) {
34081 {
N->getOperand(0), Elt});
34089 SignCvts[0].getValue(1), SignCvts[1].getValue(1));
34091 {Chain, SignCvt, SignCvt});
34106 if (SrcVT != MVT::v2i32)
34109 if (IsSigned || Subtarget.
hasAVX512()) {
34118 {
N->getOperand(0), Src});
34127 llvm::bit_cast<double>(0x4330000000000000ULL), dl, MVT::v2f64);
34133 {
N->getOperand(0),
Or, VBias});
34135 {MVT::v4f32, MVT::Other},
34148 bool IsStrict =
N->isStrictFPOpcode();
34150 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
34151 SDValue Rnd =
N->getOperand(IsStrict ? 2 : 1);
34152 EVT SrcVT = Src.getValueType();
34153 EVT VT =
N->getValueType(0);
34155 if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
34161 assert(Subtarget.hasF16C() &&
"Cannot widen f16 without F16C");
34167 {Chain, Src, Rnd});
34173 Results.push_back(V.getValue(1));
34186 Results.push_back(V.getValue(1));
34193 assert(
N->getValueType(0) == MVT::v2f32 &&
34194 "Do not know how to legalize this Node");
34195 if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
34197 bool IsStrict =
N->isStrictFPOpcode();
34198 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
34199 if (Src.getValueType().getVectorElementType() != MVT::f16)
34206 {
N->getOperand(0), V});
34211 Results.push_back(V.getValue(1));
34215 unsigned IntNo =
N->getConstantOperandVal(1);
34218 "legalize this intrinsic operation!");
34219 case Intrinsic::x86_rdtsc:
34222 case Intrinsic::x86_rdtscp:
34225 case Intrinsic::x86_rdpmc:
34229 case Intrinsic::x86_rdpru:
34233 case Intrinsic::x86_xgetbv:
34243 EVT T =
N->getValueType(0);
34244 assert((
T == MVT::i64 ||
T == MVT::i128) &&
"can only expand cmpxchg pair");
34245 bool Regs64bit =
T == MVT::i128;
34247 "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
34248 MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
34250 std::tie(cpInL, cpInH) =
34253 Regs64bit ? X86::RAX : X86::EAX, cpInL,
SDValue());
34258 std::tie(swapInL, swapInH) =
34288 Regs64bit ? X86::RAX : X86::EAX,
34289 HalfT, Result.getValue(1));
34291 Regs64bit ? X86::RDX : X86::EDX,
34307 (
N->getValueType(0) == MVT::i64 ||
N->getValueType(0) == MVT::i128) &&
34309 bool NoImplicitFloatOps =
34311 Attribute::NoImplicitFloat);
34312 if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
34313 auto *Node = cast<AtomicSDNode>(
N);
34315 if (
N->getValueType(0) == MVT::i128) {
34316 if (Subtarget.is64Bit() && Subtarget.
hasAVX()) {
34318 Node->getBasePtr(), Node->getMemOperand());
34333 MVT LdVT = Subtarget.
hasSSE2() ? MVT::v2i64 : MVT::v4f32;
34335 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
34337 MVT::i64, Node->getMemOperand());
34355 if (Subtarget.hasX87()) {
34359 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
34361 dl, Tys, Ops, MVT::i64,
34362 Node->getMemOperand());
34363 SDValue Chain = Result.getValue(1);
34370 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
34373 SDValue StoreOps[] = { Chain, Result, StackPtr };
34381 Result = DAG.
getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
34383 Results.push_back(Result.getValue(1));
34409 EVT DstVT =
N->getValueType(0);
34410 EVT SrcVT =
N->getOperand(0).getValueType();
34414 if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) {
34415 assert(!Subtarget.is64Bit() &&
"Expected 32-bit mode");
34425 if (DstVT.
isVector() && SrcVT == MVT::x86mmx) {
34429 "Unexpected type action!");
34441 EVT VT =
N->getValueType(0);
34442 if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&
34443 (Subtarget.hasVLX() || !Subtarget.
hasAVX512())) {
34444 auto *Gather = cast<MaskedGatherSDNode>(
N);
34445 SDValue Index = Gather->getIndex();
34446 if (Index.getValueType() != MVT::v2i64)
34449 "Unexpected type action!");
34451 SDValue Mask = Gather->getMask();
34452 assert(Mask.getValueType() == MVT::v2i1 &&
"Unexpected mask type");
34454 Gather->getPassThru(),
34456 if (!Subtarget.hasVLX()) {
34463 SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
34464 Gather->getBasePtr(), Index, Gather->getScale() };
34467 Gather->getMemoryVT(), Gather->getMemOperand());
34478 MVT VT =
N->getSimpleValueType(0);
34481 "Unexpected type action!");
34484 auto *Ld = cast<LoadSDNode>(
N);
34486 MVT LdVT = Subtarget.is64Bit() && VT.
isInteger() ? MVT::i64 : MVT::f64;
34487 SDValue Res = DAG.
getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
34488 Ld->getPointerInfo(), Ld->getOriginalAlign(),
34501 SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
34503 MVT::i64, Ld->getMemOperand());
34514 assert(
N->getValueType(0) == MVT::i64 &&
"Unexpected VT!");
34515 assert((Subtarget.hasXOP() || Subtarget.hasGFNI()) &&
"Expected XOP/GFNI");
34523 assert(
N->getSimpleValueType(0) == MVT::f16 &&
34524 "Unexpected Value type of EXTRACT_VECTOR_ELT!");
34525 assert(Subtarget.hasFP16() &&
"Expected FP16");
34541#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
35005#undef NODE_NAME_CASE
35039 switch (AM.
Scale) {
35104 return NumBits1 > NumBits2;
35122 return isInt<32>(Imm);
35127 return isInt<32>(Imm);
35131 return isInt<32>(Imm);
35139 return NumBits1 > NumBits2;
35149 return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
35177 if (!Subtarget.is64Bit())
35183 if (isa<MaskedLoadSDNode>(ExtVal.
getOperand(0)))
35197 if (Subtarget.useSoftFloat())
35210 return Subtarget.hasFP16();
35222 EVT DestVT)
const {
35224 return !(SrcVT == MVT::i32 && DestVT == MVT::i16);
35267 if (VT == MVT::v32i8 || VT == MVT::v16i16)
35284 EVT ConditionVT)
const {
    if (mi.readsRegister(X86::EFLAGS, nullptr))
    if (mi.definesRegister(X86::EFLAGS, nullptr))
    if (Succ->isLiveIn(X86::EFLAGS))
  Register DstReg = MI.getOperand(0).getReg();
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register fallDstReg = MRI.createVirtualRegister(RC);
  BuildMI(fallMBB, MIMD, TII->get(X86::XABORT_DEF));
  BuildMI(fallMBB, MIMD, TII->get(TargetOpcode::COPY), fallDstReg)
  BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
  MI.eraseFromParent();
  assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!");
  Register DestReg = MI.getOperand(0).getReg();
  unsigned ArgSize = MI.getOperand(6).getImm();
  unsigned ArgMode = MI.getOperand(7).getImm();
  assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand");
  unsigned TotalNumIntRegs = 6;
  unsigned TotalNumXMMRegs = 8;
  bool UseGPOffset = (ArgMode == 1);
  bool UseFPOffset = (ArgMode == 2);
  unsigned MaxOffset = TotalNumIntRegs * 8 +
                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
  bool NeedsAlign = (Alignment > 8);
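// A hedged, stand-alone sketch of the x86-64 SysV va_arg register-save-area
// bookkeeping computed above (6 GPRs at 8 bytes, 8 XMM regs at 16 bytes,
// 8-byte rounding for the overflow area). The struct and helper names are
// illustrative only, not LLVM APIs.
#include <cstdio>

struct VAArgPlan {
  unsigned MaxOffset;   // largest gp_offset/fp_offset that still has a saved register
  unsigned ArgSizeA8;   // argument size rounded up to 8 bytes
  bool NeedsAlign;      // needs >8-byte alignment in the overflow area
};

static VAArgPlan planVAArg(unsigned ArgSize, unsigned Alignment, bool UseFPOffset) {
  const unsigned TotalNumIntRegs = 6;  // rdi, rsi, rdx, rcx, r8, r9
  const unsigned TotalNumXMMRegs = 8;  // xmm0..xmm7
  VAArgPlan P;
  P.MaxOffset = TotalNumIntRegs * 8 + (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
  P.ArgSizeA8 = (ArgSize + 7) & ~7u;
  P.NeedsAlign = Alignment > 8;
  return P;
}

int main() {
  VAArgPlan P = planVAArg(/*ArgSize=*/16, /*Alignment=*/16, /*UseFPOffset=*/true);
  std::printf("MaxOffset=%u ArgSizeA8=%u NeedsAlign=%d\n", P.MaxOffset, P.ArgSizeA8, P.NeedsAlign);
}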
35473 unsigned OffsetDestReg = 0;
35474 unsigned OverflowDestReg = 0;
35475 unsigned OffsetReg = 0;
35477 if (!UseGPOffset && !UseFPOffset) {
35481 OverflowDestReg = DestReg;
35483 offsetMBB =
nullptr;
35484 overflowMBB = thisMBB;
35500 OffsetDestReg =
MRI.createVirtualRegister(AddrRegClass);
35501 OverflowDestReg =
MRI.createVirtualRegister(AddrRegClass);
35511 MF->
insert(MBBIter, offsetMBB);
35512 MF->
insert(MBBIter, overflowMBB);
35513 MF->
insert(MBBIter, endMBB);
35529 OffsetReg =
MRI.createVirtualRegister(OffsetRegClass);
35530 BuildMI(thisMBB, MIMD,
TII->get(X86::MOV32rm), OffsetReg)
35534 .
addDisp(Disp, UseFPOffset ? 4 : 0)
35539 BuildMI(thisMBB, MIMD,
TII->get(X86::CMP32ri))
35541 .
addImm(MaxOffset + 8 - ArgSizeA8);
35545 BuildMI(thisMBB, MIMD,
TII->get(X86::JCC_1))
35554 Register RegSaveReg =
MRI.createVirtualRegister(AddrRegClass);
35568 Register OffsetReg64 =
MRI.createVirtualRegister(AddrRegClass);
35569 BuildMI(offsetMBB, MIMD,
TII->get(X86::SUBREG_TO_REG), OffsetReg64)
35572 .
addImm(X86::sub_32bit);
35575 BuildMI(offsetMBB, MIMD,
TII->get(X86::ADD64rr), OffsetDestReg)
35580 BuildMI(offsetMBB, MIMD,
TII->get(X86::ADD32rr), OffsetDestReg)
35586 Register NextOffsetReg =
MRI.createVirtualRegister(OffsetRegClass);
35587 BuildMI(offsetMBB, MIMD,
TII->get(X86::ADD32ri), NextOffsetReg)
35589 .
addImm(UseFPOffset ? 16 : 8);
35592 BuildMI(offsetMBB, MIMD,
TII->get(X86::MOV32mr))
35596 .
addDisp(Disp, UseFPOffset ? 4 : 0)
35602 BuildMI(offsetMBB, MIMD,
TII->get(X86::JMP_1))
35611 Register OverflowAddrReg =
MRI.createVirtualRegister(AddrRegClass);
35626 Register TmpReg =
MRI.createVirtualRegister(AddrRegClass);
35633 .
addReg(OverflowAddrReg)
35643 BuildMI(overflowMBB, MIMD,
TII->get(TargetOpcode::COPY), OverflowDestReg)
35644 .
addReg(OverflowAddrReg);
35649 Register NextAddrReg =
MRI.createVirtualRegister(AddrRegClass);
35654 .
addReg(OverflowDestReg)
35671 TII->get(X86::PHI), DestReg)
35677 MI.eraseFromParent();
  SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
  switch (MI.getOpcode()) {
  case X86::CMOV_FR16:
  case X86::CMOV_FR16X:
  case X86::CMOV_FR32:
  case X86::CMOV_FR32X:
  case X86::CMOV_FR64:
  case X86::CMOV_FR64X:
  case X86::CMOV_GR8:
  case X86::CMOV_GR16:
  case X86::CMOV_GR32:
  case X86::CMOV_RFP32:
  case X86::CMOV_RFP64:
  case X86::CMOV_RFP80:
  case X86::CMOV_VR64:
  case X86::CMOV_VR128:
  case X86::CMOV_VR128X:
  case X86::CMOV_VR256:
  case X86::CMOV_VR256X:
  case X86::CMOV_VR512:
  case X86::CMOV_VK1:
  case X86::CMOV_VK2:
  case X86::CMOV_VK4:
  case X86::CMOV_VK8:
  case X86::CMOV_VK16:
  case X86::CMOV_VK32:
  case X86::CMOV_VK64:
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();
    if (MIIt->getOperand(3).getImm() == OppCC)
    if (auto It = RegRewriteTable.find(Op1Reg); It != RegRewriteTable.end())
      Op1Reg = It->second.first;
    if (auto It = RegRewriteTable.find(Op2Reg); It != RegRewriteTable.end())
      Op2Reg = It->second.second;
    BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg)
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
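// A hedged sketch of the register-rewrite step above: when a group of CMOVs is
// turned into PHIs, a later CMOV that reads an earlier CMOV's destination must
// instead read the per-predecessor incoming value. Plain std types stand in
// for Register/DenseMap; the register numbers are made up for illustration.
#include <cstdio>
#include <map>
#include <utility>

using Reg = unsigned;

int main() {
  // DestReg -> (value incoming from the "true" block, value from the "false" block)
  std::map<Reg, std::pair<Reg, Reg>> RegRewriteTable;
  RegRewriteTable[10] = {20, 21};  // earlier PHI: %10 = phi [%20, TrueMBB], [%21, FalseMBB]

  Reg Op1Reg = 10, Op2Reg = 30;    // a later CMOV reads %10 as its true-side operand
  if (auto It = RegRewriteTable.find(Op1Reg); It != RegRewriteTable.end())
    Op1Reg = It->second.first;     // substitute the true-side incoming value
  if (auto It = RegRewriteTable.find(Op2Reg); It != RegRewriteTable.end())
    Op2Reg = It->second.second;    // substitute the false-side incoming value

  std::printf("phi operands: [%%%u, True], [%%%u, False]\n", Op1Reg, Op2Reg);
}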
35796X86TargetLowering::EmitLoweredCascadedSelect(
MachineInstr &FirstCMOV,
35880 F->insert(It, FirstInsertedMBB);
35881 F->insert(It, SecondInsertedMBB);
35882 F->insert(It, SinkMBB);
35887 FirstInsertedMBB->
addLiveIn(X86::EFLAGS);
35892 if (!SecondCascadedCMOV.
killsRegister(X86::EFLAGS,
nullptr) &&
35894 SecondInsertedMBB->
addLiveIn(X86::EFLAGS);
35921 BuildMI(FirstInsertedMBB, MIMD,
TII->get(X86::JCC_1))
35931 BuildMI(*SinkMBB, SinkMBB->
begin(), MIMD,
TII->get(X86::PHI), DestReg)
35933 .
addMBB(SecondInsertedMBB)
36014 (NextMIIt->getOperand(3).getImm() ==
CC ||
36015 NextMIIt->getOperand(3).getImm() == OppCC)) {
36016 LastCMOV = &*NextMIIt;
36023 if (LastCMOV == &
MI && NextMIIt != ThisMBB->
end() &&
36024 NextMIIt->getOpcode() ==
MI.getOpcode() &&
36025 NextMIIt->getOperand(2).getReg() ==
MI.getOperand(2).getReg() &&
36026 NextMIIt->getOperand(1).getReg() ==
MI.getOperand(0).getReg() &&
36027 NextMIIt->getOperand(1).isKill()) {
36028 return EmitLoweredCascadedSelect(
MI, *NextMIIt, ThisMBB);
36037 F->insert(It, FalseMBB);
36038 F->insert(It, SinkMBB);
36041 unsigned CallFrameSize =
TII->getCallFrameSizeAt(
MI);
36058 if (
MI.isDebugInstr())
36062 SinkMBB->
splice(SinkMBB->
end(), ThisMBB,
36086 ThisMBB->
erase(MIItBegin, MIItEnd);
36093 return X86::SUB64ri32;
36095 return X86::SUB32ri;
36115 MF->
insert(MBBIter, testMBB);
36116 MF->
insert(MBBIter, blockMBB);
36117 MF->
insert(MBBIter, tailMBB);
36119 Register sizeVReg =
MI.getOperand(1).getReg();
36123 Register TmpStackPtr =
MRI.createVirtualRegister(
36125 Register FinalStackPtr =
MRI.createVirtualRegister(
36144 BuildMI(testMBB, MIMD,
TII->get(X86::JCC_1))
36162 const unsigned XORMIOpc =
36176 BuildMI(tailMBB, MIMD,
TII->get(TargetOpcode::COPY),
36177 MI.getOperand(0).getReg())
36186 MI.eraseFromParent();
  const bool Is64Bit = Subtarget.is64Bit();
  const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
  const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
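// A hedged sketch of the segmented-stack limit location chosen above: the
// stack limit lives in TLS, addressed off %fs (64-bit) or %gs (32-bit) at an
// ABI-specific offset. The constants mirror the lowering; the struct and
// helper are illustrative only.
#include <cstdio>

struct TlsStackLimit { const char *Segment; unsigned Offset; };

static TlsStackLimit segmentedStackLimit(bool Is64Bit, bool IsLP64) {
  const char *Seg = Is64Bit ? "fs" : "gs";
  unsigned Off = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;  // LP64 / x32 / ia32
  return {Seg, Off};
}

int main() {
  TlsStackLimit L = segmentedStackLimit(/*Is64Bit=*/true, /*IsLP64=*/true);
  std::printf("cmp %%sp, %%%s:0x%x\n", L.Segment, L.Offset);
}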
36232 Register mallocPtrVReg =
MRI.createVirtualRegister(AddrRegClass),
36233 bumpSPPtrVReg =
MRI.createVirtualRegister(AddrRegClass),
36234 tmpSPVReg =
MRI.createVirtualRegister(AddrRegClass),
36235 SPLimitVReg =
MRI.createVirtualRegister(AddrRegClass),
36236 sizeVReg =
MI.getOperand(1).getReg(),
36242 MF->
insert(MBBIter, bumpMBB);
36243 MF->
insert(MBBIter, mallocMBB);
36244 MF->
insert(MBBIter, continueMBB);
36252 BuildMI(BB, MIMD,
TII->get(TargetOpcode::COPY), tmpSPVReg).
addReg(physSPReg);
36253 BuildMI(BB, MIMD,
TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
36255 BuildMI(BB, MIMD,
TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
36262 BuildMI(bumpMBB, MIMD,
TII->get(TargetOpcode::COPY), physSPReg)
36264 BuildMI(bumpMBB, MIMD,
TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
36272 BuildMI(mallocMBB, MIMD,
TII->get(X86::MOV64rr), X86::RDI)
36274 BuildMI(mallocMBB, MIMD,
TII->get(X86::CALL64pcrel32))
36279 }
else if (Is64Bit) {
36280 BuildMI(mallocMBB, MIMD,
TII->get(X86::MOV32rr), X86::EDI)
36282 BuildMI(mallocMBB, MIMD,
TII->get(X86::CALL64pcrel32))
36288 BuildMI(mallocMBB, MIMD,
TII->get(X86::SUB32ri), physSPReg).
addReg(physSPReg)
36291 BuildMI(mallocMBB, MIMD,
TII->get(X86::CALLpcrel32))
36298 BuildMI(mallocMBB, MIMD,
TII->get(X86::ADD32ri), physSPReg).
addReg(physSPReg)
36301 BuildMI(mallocMBB, MIMD,
TII->get(TargetOpcode::COPY), mallocPtrVReg)
36302 .
addReg(IsLP64 ? X86::RAX : X86::EAX);
36312 BuildMI(*continueMBB, continueMBB->
begin(), MIMD,
TII->get(X86::PHI),
36313 MI.getOperand(0).getReg())
36320 MI.eraseFromParent();
36323 return continueMBB;
         "SEH does not use catchret!");
  if (!Subtarget.is32Bit())
  MI.getOperand(0).setMBB(RestoreMBB);
  auto RestoreMBBI = RestoreMBB->begin();
  BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4)).addMBB(TargetMBB);
  assert(MI.getOperand(3).isGlobal() && "This should be a global");
      Subtarget.is64Bit() ?
  if (Subtarget.is64Bit()) {
    BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
        MI.getOperand(3).getTargetFlags())
    BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
        MI.getOperand(3).getTargetFlags())
    BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
        MI.getOperand(3).getTargetFlags())
  MI.eraseFromParent();
  case X86::INDIRECT_THUNK_CALL32:
    return X86::CALLpcrel32;
  case X86::INDIRECT_THUNK_CALL64:
    return X86::CALL64pcrel32;
  case X86::INDIRECT_THUNK_TCRETURN32:
    return X86::TCRETURNdi;
  case X86::INDIRECT_THUNK_TCRETURN64:
    return X86::TCRETURNdi64;
  if (Subtarget.useRetpolineExternalThunk()) {
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_eax";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_ecx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edi";
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__x86_indirect_thunk_r11";
  if (Subtarget.useRetpolineIndirectCalls() ||
      Subtarget.useRetpolineIndirectBranches()) {
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_eax";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_ecx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edx";
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edi";
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__llvm_retpoline_r11";
  if (Subtarget.useLVIControlFlowIntegrity()) {
    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
    return "__llvm_lvi_thunk_r11";
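// A hedged, stand-alone sketch of the naming scheme above: retpoline thunks
// are named per scratch register, external thunks use the __x86_indirect_thunk_
// prefix, and LVI CFI always uses r11. This helper only mirrors the string
// construction; it is not the LLVM API.
#include <cstdio>
#include <string>

static std::string indirectThunkName(bool ExternalThunk, const char *Reg) {
  return (ExternalThunk ? std::string("__x86_indirect_thunk_")
                        : std::string("__llvm_retpoline_")) + Reg;
}

int main() {
  std::printf("%s\n", indirectThunkName(false, "r11").c_str());  // __llvm_retpoline_r11
  std::printf("%s\n", indirectThunkName(true, "eax").c_str());   // __x86_indirect_thunk_eax
}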
  llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
  Register CalleeVReg = MI.getOperand(0).getReg();
  if (Subtarget.is64Bit())
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
  for (const auto &MO : MI.operands()) {
    if (MO.isReg() && MO.isUse())
      llvm::replace(AvailableRegs, static_cast<unsigned>(MO.getReg()), 0U);
  unsigned AvailableReg = 0;
  for (unsigned MaybeReg : AvailableRegs) {
      AvailableReg = MaybeReg;
         "available registers");
  BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), AvailableReg)
  MI.getOperand(0).ChangeToES(Symbol);
  MI.setDesc(TII->get(Opc));
36567void X86TargetLowering::emitSetJmpShadowStackFix(
MachineInstr &
MI,
36581 Register ZReg =
MRI.createVirtualRegister(PtrRC);
36582 unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
36589 Register SSPCopyReg =
MRI.createVirtualRegister(PtrRC);
36590 unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
36594 unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
36597 const unsigned MemOpndSlot = 1;
36600 MIB.
addDisp(
MI.getOperand(MemOpndSlot + i), SSPOffset);
36602 MIB.
add(
MI.getOperand(MemOpndSlot + i));
36624 unsigned MemOpndSlot = 0;
36626 unsigned CurOp = 0;
36628 DstReg =
MI.getOperand(CurOp++).getReg();
36630 assert(
TRI->isTypeLegalForClass(*RC, MVT::i32) &&
"Invalid destination!");
36632 Register mainDstReg =
MRI.createVirtualRegister(RC);
36633 Register restoreDstReg =
MRI.createVirtualRegister(RC);
36635 MemOpndSlot = CurOp;
36638 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
36639 "Invalid Pointer Size!");
36674 unsigned PtrStoreOpc = 0;
36675 unsigned LabelReg = 0;
36681 if (!UseImmLabel) {
36682 PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
36684 LabelReg =
MRI.createVirtualRegister(PtrRC);
36685 if (Subtarget.is64Bit()) {
36686 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(X86::LEA64r), LabelReg)
36694 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(X86::LEA32r), LabelReg)
36702 PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
36704 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(PtrStoreOpc));
36707 MIB.
addDisp(
MI.getOperand(MemOpndSlot + i), LabelOffset);
36709 MIB.
add(
MI.getOperand(MemOpndSlot + i));
36718 emitSetJmpShadowStackFix(
MI, thisMBB);
36722 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(X86::EH_SjLj_Setup))
36732 BuildMI(mainMBB, MIMD,
TII->get(X86::MOV32r0), mainDstReg);
36736 BuildMI(*sinkMBB, sinkMBB->
begin(), MIMD,
TII->get(X86::PHI), DstReg)
36743 if (
RegInfo->hasBasePointer(*MF)) {
36744 const bool Uses64BitFramePtr =
36750 unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
36755 BuildMI(restoreMBB, MIMD,
TII->get(X86::MOV32ri), restoreDstReg).
addImm(1);
36759 MI.eraseFromParent();
36818 MF->
insert(
I, fixShadowLoopPrepareMBB);
36819 MF->
insert(
I, fixShadowLoopMBB);
36830 Register ZReg =
MRI.createVirtualRegister(&X86::GR32RegClass);
36831 BuildMI(checkSspMBB, MIMD,
TII->get(X86::MOV32r0), ZReg);
36833 if (PVT == MVT::i64) {
36834 Register TmpZReg =
MRI.createVirtualRegister(PtrRC);
36835 BuildMI(checkSspMBB, MIMD,
TII->get(X86::SUBREG_TO_REG), TmpZReg)
36838 .
addImm(X86::sub_32bit);
36843 Register SSPCopyReg =
MRI.createVirtualRegister(PtrRC);
36844 unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
36849 unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
36850 BuildMI(checkSspMBB, MIMD,
TII->get(TestRROpc))
36853 BuildMI(checkSspMBB, MIMD,
TII->get(X86::JCC_1))
36860 Register PrevSSPReg =
MRI.createVirtualRegister(PtrRC);
36861 unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
36864 BuildMI(fallMBB, MIMD,
TII->get(PtrLoadOpc), PrevSSPReg);
36869 else if (MO.
isReg())
36878 Register SspSubReg =
MRI.createVirtualRegister(PtrRC);
36879 unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
36880 BuildMI(fallMBB, MIMD,
TII->get(SubRROpc), SspSubReg)
36885 BuildMI(fallMBB, MIMD,
TII->get(X86::JCC_1))
  unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
  unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
  Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspFirstShrReg)
  unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
  Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspSecondShrReg)
  BuildMI(fixShadowMBB, MIMD, TII->get(X86::JCC_1))
  unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64ri : X86::SHL32ri;
  Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(ShlR1Opc), SspAfterShlReg)
      .addReg(SspSecondShrReg)
  Register Value128InReg = MRI.createVirtualRegister(PtrRC);
  unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
  BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(MovRIOpc), Value128InReg)
  fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
  Register DecReg = MRI.createVirtualRegister(PtrRC);
  Register CounterReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::PHI), CounterReg)
      .addMBB(fixShadowLoopPrepareMBB)
      .addMBB(fixShadowLoopMBB);
  BuildMI(fixShadowLoopMBB, MIMD, TII->get(IncsspOpc)).addReg(Value128InReg);
  unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
  BuildMI(fixShadowLoopMBB, MIMD, TII->get(DecROpc), DecReg).addReg(CounterReg);
  BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::JCC_1))
      .addMBB(fixShadowLoopMBB)
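// A hedged arithmetic sketch of the shadow-stack fix above: the byte distance
// between the current SSP and the setjmp-time SSP is converted to a slot count
// by shifting right by 3 (8-byte slots) or 2 (4-byte slots), which is what the
// SHR by `Offset` computes before the INCSSP loop. Stand-alone and
// illustrative only; it omits the actual RDSSP/INCSSP machinery.
#include <cstdint>
#include <cstdio>

static uint64_t sspSlotsToUnwind(uint64_t CurSSP, uint64_t SavedSSP, bool Is64Bit) {
  unsigned Offset = Is64Bit ? 3 : 2;      // log2(shadow-stack slot size)
  return (CurSSP - SavedSSP) >> Offset;   // number of shadow-stack entries to pop
}

int main() {
  std::printf("%llu slots\n",
              (unsigned long long)sspSlotsToUnwind(0x7000, 0x6f80, /*Is64Bit=*/true));
}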
36969 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
36970 "Invalid Pointer Size!");
36973 (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
36977 Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
36985 unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
36986 unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
36992 thisMBB = emitLongJmpShadowStackFix(
MI, thisMBB);
37009 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(PtrLoadOpc), Tmp);
37013 MIB.
addDisp(MO, LabelOffset);
37014 else if (MO.
isReg())
37023 MIB =
BuildMI(*thisMBB,
MI, MIMD,
TII->get(PtrLoadOpc), SP);
37026 MIB.
addDisp(
MI.getOperand(i), SPOffset);
37028 MIB.
add(
MI.getOperand(i));
37037 MI.eraseFromParent();
37051 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
37060 Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
37063 (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
37064 VR =
MRI->createVirtualRegister(TRC);
37065 Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
37067 if (Subtarget.is64Bit())
37103 unsigned MaxCSNum = 0;
37104 for (
auto &
MBB : *MF) {
37109 for (
const auto &
MI :
MBB) {
37110 if (
MI.isDebugInstr())
37113 assert(
MI.isEHLabel() &&
"expected EH_LABEL");
37114 Sym =
MI.getOperand(0).getMCSymbol();
37118 if (!MF->hasCallSiteLandingPad(
Sym))
37121 for (
unsigned CSI : MF->getCallSiteLandingPad(
Sym)) {
37122 CallSiteNumToLPad[CSI].push_back(&
MBB);
37123 MaxCSNum = std::max(MaxCSNum, CSI);
  std::vector<MachineBasicBlock *> LPadList;
  for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
    for (auto &LP : CallSiteNumToLPad[CSI]) {
      LPadList.push_back(LP);
      InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");
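// A hedged sketch of the dispatch-table construction above: landing pads are
// grouped by call-site index and then flattened, in index order, into the list
// the SjLj jump table indexes. Plain strings stand in for MachineBasicBlock*.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<unsigned, std::vector<std::string>> CallSiteNumToLPad = {
      {1, {"lpad.a"}}, {2, {"lpad.b", "lpad.c"}}};
  unsigned MaxCSNum = 2;

  std::vector<std::string> LPadList;
  for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI)
    for (auto &LP : CallSiteNumToLPad[CSI])
      LPadList.push_back(LP);

  for (size_t I = 0; I != LPadList.size(); ++I)
    std::printf("table[%zu] -> %s\n", I, LPadList[I].c_str());
}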
37156 MF->push_back(DispatchBB);
37157 MF->push_back(DispContBB);
37158 MF->push_back(TrapBB);
37162 SetupEntryBlockForSjLj(
MI, BB, DispatchBB, FI);
37173 const bool FPIs64Bit =
37180 unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
37185 BuildMI(DispatchBB, MIMD,
TII->get(X86::NOOP))
37190 Register IReg =
MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
37192 Subtarget.is64Bit() ? 8 : 4);
37193 BuildMI(DispatchBB, MIMD,
TII->get(X86::CMP32ri))
37195 .
addImm(LPadList.size());
37196 BuildMI(DispatchBB, MIMD,
TII->get(X86::JCC_1))
37200 if (Subtarget.is64Bit()) {
37201 Register BReg =
MRI->createVirtualRegister(&X86::GR64RegClass);
37202 Register IReg64 =
MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
37205 BuildMI(DispContBB, MIMD,
TII->get(X86::LEA64r), BReg)
37212 BuildMI(DispContBB, MIMD,
TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
37215 .
addImm(X86::sub_32bit);
37220 BuildMI(DispContBB, MIMD,
TII->get(X86::JMP64m))
37228 Register OReg =
MRI->createVirtualRegister(&X86::GR32RegClass);
37229 Register OReg64 =
MRI->createVirtualRegister(&X86::GR64RegClass);
37230 Register TReg =
MRI->createVirtualRegister(&X86::GR64RegClass);
37233 BuildMI(DispContBB, MIMD,
TII->get(X86::MOV32rm), OReg)
37240 BuildMI(DispContBB, MIMD,
TII->get(X86::MOVSX64rr32), OReg64)
37243 BuildMI(DispContBB, MIMD,
TII->get(X86::ADD64rr), TReg)
37255 BuildMI(DispContBB, MIMD,
TII->get(X86::JMP32m))
37265 for (
auto &LP : LPadList)
37266 if (SeenMBBs.
insert(LP).second)
37271 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
37279 for (
auto *MBBS : Successors) {
37280 if (MBBS->isEHPad()) {
37297 for (
auto &MOp :
II.operands())
37299 DefRegs[MOp.getReg()] =
true;
37302 for (
unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) {
37303 unsigned Reg = SavedRegs[RegIdx];
37314 for (
auto &LP : MBBLPads)
37315 LP->setIsEHPad(
false);
37318 MI.eraseFromParent();
37333 unsigned AdjStackDown =
TII.getCallFrameSetupOpcode();
37339 unsigned AdjStackUp =
TII.getCallFrameDestroyOpcode();
  auto TMMImmToTMMReg = [](unsigned Imm) {
    assert(Imm < 8 && "Illegal tmm index");
    return X86::TMM0 + Imm;
  auto TMMImmToTMMPair = [](unsigned Imm) {
    assert(Imm < 8 && "Illegal tmm pair index.");
    return X86::TMM0_TMM1 + Imm / 2;
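// A hedged, stand-alone sketch of the tile-register mapping lambdas above: an
// AMX tile immediate 0..7 selects TMM0..TMM7, and a pair immediate selects one
// of the even-aligned TMM pairs. Plain integers stand in for the X86::TMM*
// register enumerators.
#include <cassert>
#include <cstdio>

static unsigned tmmImmToTMMReg(unsigned Imm, unsigned TMM0) {
  assert(Imm < 8 && "Illegal tmm index");
  return TMM0 + Imm;
}

static unsigned tmmImmToTMMPair(unsigned Imm, unsigned TMM0_TMM1) {
  assert(Imm < 8 && "Illegal tmm pair index");
  return TMM0_TMM1 + Imm / 2;   // imm 0/1 -> pair 0, imm 2/3 -> pair 1, ...
}

int main() {
  std::printf("%u %u\n", tmmImmToTMMReg(3, 100), tmmImmToTMMPair(3, 200));
}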
37362 switch (
MI.getOpcode()) {
37365 case X86::INDIRECT_THUNK_CALL32:
37366 case X86::INDIRECT_THUNK_CALL64:
37367 case X86::INDIRECT_THUNK_TCRETURN32:
37368 case X86::INDIRECT_THUNK_TCRETURN64:
37369 return EmitLoweredIndirectThunk(
MI, BB);
37370 case X86::CATCHRET:
37371 return EmitLoweredCatchRet(
MI, BB);
37372 case X86::SEG_ALLOCA_32:
37373 case X86::SEG_ALLOCA_64:
37374 return EmitLoweredSegAlloca(
MI, BB);
37375 case X86::PROBED_ALLOCA_32:
37376 case X86::PROBED_ALLOCA_64:
37377 return EmitLoweredProbedAlloca(
MI, BB);
37378 case X86::TLSCall_32:
37379 case X86::TLSCall_64:
37380 return EmitLoweredTLSCall(
MI, BB);
37381 case X86::CMOV_FR16:
37382 case X86::CMOV_FR16X:
37383 case X86::CMOV_FR32:
37384 case X86::CMOV_FR32X:
37385 case X86::CMOV_FR64:
37386 case X86::CMOV_FR64X:
37387 case X86::CMOV_GR8:
37388 case X86::CMOV_GR16:
37389 case X86::CMOV_GR32:
37390 case X86::CMOV_RFP32:
37391 case X86::CMOV_RFP64:
37392 case X86::CMOV_RFP80:
37393 case X86::CMOV_VR64:
37394 case X86::CMOV_VR128:
37395 case X86::CMOV_VR128X:
37396 case X86::CMOV_VR256:
37397 case X86::CMOV_VR256X:
37398 case X86::CMOV_VR512:
37399 case X86::CMOV_VK1:
37400 case X86::CMOV_VK2:
37401 case X86::CMOV_VK4:
37402 case X86::CMOV_VK8:
37403 case X86::CMOV_VK16:
37404 case X86::CMOV_VK32:
37405 case X86::CMOV_VK64:
37406 return EmitLoweredSelect(
MI, BB);
37408 case X86::FP80_ADDr:
37409 case X86::FP80_ADDm32: {
37412 int OrigCWFrameIdx =
37432 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), NewCW16)
37436 int NewCWFrameIdx =
37447 if (
MI.getOpcode() == X86::FP80_ADDr) {
37449 .
add(
MI.getOperand(0))
37450 .
add(
MI.getOperand(1))
37451 .
add(
MI.getOperand(2));
37454 .
add(
MI.getOperand(0))
37455 .
add(
MI.getOperand(1))
37456 .
add(
MI.getOperand(2))
37457 .
add(
MI.getOperand(3))
37458 .
add(
MI.getOperand(4))
37459 .
add(
MI.getOperand(5))
37460 .
add(
MI.getOperand(6));
37467 MI.eraseFromParent();
37471 case X86::FP32_TO_INT16_IN_MEM:
37472 case X86::FP32_TO_INT32_IN_MEM:
37473 case X86::FP32_TO_INT64_IN_MEM:
37474 case X86::FP64_TO_INT16_IN_MEM:
37475 case X86::FP64_TO_INT32_IN_MEM:
37476 case X86::FP64_TO_INT64_IN_MEM:
37477 case X86::FP80_TO_INT16_IN_MEM:
37478 case X86::FP80_TO_INT32_IN_MEM:
37479 case X86::FP80_TO_INT64_IN_MEM: {
37482 int OrigCWFrameIdx =
37500 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), NewCW16)
37504 int NewCWFrameIdx =
37512 TII->get(X86::FLDCW16m)), NewCWFrameIdx);
    switch (MI.getOpcode()) {
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    MI.eraseFromParent();
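// A hedged sketch of the control-word edit this expansion performs around the
// FIST store: the x87 rounding-control field (bits 10-11 of FPCW) is forced to
// 0b11 (round toward zero) and the original word is restored afterwards. This
// shows only the bit manipulation, not the FNSTCW/FLDCW sequence itself.
#include <cstdint>
#include <cstdio>

static uint16_t truncatingControlWord(uint16_t OrigCW) {
  return OrigCW | 0x0C00;   // RC = 11b: truncate toward zero
}

int main() {
  uint16_t Orig = 0x037F;   // a typical default FPCW value
  std::printf("orig=0x%04X trunc=0x%04X\n", Orig, truncatingControlWord(Orig));
}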
37547 case X86::VAARG_64:
37548 case X86::VAARG_X32:
37549 return EmitVAARGWithCustomInserter(
MI, BB);
37551 case X86::EH_SjLj_SetJmp32:
37552 case X86::EH_SjLj_SetJmp64:
37553 return emitEHSjLjSetJmp(
MI, BB);
37555 case X86::EH_SjLj_LongJmp32:
37556 case X86::EH_SjLj_LongJmp64:
37557 return emitEHSjLjLongJmp(
MI, BB);
37559 case X86::Int_eh_sjlj_setup_dispatch:
37560 return EmitSjLjDispatchBlock(
MI, BB);
37562 case TargetOpcode::STATEPOINT:
37567 case TargetOpcode::STACKMAP:
37568 case TargetOpcode::PATCHPOINT:
37571 case TargetOpcode::PATCHABLE_EVENT_CALL:
37572 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
37573 return emitPatchableEventCall(
MI, BB);
37575 case X86::LCMPXCHG8B: {
37588 if (!Subtarget.is32Bit() || !
TRI->hasBasePointer(*MF))
37595 assert(
TRI->getBaseRegister() == X86::ESI &&
37596 "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
37597 "base pointer in mind");
37602 Register computedAddrVReg =
MRI.createVirtualRegister(AddrRegClass);
37607 if (AM.
IndexReg == X86::NoRegister)
37614 while (RMBBI != BB->
rend() &&
37615 (RMBBI->definesRegister(X86::EAX,
nullptr) ||
37616 RMBBI->definesRegister(X86::EBX,
nullptr) ||
37617 RMBBI->definesRegister(X86::ECX,
nullptr) ||
37618 RMBBI->definesRegister(X86::EDX,
nullptr))) {
37623 BuildMI(*BB, *
MBBI, MIMD,
TII->get(X86::LEA32r), computedAddrVReg), AM);
37629 case X86::LCMPXCHG16B_NO_RBX: {
37632 if (
TRI->hasBasePointer(*MF) &&
37633 (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
37639 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), SaveRBX)
37643 BuildMI(*BB,
MI, MIMD,
TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
37650 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::RBX)
37657 MI.eraseFromParent();
37660 case X86::MWAITX: {
37663 bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX);
37666 if (!IsRBX || !
TRI->hasBasePointer(*MF)) {
37667 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::ECX)
37668 .
addReg(
MI.getOperand(0).getReg());
37669 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::EAX)
37670 .
addReg(
MI.getOperand(1).getReg());
37671 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::EBX)
37672 .
addReg(
MI.getOperand(2).getReg());
37674 MI.eraseFromParent();
37680 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::ECX)
37681 .
addReg(
MI.getOperand(0).getReg());
37682 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), X86::EAX)
37683 .
addReg(
MI.getOperand(1).getReg());
37684 assert(Subtarget.is64Bit() &&
"Expected 64-bit mode!");
37688 BuildMI(*BB,
MI, MIMD,
TII->get(TargetOpcode::COPY), SaveRBX)
37692 BuildMI(*BB,
MI, MIMD,
TII->get(X86::MWAITX_SAVE_RBX))
37694 .
addReg(
MI.getOperand(2).getReg())
37696 MI.eraseFromParent();
37700 case TargetOpcode::PREALLOCATED_SETUP: {
37701 assert(Subtarget.is32Bit() &&
"preallocated only used in 32-bit");
37704 int64_t PreallocatedId =
MI.getOperand(0).getImm();
37706 assert(StackAdjustment != 0 &&
"0 stack adjustment");
37708 << StackAdjustment <<
"\n");
37709 BuildMI(*BB,
MI, MIMD,
TII->get(X86::SUB32ri), X86::ESP)
37711 .
addImm(StackAdjustment);
37712 MI.eraseFromParent();
37715 case TargetOpcode::PREALLOCATED_ARG: {
37716 assert(Subtarget.is32Bit() &&
"preallocated calls only used in 32-bit");
37717 int64_t PreallocatedId =
MI.getOperand(1).getImm();
37718 int64_t ArgIdx =
MI.getOperand(2).getImm();
37722 <<
", arg offset " << ArgOffset <<
"\n");
37725 MI.getOperand(0).getReg()),
37726 X86::ESP,
false, ArgOffset);
37727 MI.eraseFromParent();
37730 case X86::PTDPBSSD:
37731 case X86::PTDPBSUD:
37732 case X86::PTDPBUSD:
37733 case X86::PTDPBUUD:
37734 case X86::PTDPBF16PS:
37735 case X86::PTDPFP16PS:
37736 case X86::PTCMMIMFP16PS:
37737 case X86::PTCMMRLFP16PS:
37738 case X86::PTDPBF8PS:
37739 case X86::PTDPBHF8PS:
37740 case X86::PTDPHBF8PS:
37741 case X86::PTDPHF8PS:
37742 case X86::PTTDPBF16PS:
37743 case X86::PTTDPFP16PS:
37744 case X86::PTTCMMIMFP16PS:
37745 case X86::PTTCMMRLFP16PS:
37746 case X86::PTCONJTCMMIMFP16PS:
37747 case X86::PTMMULTF32PS:
37748 case X86::PTTMMULTF32PS: {
    switch (MI.getOpcode()) {
    case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
    case X86::PTCMMIMFP16PS:
      Opc = X86::TCMMIMFP16PS;
    case X86::PTCMMRLFP16PS:
      Opc = X86::TCMMRLFP16PS;
    case X86::PTDPBF8PS: Opc = X86::TDPBF8PS; break;
    case X86::PTDPBHF8PS: Opc = X86::TDPBHF8PS; break;
    case X86::PTDPHBF8PS: Opc = X86::TDPHBF8PS; break;
    case X86::PTDPHF8PS: Opc = X86::TDPHF8PS; break;
    case X86::PTTDPBF16PS:
      Opc = X86::TTDPBF16PS;
    case X86::PTTDPFP16PS:
      Opc = X86::TTDPFP16PS;
    case X86::PTTCMMIMFP16PS:
      Opc = X86::TTCMMIMFP16PS;
    case X86::PTTCMMRLFP16PS:
      Opc = X86::TTCMMRLFP16PS;
    case X86::PTCONJTCMMIMFP16PS:
      Opc = X86::TCONJTCMMIMFP16PS;
    case X86::PTMMULTF32PS:
      Opc = X86::TMMULTF32PS;
    case X86::PTTMMULTF32PS:
      Opc = X86::TTMMULTF32PS;
    MI.eraseFromParent();
37800 case X86::PTILEZERO: {
37801 unsigned Imm =
MI.getOperand(0).getImm();
37802 BuildMI(*BB,
MI, MIMD,
TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
37803 MI.eraseFromParent();
37808 case X86::PTILEZEROV: {
37813 case X86::PTILELOADDRS:
37814 case X86::PTILELOADDRST1:
37815 case X86::PTILELOADD:
37816 case X86::PTILELOADDT1:
37817 case X86::PTILESTORED: {
37819 switch (
MI.getOpcode()) {
37821#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
37822 case X86::PTILELOADD:
37825 case X86::PTILELOADDT1:
37828 case X86::PTILESTORED:
37831 case X86::PTILELOADDRS:
37834 case X86::PTILELOADDRST1:
37838#undef GET_EGPR_IF_ENABLED
37841 unsigned CurOp = 0;
37842 if (Opc != X86::TILESTORED && Opc != X86::TILESTORED_EVEX)
37843 MIB.
addReg(TMMImmToTMMReg(
MI.getOperand(CurOp++).getImm()),
37846 MIB.
add(
MI.getOperand(CurOp++));
37847 MIB.
add(
MI.getOperand(CurOp++));
37848 MIB.
add(
MI.getOperand(CurOp++));
37849 MIB.
add(
MI.getOperand(CurOp++));
37850 MIB.
add(
MI.getOperand(CurOp++));
37852 if (Opc == X86::TILESTORED || Opc == X86::TILESTORED_EVEX)
37853 MIB.
addReg(TMMImmToTMMReg(
MI.getOperand(CurOp++).getImm()),
37856 MI.eraseFromParent();
37859 case X86::PT2RPNTLVWZ0:
37860 case X86::PT2RPNTLVWZ0T1:
37861 case X86::PT2RPNTLVWZ1:
37862 case X86::PT2RPNTLVWZ1T1:
37863 case X86::PT2RPNTLVWZ0RS:
37864 case X86::PT2RPNTLVWZ0RST1:
37865 case X86::PT2RPNTLVWZ1RS:
37866 case X86::PT2RPNTLVWZ1RST1: {
37869#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
37870 switch (
MI.getOpcode()) {
37873 case X86::PT2RPNTLVWZ0:
37876 case X86::PT2RPNTLVWZ0T1:
37879 case X86::PT2RPNTLVWZ1:
37882 case X86::PT2RPNTLVWZ1T1:
37885 case X86::PT2RPNTLVWZ0RS:
37888 case X86::PT2RPNTLVWZ0RST1:
37891 case X86::PT2RPNTLVWZ1RS:
37894 case X86::PT2RPNTLVWZ1RST1:
37898#undef GET_EGPR_IF_ENABLED
37902 MIB.
add(
MI.getOperand(1));
37903 MIB.
add(
MI.getOperand(2));
37904 MIB.
add(
MI.getOperand(3));
37905 MIB.
add(
MI.getOperand(4));
37906 MIB.
add(
MI.getOperand(5));
37907 MI.eraseFromParent();
37910 case X86::PTTRANSPOSED:
37911 case X86::PTCONJTFP16: {
37913 unsigned Opc =
MI.getOpcode() == X86::PTTRANSPOSED ? X86::TTRANSPOSED
37920 MI.eraseFromParent();
37923 case X86::PTCVTROWPS2BF16Hrri:
37924 case X86::PTCVTROWPS2BF16Lrri:
37925 case X86::PTCVTROWPS2PHHrri:
37926 case X86::PTCVTROWPS2PHLrri:
37927 case X86::PTCVTROWD2PSrri:
37928 case X86::PTILEMOVROWrri: {
37931 switch (
MI.getOpcode()) {
37934 case X86::PTCVTROWD2PSrri:
37935 Opc = X86::TCVTROWD2PSrri;
37937 case X86::PTCVTROWPS2BF16Hrri:
37938 Opc = X86::TCVTROWPS2BF16Hrri;
37940 case X86::PTCVTROWPS2PHHrri:
37941 Opc = X86::TCVTROWPS2PHHrri;
37943 case X86::PTCVTROWPS2BF16Lrri:
37944 Opc = X86::TCVTROWPS2BF16Lrri;
37946 case X86::PTCVTROWPS2PHLrri:
37947 Opc = X86::TCVTROWPS2PHLrri;
37949 case X86::PTILEMOVROWrri:
37950 Opc = X86::TILEMOVROWrri;
37954 MIB.
add(
MI.getOperand(0));
37956 MIB.
addImm(
MI.getOperand(2).getImm());
37958 MI.eraseFromParent();
37961 case X86::PTCVTROWPS2BF16Hrre:
37962 case X86::PTCVTROWPS2BF16Lrre:
37963 case X86::PTCVTROWPS2PHHrre:
37964 case X86::PTCVTROWPS2PHLrre:
37965 case X86::PTCVTROWD2PSrre:
37966 case X86::PTILEMOVROWrre: {
37969 switch (
MI.getOpcode()) {
37972 case X86::PTCVTROWD2PSrre:
37973 Opc = X86::TCVTROWD2PSrre;
37975 case X86::PTCVTROWPS2BF16Hrre:
37976 Opc = X86::TCVTROWPS2BF16Hrre;
37978 case X86::PTCVTROWPS2BF16Lrre:
37979 Opc = X86::TCVTROWPS2BF16Lrre;
37981 case X86::PTCVTROWPS2PHHrre:
37982 Opc = X86::TCVTROWPS2PHHrre;
37984 case X86::PTCVTROWPS2PHLrre:
37985 Opc = X86::TCVTROWPS2PHLrre;
37987 case X86::PTILEMOVROWrre:
37988 Opc = X86::TILEMOVROWrre;
37992 MIB.
add(
MI.getOperand(0));
37994 MIB.
add(
MI.getOperand(2));
37996 MI.eraseFromParent();
38009 const APInt &DemandedElts,
38011 EVT VT =
Op.getValueType();
38012 unsigned Opcode =
Op.getOpcode();
38019 auto NeedsSignExtension = [&](
SDValue V,
unsigned ActiveBits) {
38022 for (
unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
38023 if (!DemandedElts[i] || V.getOperand(i).isUndef())
38025 const APInt &Val = V.getConstantOperandAPInt(i);
38035 if (EltSize > ActiveBits && EltSize > 1 &&
isTypeLegal(VT) &&
38037 NeedsSignExtension(
Op.getOperand(1), ActiveBits)) {
38061 const APInt &Mask =
C->getAPIntValue();
38076 Width = std::min(Width, EltSize);
38083 if (ZeroExtendMask == Mask)
38100 const APInt &DemandedElts,
38103 unsigned NumSrcElts =
LHS.getValueType().getVectorNumElements();
38112 Known = Known.
zext(64);
38117 const APInt &DemandedElts,
38120 unsigned NumSrcElts =
LHS.getValueType().getVectorNumElements();
38124 APInt DemandedLoElts =
38126 APInt DemandedHiElts =
38139 const APInt &DemandedElts,
38142 unsigned NumSrcElts =
LHS.getValueType().getVectorNumElements();
38147 APInt DemandedLoElts =
38149 APInt DemandedHiElts =
38165 APInt DemandedEltsLHS, DemandedEltsRHS;
38167 DemandedElts, DemandedEltsLHS,
38170 const auto ComputeForSingleOpFunc =
38172 return KnownBitsFunc(
38177 if (DemandedEltsRHS.
isZero())
38178 return ComputeForSingleOpFunc(
Op.getOperand(0), DemandedEltsLHS);
38179 if (DemandedEltsLHS.
isZero())
38180 return ComputeForSingleOpFunc(
Op.getOperand(1), DemandedEltsRHS);
38182 return ComputeForSingleOpFunc(
Op.getOperand(0), DemandedEltsLHS)
38183 .intersectWith(ComputeForSingleOpFunc(
Op.getOperand(1), DemandedEltsRHS));
38188 const APInt &DemandedElts,
38190 unsigned Depth)
const {
38193 unsigned Opc =
Op.getOpcode();
38194 EVT VT =
Op.getValueType();
38199 "Should use MaskedValueIsZero if you don't know whether Op"
38200 " is a target node!");
38222 }
else if (!
Op.getOperand(0).isUndef()) {
38232 if (!
Op.getOperand(0).isUndef() &&
38244 unsigned NumLoBits =
Op.getOperand(0).getValueType().getVectorNumElements();
38251 EVT SrcVT = Src.getValueType();
38253 Op.getConstantOperandVal(1));
38262 unsigned ShAmt =
Op.getConstantOperandVal(1);
38276 Known.
Zero <<= ShAmt;
38277 Known.
One <<= ShAmt;
38293 APInt DemandedLHS, DemandedRHS;
38300 if (!!DemandedLHS) {
38304 if (!!DemandedRHS) {
38329 if (!Src.getSimpleValueType().isVector()) {
38336 if (
Op.getResNo() == 0) {
38366 LHS.getValueType() ==
RHS.getValueType() &&
38367 LHS.getValueType().getScalarType() == MVT::i8 &&
38368 "Unexpected PSADBW types");
38393 LHS.getValueType() ==
RHS.getValueType() &&
38394 LHS.getValueType().getVectorElementType() == MVT::i16 &&
38395 "Unexpected PMADDWD types");
38403 LHS.getValueType() ==
RHS.getValueType() &&
38404 LHS.getValueType().getVectorElementType() == MVT::i8 &&
38405 "Unexpected PMADDUBSW types");
38435 if (
auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
38436 unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
38437 unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
38494 EVT SrcVT =
Op.getOperand(0).getValueType();
38497 if (NumElts > NumSrcElts && DemandedElts.
countr_zero() >= NumSrcElts)
38509 EVT SrcVT =
Op.getOperand(1).getValueType();
38512 if (NumElts > NumSrcElts && DemandedElts.
countr_zero() >= NumSrcElts)
38531 for (
unsigned I = 0;
I != NumElts; ++
I) {
38532 if (!DemandedElts[
I])
38534 if (UndefElts[
I]) {
38548 Op, DemandedElts,
Depth, DAG,
38552 KnownLHS, KnownRHS);
38557 switch (
Op->getConstantOperandVal(0)) {
38558 case Intrinsic::x86_sse2_pmadd_wd:
38559 case Intrinsic::x86_avx2_pmadd_wd:
38560 case Intrinsic::x86_avx512_pmaddw_d_512: {
38564 LHS.getValueType() ==
RHS.getValueType() &&
38565 LHS.getValueType().getScalarType() == MVT::i16 &&
38566 "Unexpected PMADDWD types");
38570 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
38571 case Intrinsic::x86_avx2_pmadd_ub_sw:
38572 case Intrinsic::x86_avx512_pmaddubs_w_512: {
38576 LHS.getValueType() ==
RHS.getValueType() &&
38577 LHS.getValueType().getScalarType() == MVT::i8 &&
38578 "Unexpected PMADDUBSW types");
38582 case Intrinsic::x86_sse2_psad_bw:
38583 case Intrinsic::x86_avx2_psad_bw:
38584 case Intrinsic::x86_avx512_psad_bw_512: {
38588 LHS.getValueType() ==
RHS.getValueType() &&
38589 LHS.getValueType().getScalarType() == MVT::i8 &&
38590 "Unexpected PSADBW types");
38605 unsigned NumOps = Ops.
size();
38607 if (Mask.size() == NumElts) {
38610 for (
unsigned i = 0; i != NumElts; ++i) {
38611 if (!DemandedElts[i])
        assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
               "Shuffle index out of range");
        unsigned OpIdx = (unsigned)M / NumElts;
        unsigned EltIdx = (unsigned)M % NumElts;
        DemandedOps[OpIdx].setBit(EltIdx);
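// A hedged sketch of the demanded-element bookkeeping above: a shuffle mask
// index M in [0, NumOps*NumElts) names element (M % NumElts) of operand
// (M / NumElts), so demanded result elements are scattered back onto
// per-operand demanded sets. std::vector<bool> stands in for APInt here.
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumOps = 2, NumElts = 4;
  int Mask[NumElts] = {0, 5, 2, 7};                 // result[i] = source element Mask[i]
  std::vector<std::vector<bool>> DemandedOps(NumOps, std::vector<bool>(NumElts, false));

  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned M = Mask[i];
    DemandedOps[M / NumElts][M % NumElts] = true;   // (operand index, element index)
  }

  for (unsigned Op = 0; Op != NumOps; ++Op)
    for (unsigned E = 0; E != NumElts; ++E)
      if (DemandedOps[Op][E])
        std::printf("op%u elt%u demanded\n", Op, E);
}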
38637 for (
unsigned i = 0; i != NumOps && !Known.
isUnknown(); ++i) {
38638 if (!DemandedOps[i])
38651 unsigned Depth)
const {
38652 EVT VT =
Op.getValueType();
38654 unsigned Opcode =
Op.getOpcode();
38662 MVT SrcVT = Src.getSimpleValueType();
38664 assert(VTBits < NumSrcBits &&
"Illegal truncation input type");
38667 if (Tmp > (NumSrcBits - VTBits))
38668 return Tmp - (NumSrcBits - VTBits);
38674 APInt DemandedLHS, DemandedRHS;
38680 auto NumSignBitsPACKSS = [&](
SDValue V,
const APInt &Elts) ->
unsigned {
38684 V.getScalarValueSizeInBits() == 32) {
38696 unsigned SrcBits =
Op.getOperand(0).getScalarValueSizeInBits();
38697 unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
38699 Tmp0 = NumSignBitsPACKSS(
Op.getOperand(0), DemandedLHS);
38701 Tmp1 = NumSignBitsPACKSS(
Op.getOperand(1), DemandedRHS);
38702 unsigned Tmp = std::min(Tmp0, Tmp1);
38703 if (Tmp > (SrcBits - VTBits))
38704 return Tmp - (SrcBits - VTBits);
38710 if (!Src.getSimpleValueType().isVector())
38717 const APInt &ShiftVal =
Op.getConstantOperandAPInt(1);
38718 if (ShiftVal.
uge(VTBits))
38721 if (ShiftVal.
uge(Tmp))
38728 APInt ShiftVal =
Op.getConstantOperandAPInt(1);
38729 if (ShiftVal.
uge(VTBits - 1))
38738 if (VT == MVT::f32 || VT == MVT::f64 ||
38739 ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1))
38754 if (Tmp0 == 1)
return 1;
38757 return std::min(Tmp0, Tmp1);
38762 if (Tmp0 == 1)
return 1;
38764 return std::min(Tmp0, Tmp1);
38774 unsigned NumOps = Ops.
size();
38776 if (Mask.size() == NumElts) {
38778 for (
unsigned i = 0; i != NumElts; ++i) {
38779 if (!DemandedElts[i])
38790 assert(0 <= M && (
unsigned)M < (NumOps * NumElts) &&
38791 "Shuffle index out of range");
38793 unsigned OpIdx = (
unsigned)M / NumElts;
38794 unsigned EltIdx = (
unsigned)M % NumElts;
38799 DemandedOps[OpIdx].setBit(EltIdx);
38801 unsigned Tmp0 = VTBits;
38802 for (
unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
38803 if (!DemandedOps[i])
38807 Tmp0 = std::min(Tmp0, Tmp1);
38820 return N->getOperand(0);
38843 bool AllowFloatDomain,
bool AllowIntDomain,
38846 MVT &SrcVT,
MVT &DstVT) {
38847 unsigned NumMaskElts = Mask.size();
38851 if (Mask[0] == 0 &&
38852 (MaskEltSize == 32 || (MaskEltSize == 16 && Subtarget.hasFP16()))) {
38857 if (MaskEltSize == 16)
38860 SrcVT = DstVT = !Subtarget.
hasSSE2() ? MVT::v4f32 : MaskVT;
38869 unsigned MaxScale = 64 / MaskEltSize;
38872 for (
unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
38873 bool MatchAny =
true;
38874 bool MatchZero =
true;
38875 bool MatchSign = UseSign;
38876 unsigned NumDstElts = NumMaskElts / Scale;
38877 for (
unsigned i = 0;
38878 i != NumDstElts && (MatchAny || MatchSign || MatchZero); ++i) {
38880 MatchAny = MatchSign = MatchZero =
false;
38883 unsigned Pos = (i * Scale) + 1;
38884 unsigned Len = Scale - 1;
38889 if (MatchAny || MatchSign || MatchZero) {
38890 assert((MatchSign || MatchZero) &&
38891 "Failed to match sext/zext but matched aext?");
38892 unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
38911 if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.
hasSSE2()) ||
38912 (MaskEltSize == 16 && Subtarget.hasFP16())) &&
38916 if (MaskEltSize == 16)
38919 SrcVT = DstVT = !Subtarget.
hasSSE2() ? MVT::v4f32 : MaskVT;
38929 SrcVT = DstVT = MVT::v2f64;
38934 SrcVT = DstVT = MVT::v4f32;
38939 SrcVT = DstVT = MVT::v4f32;
38945 assert(Subtarget.
hasAVX() &&
"AVX required for 256-bit vector shuffles");
38948 SrcVT = DstVT = MVT::v4f64;
38951 if (
isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
38954 SrcVT = DstVT = MVT::v8f32;
38957 if (
isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG,
38960 SrcVT = DstVT = MVT::v8f32;
38967 "AVX512 required for 512-bit vector shuffles");
38968 if (
isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
38971 SrcVT = DstVT = MVT::v8f64;
38976 {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) {
38978 SrcVT = DstVT = MVT::v16f32;
38983 {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) {
38985 SrcVT = DstVT = MVT::v16f32;
38997 const APInt &Zeroable,
38998 bool AllowFloatDomain,
bool AllowIntDomain,
39001 unsigned &Shuffle,
MVT &ShuffleVT,
39002 unsigned &PermuteImm) {
39003 unsigned NumMaskElts = Mask.size();
39005 unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
39010 if (!ContainsZeros && MaskScalarSizeInBits == 64) {
39016 ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
39024 ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
39029 }
else if (AllowFloatDomain && Subtarget.
hasAVX()) {
39034 for (
int i = 0, e = Mask.size(); i != e; ++i) {
39038 assert(((M / 2) == (i / 2)) &&
"Out of range shuffle mask index");
39039 PermuteImm |= (M & 1) << i;
39047 for (
unsigned Order = 0; Order < 2; ++Order) {
39048 if (Subtarget.preferLowerShuffleAsShift() ? (Order == 1) : (Order == 0)) {
39052 if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
39053 !ContainsZeros && (AllowIntDomain || Subtarget.
hasAVX())) {
39058 if (MaskScalarSizeInBits == 64)
39062 ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
39070 if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
39092 int OffsetHiMask[4];
39093 for (
int i = 0; i != 4; ++i)
39094 OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
39105 if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
39110 if (0 < RotateAmt) {
39118 if (AllowIntDomain &&
39124 Zeroable, Subtarget);
39125 if (0 < ShiftAmt && (!ShuffleVT.
is512BitVector() || Subtarget.hasBWI() ||
39146 bool AllowFloatDomain,
bool AllowIntDomain,
39149 unsigned &Shuffle,
MVT &SrcVT,
MVT &DstVT,
39151 unsigned NumMaskElts = Mask.size();
39157 AllowFloatDomain) {
39161 SrcVT = DstVT = Subtarget.
hasSSE2() ? MVT::v2f64 : MVT::v4f32;
39165 AllowFloatDomain) {
39168 SrcVT = DstVT = Subtarget.
hasSSE2() ? MVT::v2f64 : MVT::v4f32;
39172 Subtarget.
hasSSE2() && (AllowFloatDomain || !Subtarget.
hasSSE41())) {
39175 SrcVT = DstVT = MVT::v2f64;
39179 (AllowFloatDomain || !Subtarget.
hasSSE41())) {
39181 SrcVT = DstVT = MVT::v4f32;
39184 if (
isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7},
39186 Subtarget.hasFP16()) {
39188 SrcVT = DstVT = MVT::v8f16;
39194 if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.
hasSSE2()) ||
39195 ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.
hasInt256()) ||
39196 ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
39204 if (MaskVT == MVT::v4i32 && Subtarget.
hasSSE2() &&
39207 V2.getScalarValueSizeInBits() == 64) {
39211 if (Subtarget.
hasSSE41() && MinLZV1 >= 48 && MinLZV2 >= 48) {
39212 SrcVT = MVT::v4i32;
39213 DstVT = MVT::v8i16;
39218 if (MinLZV1 >= 56 && MinLZV2 >= 56) {
39219 SrcVT = MVT::v8i16;
39220 DstVT = MVT::v16i8;
39226 SrcVT = MVT::v4i32;
39227 DstVT = MVT::v8i16;
39234 if ((MaskVT == MVT::v4f32 && Subtarget.
hasSSE1()) ||
39239 (32 <= EltSizeInBits || Subtarget.hasBWI()))) {
39242 SrcVT = DstVT = MaskVT;
39244 SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
39253 SizeInBits == V2.getValueSizeInBits() &&
39255 (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
39256 bool IsBlend =
true;
39258 unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
39259 unsigned Scale1 = NumV1Elts / NumMaskElts;
39260 unsigned Scale2 = NumV2Elts / NumMaskElts;
39263 for (
unsigned i = 0; i != NumMaskElts; ++i) {
39268 DemandedZeroV1.
setBits(i * Scale1, (i + 1) * Scale1);
39269 DemandedZeroV2.
setBits(i * Scale2, (i + 1) * Scale2);
39273 DemandedZeroV2.
setBits(i * Scale2, (i + 1) * Scale2);
39276 if (M == (
int)(i + NumMaskElts)) {
39277 DemandedZeroV1.
setBits(i * Scale1, (i + 1) * Scale1);
39290 if (NumV1Elts == NumV2Elts && NumV1Elts == NumMaskElts) {
39294 auto computeKnownBitsElementWise = [&DAG](
SDValue V) {
39295 unsigned NumElts = V.getValueType().getVectorNumElements();
39297 for (
unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
39300 if (PeepholeKnown.
isZero())
39308 KnownBits V1Known = computeKnownBitsElementWise(V1);
39309 KnownBits V2Known = computeKnownBitsElementWise(V2);
39311 for (
unsigned i = 0; i != NumMaskElts && IsBlend; ++i) {
39316 IsBlend &= V1Known.
Zero[i] && V2Known.
Zero[i];
39320 IsBlend &= V2Known.
Zero[i] || V1Known.
One[i];
39323 if (M == (
int)(i + NumMaskElts)) {
39324 IsBlend &= V1Known.
Zero[i] || V2Known.
One[i];
39343 bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1,
SDValue &V2,
39345 unsigned &Shuffle,
MVT &ShuffleVT,
unsigned &PermuteImm) {
39346 unsigned NumMaskElts = Mask.size();
39350 if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) &&
39356 if (0 < Rotation) {
39358 if (EltSizeInBits == 64)
39362 PermuteImm = Rotation;
39373 if (0 < ByteRotation) {
39376 PermuteImm = ByteRotation;
39384 (MaskVT == MVT::v16i16 && Subtarget.
hasAVX2())) {
39386 bool ForceV1Zero =
false, ForceV2Zero =
false;
39389 ForceV2Zero, BlendMask)) {
39390 if (MaskVT == MVT::v16i16) {
39396 "Repeated mask size doesn't match!");
39398 for (
int i = 0; i < 8; ++i)
39399 if (RepeatedMask[i] >= 8)
39400 PermuteImm |= 1 << i;
39404 ShuffleVT = MaskVT;
39412 ShuffleVT = MaskVT;
39420 if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.
hasSSE41() &&
39424 ShuffleVT = MVT::v4f32;
39429 if (AllowFloatDomain && EltSizeInBits == 64 &&
39433 bool ForceV1Zero =
false, ForceV2Zero =
false;
39435 PermuteImm, Mask, Zeroable)) {
39445 if (AllowFloatDomain && EltSizeInBits == 32 &&
39453 auto MatchHalf = [&](
unsigned Offset,
int &S0,
int &
S1) {
39455 int M1 = RepeatedMask[
Offset + 1];
39476 int ShufMask[4] = {-1, -1, -1, -1};
39477 SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
39478 SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
39492 if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.
hasSSE41() &&
39496 ShuffleVT = MVT::v4f32;
39505 bool HasVariableMask,
bool AllowVariableCrossLaneMask,
39520 bool HasVariableMask,
39521 bool AllowVariableCrossLaneMask,
39522 bool AllowVariablePerLaneMask,
39525 assert(!BaseMask.
empty() &&
"Cannot combine an empty shuffle mask!");
39527 "Unexpected number of shuffle inputs!");
39535 auto CanonicalizeShuffleInput = [&](
MVT VT,
SDValue Op) {
39545 bool UnaryShuffle = (Inputs.
size() == 1);
39551 MVT VT2 = V2.getSimpleValueType();
39553 (RootSizeInBits % VT2.
getSizeInBits()) == 0 &&
"Vector size mismatch");
39557 unsigned NumBaseMaskElts = BaseMask.
size();
39558 if (NumBaseMaskElts == 1) {
39559 assert(BaseMask[0] == 0 &&
"Invalid shuffle index found!");
39560 return CanonicalizeShuffleInput(RootVT, V1);
39564 unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
39572 bool IsMaskedShuffle =
false;
39573 if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) {
39576 IsMaskedShuffle =
true;
39583 if (UnaryShuffle && !
isAnyZero(BaseMask) &&
39587 return CanonicalizeShuffleInput(RootVT, V1);
39597 if (Mask.size() <= NumElts &&
39599 for (
unsigned i = 0; i != NumElts; ++i)
39603 return CanonicalizeShuffleInput(RootVT, V1);
39609 (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) {
39617 "Unexpected lane shuffle");
39618 Res = CanonicalizeShuffleInput(RootVT, V1);
39619 unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
39627 assert((BaseMaskEltSizeInBits % 128) == 0 &&
"Illegal mask size");
39631 auto MatchSHUF128 = [&](
MVT ShuffleVT,
const SDLoc &
DL,
39634 int PermMask[4] = {-1, -1, -1, -1};
39637 for (
int i = 0; i < 4; ++i) {
39638 assert(ScaledMask[i] >= -1 &&
"Illegal shuffle sentinel value");
39639 if (ScaledMask[i] < 0)
39642 SDValue Op = ScaledMask[i] >= 4 ? V2 : V1;
39649 PermMask[i] = ScaledMask[i] % 4;
39653 CanonicalizeShuffleInput(ShuffleVT, Ops[0]),
39654 CanonicalizeShuffleInput(ShuffleVT, Ops[1]),
39661 bool PreferPERMQ = UnaryShuffle &&
isUndefOrInRange(ScaledMask[0], 0, 2) &&
39665 (ScaledMask[0] < 0 || ScaledMask[2] < 0 ||
39666 ScaledMask[0] == (ScaledMask[2] % 2)) &&
39667 (ScaledMask[1] < 0 || ScaledMask[3] < 0 ||
39668 ScaledMask[1] == (ScaledMask[3] % 2));
39670 if (!
isAnyZero(ScaledMask) && !PreferPERMQ) {
39673 MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
39674 if (
SDValue V = MatchSHUF128(ShuffleVT,
DL, ScaledMask, V1, V2, DAG))
39688 Res = CanonicalizeShuffleInput(RootVT, V1);
    if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) &&
      SDValue Lo = CanonicalizeShuffleInput(RootVT, V1);
      SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2);
    if (UnaryShuffle &&
      unsigned PermMask = 0;
      PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
      PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
    if (!UnaryShuffle && !IsMaskedShuffle) {
             "Unexpected shuffle sentinel value");
      if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
        unsigned PermMask = 0;
        PermMask |= ((Mask[0] & 3) << 0);
        PermMask |= ((Mask[1] & 3) << 4);
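// A hedged sketch of the 2x128-bit lane-permute immediate packing above (the
// two-input case): each nibble of the immediate selects a source lane, and a
// zeroable result lane is encoded by setting bit 3 of its nibble, as in the
// unary variant a few lines earlier. Illustrative helper, not an LLVM API.
#include <cstdio>

static unsigned permute2x128Imm(int Lane0, int Lane1) {
  unsigned PermMask = 0;
  PermMask |= ((Lane0 < 0 ? 0x8 : (Lane0 & 0x3)) << 0);  // low result lane selector
  PermMask |= ((Lane1 < 0 ? 0x8 : (Lane1 & 0x3)) << 4);  // high result lane selector
  return PermMask;
}

int main() {
  std::printf("0x%02X\n", permute2x128Imm(0, 3));  // low lane of LHS, high lane of RHS
}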
39739 CanonicalizeShuffleInput(RootVT,
LHS),
39740 CanonicalizeShuffleInput(RootVT,
RHS),
  if (BaseMaskEltSizeInBits > 64) {
    assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
    int MaskScale = BaseMaskEltSizeInBits / 64;
    Mask = std::move(ScaledMask);
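// A hedged sketch of the rescaling used above when a mask with >64-bit
// elements is narrowed: each wide index M expands to MaskScale consecutive
// narrow indices, and sentinel entries (negative) expand to sentinels. This is
// only an illustration of the idea; LLVM's own mask-narrowing helper does the
// real work on SmallVectors.
#include <cstdio>
#include <vector>

static std::vector<int> scaleShuffleMask(const std::vector<int> &Mask, int MaskScale) {
  std::vector<int> Scaled;
  for (int M : Mask)
    for (int i = 0; i != MaskScale; ++i)
      Scaled.push_back(M < 0 ? M : M * MaskScale + i);
  return Scaled;
}

int main() {
  for (int M : scaleShuffleMask({1, -1, 0}, 2))
    std::printf("%d ", M);
  std::printf("\n");   // prints: 2 3 -1 -1 0 1
}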
39759 if (IsMaskedShuffle && NumRootElts > Mask.size()) {
39760 assert((NumRootElts % Mask.size()) == 0 &&
"Illegal mask size");
39761 int MaskScale = NumRootElts / Mask.size();
39764 Mask = std::move(ScaledMask);
39767 unsigned NumMaskElts = Mask.size();
39768 unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
39772 FloatDomain &= (32 <= MaskEltSizeInBits);
39782 MVT ShuffleSrcVT, ShuffleVT;
39783 unsigned Shuffle, PermuteImm;
39788 bool AllowFloatDomain = FloatDomain || (
Depth >= 3);
39789 bool AllowIntDomain = (!FloatDomain || (
Depth >= 3)) && Subtarget.
hasSSE2() &&
39793 APInt KnownUndef, KnownZero;
39795 APInt Zeroable = KnownUndef | KnownZero;
39797 if (UnaryShuffle) {
39801 (Subtarget.
hasAVX() && 32 <= MaskEltSizeInBits)) &&
39802 (!IsMaskedShuffle || NumRootElts == NumMaskElts)) {
39816 Res = CanonicalizeShuffleInput(MaskVT, V1);
39824 DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
39825 (!IsMaskedShuffle ||
39829 Res = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
39830 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, Res);
39835 AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
39837 (!IsMaskedShuffle ||
39841 Res = CanonicalizeShuffleInput(ShuffleVT, V1);
39842 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, Res,
39851 if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
39854 if (MaskEltSizeInBits == 32) {
39855 SDValue SrcV1 = V1, SrcV2 = V2;
39862 CanonicalizeShuffleInput(MVT::v4f32, SrcV1),
39863 CanonicalizeShuffleInput(MVT::v4f32, SrcV2),
39868 if (MaskEltSizeInBits == 64 &&
39871 V2.getScalarValueSizeInBits() <= 32) {
39874 PermuteImm = ( 2 << 4) | ( 0 << 0);
39876 CanonicalizeShuffleInput(MVT::v4f32, V1),
39877 CanonicalizeShuffleInput(MVT::v4f32, V2),
39886 NewV2,
DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
39887 ShuffleVT, UnaryShuffle) &&
39891 NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
39892 NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2);
39893 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, NewV1, NewV2);
39900 AllowIntDomain, NewV1, NewV2,
DL, DAG,
39901 Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
39905 NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1);
39906 NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2);
39907 Res = DAG.
getNode(Shuffle,
DL, ShuffleVT, NewV1, NewV2,
39917 if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
39923 V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
39933 V1 = CanonicalizeShuffleInput(IntMaskVT, V1);
39934 V2 = CanonicalizeShuffleInput(IntMaskVT, V2);
39943 if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.
hasAVX512()) {
39953 V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
39954 Res = DAG.
getNode(Opc,
DL, ShuffleVT, V1);
39961 if (RootSizeInBits < 512 &&
39964 (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
39974 V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
39975 V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2);
39990 int VariableCrossLaneShuffleDepth =
39991 Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
39992 int VariablePerLaneShuffleDepth =
39993 Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2;
39994 AllowVariableCrossLaneMask &=
39995 (
Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask;
39996 AllowVariablePerLaneMask &=
39997 (
Depth >= VariablePerLaneShuffleDepth) || HasVariableMask;
40000 bool AllowBWIVPERMV3 =
40001 (
Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask);
  AllowVariableCrossLaneMask = AllowVariablePerLaneMask = true;

  bool MaskContainsZeros = isAnyZero(Mask);

  if (UnaryShuffle && AllowVariableCrossLaneMask && !MaskContainsZeros) {
    // ...
        (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) {
      // ...
      Res = CanonicalizeShuffleInput(MaskVT, V1);
      // ...
    }
    // ...
         (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
          MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
        (Subtarget.hasBWI() &&
         (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
        (Subtarget.hasVBMI() &&
         (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) {
      V1 = CanonicalizeShuffleInput(MaskVT, V1);
      // ...
    }
  }

  if (UnaryShuffle && AllowVariableCrossLaneMask &&
      // ...
       (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
        MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
        MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 ||
        MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
       (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
        (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
       (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
        (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
    // ...
    for (unsigned i = 0; i != NumMaskElts; ++i)
      // ...
        Mask[i] = NumMaskElts + i;
    V1 = CanonicalizeShuffleInput(MaskVT, V1);
    // ...
  }

  // ...
          Inputs, Root, BaseMask, Depth, HasVariableMask,
          AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG,
          // ...
    return WideShuffle;

  if (AllowVariableCrossLaneMask && !MaskContainsZeros &&
      // ...
       (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
        MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
        MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 ||
        MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
       (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
        (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) ||
       (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
        (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) {
    V1 = CanonicalizeShuffleInput(MaskVT, V1);
    V2 = CanonicalizeShuffleInput(MaskVT, V2);
    // ...
  }

  if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask &&
      // ...
    APInt UndefElts(NumMaskElts, 0);
    // ...
    for (unsigned i = 0; i != NumMaskElts; ++i) {
      // ...
    }
    Res = CanonicalizeShuffleInput(MaskVT, V1);
    unsigned AndOpcode =
        /* ... */;
    Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
    // ...
  }

  if (UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros &&
      ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
       (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
    // ...
    for (int M : Mask) {
      // ...
    }
    Res = CanonicalizeShuffleInput(MaskVT, V1);
    // ...
  }

  if (AllowVariablePerLaneMask && Subtarget.hasXOP() &&
      (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
       MaskVT == MVT::v8f32)) {
    // ...
    unsigned NumEltsPerLane = NumMaskElts / NumLanes;
    // ...
    unsigned M2ZImm = 0;
    for (int M : Mask) {
      // ...
      int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
      // ...
    }
    V1 = CanonicalizeShuffleInput(MaskVT, V1);
    V2 = CanonicalizeShuffleInput(MaskVT, V2);
    // ...
  }

  if (UnaryShuffle && AllowVariablePerLaneMask &&
      // ...
    int Ratio = NumBytes / NumMaskElts;
    for (int i = 0; i < NumBytes; ++i) {
      int M = Mask[i / Ratio];
      // ...
      M = Ratio * M + i % Ratio;
      assert((M / 16) == (i / 16) && "Lane crossing detected");
      // ...
    }
    Res = CanonicalizeShuffleInput(ByteVT, V1);
    // ...
  }

  // ...
      Subtarget.hasXOP()) {
    // ...
    int Ratio = NumBytes / NumMaskElts;
    for (int i = 0; i < NumBytes; ++i) {
      int M = Mask[i / Ratio];
      // ...
      M = Ratio * M + i % Ratio;
      // ...
    }
    MVT ByteVT = MVT::v16i8;
    V1 = CanonicalizeShuffleInput(ByteVT, V1);
    V2 = CanonicalizeShuffleInput(ByteVT, V2);
    // ...
  }

  // ...
          Inputs, Root, BaseMask, Depth, HasVariableMask,
          AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget))
    return WideShuffle;

  if (!UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros &&
      // ...
       (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 ||
        MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 ||
        MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 ||
        MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 ||
        MaskVT == MVT::v16i32)) ||
       (Subtarget.hasBWI() && AllowBWIVPERMV3 &&
        (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 ||
         MaskVT == MVT::v32i16)) ||
       (Subtarget.hasVBMI() && AllowBWIVPERMV3 &&
        (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 ||
         MaskVT == MVT::v64i8)))) {
    V1 = CanonicalizeShuffleInput(MaskVT, V1);
    V2 = CanonicalizeShuffleInput(MaskVT, V2);
    // ...
  }
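// combineX86ShuffleChainWithExtract: when the chain's inputs are themselves
// extracted from wider source vectors, try to widen the whole chain to the
// wider width, combine it there, and extract the original width back out.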
                                       bool HasVariableMask,
                                       bool AllowVariableCrossLaneMask,
                                       // ...
  unsigned NumMaskElts = BaseMask.size();
  unsigned NumInputs = Inputs.size();
  if (NumInputs == 0)
    // ...

  unsigned RootEltSizeInBits = RootSizeInBits / NumMaskElts;
  assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");

  // ...
  unsigned WideSizeInBits = RootSizeInBits;
  for (SDValue Input : Inputs) {
    // ...
        Input.getOperand(0).isUndef()) {
      // ...
    }
    // ...
        WideSizeInBits < Input.getValueSizeInBits())
      WideSizeInBits = Input.getValueSizeInBits();
  }

  // ...
  unsigned Scale = WideSizeInBits / RootSizeInBits;
  if (WideSizeInBits <= RootSizeInBits ||
      (WideSizeInBits % RootSizeInBits) != 0)
    // ...

  // ...
  for (int &M : WideMask) {
    // ...
    M = (M % NumMaskElts) + ((M / NumMaskElts) * Scale * NumMaskElts);
  }

  // ...
  int AdjustedMasks = 0;
  // ...
  for (unsigned I = 0; I != NumInputs; ++I) {
    // ...
    Idx = (Idx * InputEltSizeInBits) / RootEltSizeInBits;
    // ...
    int lo = I * WideMask.size();
    int hi = (I + 1) * WideMask.size();
    for (int &M : WideMask)
      if (lo <= M && M < hi)
        // ...
  }

  // ...
  assert(!WideInputs.empty() && "Shuffle with no inputs detected");

  // ...
  if (AdjustedMasks == 0 || WideInputs.size() > 2)
    // ...

  // ...
  while (WideMask.size() > 1) {
    // ...
    WideMask = std::move(WidenedMask);
  }

  // ...
    std::swap(WideInputs[0], WideInputs[1]);

  // ...
  Depth += AdjustedMasks;

  // ...
  SDValue WideRoot = WideInputs.front().getValueSizeInBits() >
                             WideInputs.back().getValueSizeInBits()
                         ? WideInputs.front()
                         : WideInputs.back();
  // ...
         "WideRootSize mismatch");

  // ...
                                     HasVariableMask, AllowVariableCrossLaneMask,
                                     AllowVariablePerLaneMask, DAG, Subtarget)) {
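// Canonicalize shuffles of horizontal-op (HADD/HSUB/PACK) operands: when each
// demanded mask element can be traced back to a single horizontal-op source,
// fold the shuffle into the operands of the horizontal op instead.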
  if (Mask.empty() || Ops.empty())
    // ...

  // ...
    return V.getOpcode() != Opcode0 || V.getValueType() != VT0;
  // ...
  if (!isHoriz && !isPack)
    // ...
    return Op.hasOneUse() &&
  // ...
  int NumEltsPerLane = NumElts / NumLanes;
  int NumHalfEltsPerLane = NumEltsPerLane / 2;
  // ...
  unsigned EltSizeInBits = RootSizeInBits / Mask.size();

  // ...
  if (NumEltsPerLane >= 4 &&
      // ...
    auto GetHOpSrc = [&](int M) {
      // ...
    };
    // ...
    SDValue M2 = GetHOpSrc(ScaledMask[2]);
    SDValue M3 = GetHOpSrc(ScaledMask[3]);
    if (M0 && M1 && M2 && M3) {
      // ...
    }
  }

  if (Ops.size() >= 2) {
    // ...
    auto GetHOpSrc = [&](int M, int &OutM) {
      // ...
      SDValue Src = BC[M / 4].getOperand((M % 4) >= 2);
      if (!LHS || LHS == Src) {
        // ...
      }
      if (!RHS || RHS == Src) {
        // ...
        OutM = (M % 2) + 2;
        // ...
      }
      // ...
    };
    int PostMask[4] = {-1, -1, -1, -1};
    if (GetHOpSrc(ScaledMask[0], PostMask[0]) &&
        GetHOpSrc(ScaledMask[1], PostMask[1]) &&
        GetHOpSrc(ScaledMask[2], PostMask[2]) &&
        GetHOpSrc(ScaledMask[3], PostMask[3])) {
      // ...
    }
  }

  if (2 < Ops.size())
    // ...

  if (Ops.size() == 2) {
    // ...
    for (int &M : Mask) {
      // ...
      int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0;
      M -= NumElts + (SubLane * NumHalfEltsPerLane);
      // ...
      M += NumHalfEltsPerLane;
      // ...
    }
    // ...
    for (int i = 0; i != NumElts; ++i) {
      // ...
          (M % NumEltsPerLane) >= NumHalfEltsPerLane)
        M -= NumHalfEltsPerLane;
      // ...
          (M % NumEltsPerLane) >= NumHalfEltsPerLane)
        M -= NumHalfEltsPerLane;
      // ...
    }
  }

  // ...
  bool SingleOp = (Ops.size() == 1);
  if (isPack || OneUseOps ||
      // ...
    Lo = Lo.getOperand(WideMask128[0] & 1);
    Hi = Hi.getOperand(WideMask128[1] & 1);
    // ...
  }

  if (Ops.size() == 1 && NumLanes == 2 &&
      // ...
    int M0 = WideMask64[0];
    int M1 = WideMask64[1];
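// combineX86ShufflesConstants: if every shuffle input is constant (or each
// demanded element is known undef/zero), build the shuffled result directly as
// a constant vector instead of emitting the shuffle at all.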
                                  bool HasVariableMask,
                                  // ...
  unsigned NumMaskElts = Mask.size();
  unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
  unsigned NumOps = Ops.size();

  // ...
  for (unsigned I = 0; I != NumOps; ++I)
    // ...

  // ...
  if (IsOptimizingSize && !HasVariableMask &&
      // ...

  // ...
  APInt UndefElts(NumMaskElts, 0);
  APInt ZeroElts(NumMaskElts, 0);
  APInt ConstantElts(NumMaskElts, 0);
  // ...
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    // ...
    assert(0 <= M && M < (int)(NumMaskElts * NumOps));
    // ...
    unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
    unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;
    // ...
    auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
    if (SrcUndefElts[SrcMaskIdx]) {
      // ...
    }
    // ...
    auto &SrcEltBits = RawBitsOps[SrcOpIdx];
    APInt &Bits = SrcEltBits[SrcMaskIdx];
    // ...
    ConstantBitData[i] = Bits;
  }
  assert((UndefElts | ZeroElts | ConstantElts).isAllOnes());

  // ...
  if ((UndefElts | ZeroElts).isAllOnes())
    // ...

  // ...
  if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
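// combineX86ShufflesRecursively: recursively peek through the operands of
// target shuffles, accumulating a combined shuffle mask, until the depth limit
// is reached or a cheaper shuffle sequence can be built for the root node.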
                                     unsigned MaxDepth, bool HasVariableMask,
                                     bool AllowVariableCrossLaneMask,
                                     // ...
         (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
         "Illegal shuffle root mask");
  // ...
  assert(RootVT.isVector() && "Shuffles operate on vector types!");

  // ...
  if (Depth >= MaxDepth)
    // ...

  // ...
  EVT VT = Op.getValueType();
  // ...
         "Can only combine shuffles upto size of the root op.");

  // ...
  for (int M : RootMask) {
    int BaseIdx = RootMask.size() * SrcOpIndex;
    // ...
    OpDemandedElts.setBit(M - BaseIdx);
  }
  // ...
    unsigned NumOpMaskElts = RootMask.size() / Scale;
    assert((RootMask.size() % Scale) == 0 && "Root mask size mismatch");
    // ...
           "Out of range elements referenced in root mask");
    OpDemandedElts = OpDemandedElts.extractBits(NumOpMaskElts, 0);
  // ...

  APInt OpUndef, OpZero;
  // ...
                              OpZero, DAG, Depth, false)) {
    // ...
        (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
        // ...
      int ExtractIdx = Op.getConstantOperandVal(1);
      // ...
      OpInputs.assign({SrcVec});
      // ...
      std::iota(OpMask.begin(), OpMask.end(), ExtractIdx);
    // ...
  }

  // ...
  unsigned OpMaskSize = OpMask.size();
  if (OpInputs.size() > 1) {
    unsigned PaddedMaskSize = NumSubVecs * OpMaskSize;
    for (int &M : OpMask) {
      // ...
      int EltIdx = M % OpMaskSize;
      int OpIdx = M / OpMaskSize;
      M = (PaddedMaskSize * OpIdx) + EltIdx;
    }
    // ...
    OpZero = OpZero.zext(NumSubVecs * OpMaskSize);
    OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize);
  }

  // ...
  bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
  // ...
  bool ResolveKnownZeros = true;
  // ...
    for (int i = 0, e = OpMask.size(); i != e; ++i) {
      // ...
        ResolveKnownZeros = false;
      // ...
    }
  // ...
                                      ResolveKnownZeros);

  // ...
    for (int i = 0, e = Ops.size(); i < e; ++i)
      // ...
    return Ops.size() - 1;
  // ...

  for (SDValue OpInput : OpInputs)
    // ...
        AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));

  // ...
          RootMask.size() % OpMask.size() == 0) ||
         (OpMask.size() > RootMask.size() &&
          OpMask.size() % RootMask.size() == 0) ||
         OpMask.size() == RootMask.size()) &&
         "The smaller number of elements must divide the larger.");

  // ...
  assert(llvm::has_single_bit<uint32_t>(RootMask.size()) &&
         "Non-power-of-2 shuffle mask sizes");
  assert(llvm::has_single_bit<uint32_t>(OpMask.size()) &&
         "Non-power-of-2 shuffle mask sizes");
  // ...
  unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
  unsigned RootRatio =
      std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
  unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
  assert((RootRatio == 1 || OpRatio == 1) &&
         "Must not have a ratio for both incoming and op masks!");
  for (unsigned i = 0; i < MaskWidth; ++i) {
    unsigned RootIdx = i >> RootRatioLog2;
    if (RootMask[RootIdx] < 0) {
      // ...
      Mask[i] = RootMask[RootIdx];
      // ...
    }

    unsigned RootMaskedIdx =
        // ...
            ? RootMask[RootIdx]
            : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));

    // ...
    if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
        (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
      Mask[i] = RootMaskedIdx;
      // ...
    }

    // ...
    RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
    unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
    if (OpMask[OpIdx] < 0) {
      // ...
      Mask[i] = OpMask[OpIdx];
      // ...
    }

    // ...
    unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx]
                                        : (OpMask[OpIdx] << OpRatioLog2) +
                                              (RootMaskedIdx & (OpRatio - 1));

    // ...
    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
    int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
    assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
    OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
    // ...
    Mask[i] = OpMaskedIdx;
  }

  // ...
  for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
    // ...
      Op = Op.getOperand(1);
      unsigned Scale = RootSizeInBits / Op.getValueSizeInBits();
      int Lo = I * Mask.size();
      int Hi = (I + 1) * Mask.size();
      int NewHi = Lo + (Mask.size() / Scale);
      for (int &M : Mask) {
        if (Lo <= M && NewHi <= M && M < Hi)
          // ...
      }
    // ...
        (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
        // ...
      Op = Op.getOperand(0);
    // ...
  }

  // ...
  assert(!Ops.empty() && "Shuffle with no inputs detected");
  HasVariableMask |= IsOpVariableMask;

  // ...
  for (int i = 0, e = Ops.size(); i < e; ++i) {
    // ...
    bool AllowCrossLaneVar = false;
    bool AllowPerLaneVar = false;
    if (Ops[i].getNode()->hasOneUse() ||
        // ...
      AllowCrossLaneVar = AllowVariableCrossLaneMask;
      AllowPerLaneVar = AllowVariablePerLaneMask;
    }
    // ...
            Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
            HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
            // ...
  }

  // ...
          RootVT, Ops, Mask, HasVariableMask, DAG, DL, Subtarget))
    // ...

  // ...
  unsigned EltSizeInBits = RootSizeInBits / Mask.size();
  // ...
          Ops, Mask, RootSizeInBits, DL, DAG, Subtarget))
    // ...

  // ...
    int OpIdx = I.index();
    // ...
    int Lo = OpIdx * Mask.size();
    int Hi = Lo + Mask.size();
    // ...
    APInt OpDemandedElts(Mask.size(), 0);
    for (int MaskElt : Mask) {
      // ...
        int OpEltIdx = MaskElt - Lo;
        OpDemandedElts.setBit(OpEltIdx);
      // ...
    }

    // ...
    if (Op.getValueSizeInBits() < RootSizeInBits) {
      // ...
      unsigned NumExpectedVectorElts = Mask.size();
      unsigned EltSizeInBits = RootSizeInBits / NumExpectedVectorElts;
      unsigned NumOpVectorElts = Op.getValueSizeInBits() / EltSizeInBits;
      // ...
                 NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) &&
             "Demanding the virtual undef widening padding?");
      OpDemandedElts = OpDemandedElts.trunc(NumOpVectorElts);
    }

    // ...
    unsigned NumOpElts = Op.getValueType().getVectorNumElements();
    // ...
            Op, OpScaledDemandedElts, DAG))
      // ...

  // ...
    return Op.getValueSizeInBits() < RootSizeInBits;
  // ...
  if (Op.getValueSizeInBits() < RootSizeInBits)
    // ...

  // ...
  if (Ops.size() <= 2) {
    // ...
    while (Mask.size() > 1) {
      // ...
      Mask = std::move(WidenedMask);
    }
    // ...
            Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
            AllowVariablePerLaneMask, DAG, Subtarget))
      // ...
  }

  // ...
  if (Ops.size() != 2 || !Subtarget.hasAVX2() || RootSizeInBits != 128 ||
      (RootSizeInBits / Mask.size()) != 64 ||
      // ...
      LHS.getOperand(0) != RHS.getOperand(0))
    // ...

  // ...
      Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
      AllowVariablePerLaneMask, DAG, Subtarget);
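// The helpers that follow read the immediate mask off PSHUFD/PSHUFLW/PSHUFHW
// nodes (asserting that it repeats across every 128-bit lane) and walk
// single-use chains of word shuffles so redundant dword/word shuffles can be
// merged or dropped.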
41194 MVT VT =
N.getSimpleValueType();
41206 for (
int i = 1, NumLanes = VT.
getSizeInBits() / 128; i < NumLanes; ++i)
41207 for (
int j = 0; j < LaneElts; ++j)
41208 assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
41209 "Mask doesn't repeat in high 128-bit lanes!");
41211 Mask.resize(LaneElts);
41214 switch (
N.getOpcode()) {
41221 Mask.erase(Mask.begin(), Mask.begin() + 4);
41222 for (
int &M : Mask)
41240 "Called with something other than an x86 128-bit half shuffle!");
41247 for (; V.hasOneUse(); V = V.getOperand(0)) {
41248 switch (V.getOpcode()) {
41264 if (Mask[0] != 0 || Mask[1] != 1 ||
41265 !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
41274 if (Mask[2] != 2 || Mask[3] != 3 ||
41275 !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
41285 if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
41286 V.getSimpleValueType().getVectorElementType() != MVT::i16)
41290 unsigned CombineOp =
41292 if (V.getOperand(0) != V.getOperand(1) ||
41293 !V->isOnlyUserOf(V.getOperand(0).getNode()))
41296 V = V.getOperand(0);
41298 switch (V.getOpcode()) {
41304 if (V.getOpcode() == CombineOp)
41311 V = V.getOperand(0);
41315 }
while (V.hasOneUse());
41322 if (!V.hasOneUse())
41328 for (
int &M : Mask)
41330 V = DAG.
getNode(V.getOpcode(),
DL, V.getValueType(), V.getOperand(0),
41334 while (!Chain.
empty()) {
41337 if (V.getValueType() != W.getOperand(0).getValueType())
41338 V = DAG.
getBitcast(W.getOperand(0).getValueType(), V);
41340 switch (W.getOpcode()) {
41346 V = DAG.
getNode(W.getOpcode(),
DL, W.getValueType(), V, V);
41352 V = DAG.
getNode(W.getOpcode(),
DL, W.getValueType(), V, W.getOperand(1));
41356 if (V.getValueType() !=
N.getValueType())
41368 if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32)
41373 if (V.getOpcode() !=
X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode()))
41375 SDValue N0 = V.getOperand(0);
41376 SDValue N1 = V.getOperand(1);
41377 unsigned Imm = V.getConstantOperandVal(2);
41382 Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
41387 switch (
N.getOpcode()) {
41389 if (
SDValue NewSHUFP = commuteSHUFP(
N,
N.getOperand(0))) {
41390 unsigned Imm =
N.getConstantOperandVal(1);
41398 unsigned Imm =
N.getConstantOperandVal(2);
41400 if (
SDValue NewSHUFP = commuteSHUFP(
N, N0))
41403 }
else if (
SDValue NewSHUFP = commuteSHUFP(
N, N0)) {
41406 }
else if (
SDValue NewSHUFP = commuteSHUFP(
N, N1)) {
41443 APInt Demanded0, DemandedLHS0, DemandedRHS0;
41444 APInt Demanded1, DemandedLHS1, DemandedRHS1;
41449 DemandedRHS0,
true) ||
41451 DemandedRHS1,
true))
41456 if (!DemandedRHS0.
isZero() || !DemandedRHS1.
isZero() ||
41464 for (
unsigned I = 0;
I != NumElts; ++
I) {
41465 if (Demanded0[
I]) {
41466 int M = ScaledMask0[
I];
41469 "BlendMask demands LHS AND RHS");
41470 NewBlendMask[M] = M;
41471 NewPermuteMask[
I] = M;
41473 }
else if (Demanded1[
I]) {
41474 int M = ScaledMask1[
I];
41477 "BlendMask demands LHS AND RHS");
41478 NewBlendMask[M] = M + NumElts;
41479 NewPermuteMask[
I] = M;
41489 if (VT == MVT::v16i16) {
41528 EVT ShuffleVT =
N.getValueType();
41529 unsigned Opc =
N.getOpcode();
41531 auto IsMergeableWithShuffle = [Opc, &DAG](
SDValue Op,
bool FoldShuf =
true,
41532 bool FoldLoad =
false) {
41542 (
Op.getOpcode() == Opc &&
Op->hasOneUse()) ||
41548 auto IsSafeToMoveShuffle = [ShuffleVT](
SDValue Op,
unsigned BinOp) {
41572 if (
N.getOperand(0).getValueType() == ShuffleVT &&
41573 N->isOnlyUserOf(
N.getOperand(0).getNode())) {
41577 if (TLI.
isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
41587 if (
N.getNumOperands() == 2) {
41588 LHS = DAG.
getNode(Opc,
DL, ShuffleVT, Op00,
N.getOperand(1));
41589 RHS = DAG.
getNode(Opc,
DL, ShuffleVT, Op01,
N.getOperand(1));
41600 if (SrcOpcode ==
ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
41605 N.getNumOperands() == 2
41606 ? DAG.
getNode(Opc,
DL, ShuffleVT, Op00,
N.getOperand(1))
41607 : DAG.
getNode(Opc,
DL, ShuffleVT, Op00);
41617 unsigned InsertPSMask =
N.getConstantOperandVal(2);
41618 unsigned ZeroMask = InsertPSMask & 0xF;
41629 if (
N->isOnlyUserOf(
N.getOperand(0).getNode()) &&
41630 N->isOnlyUserOf(
N.getOperand(1).getNode())) {
41636 IsSafeToMoveShuffle(N0, SrcOpcode) &&
41637 IsSafeToMoveShuffle(N1, SrcOpcode)) {
41644 if (((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
41645 (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) ||
41646 ((IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op10)) &&
41647 (IsMergeableWithShuffle(Op01) || IsMergeableWithShuffle(Op11)))) {
41653 if (
N.getNumOperands() == 3) {
41654 LHS = DAG.
getNode(Opc,
DL, ShuffleVT, Op00, Op10,
N.getOperand(2));
41655 RHS = DAG.
getNode(Opc,
DL, ShuffleVT, Op01, Op11,
N.getOperand(2));
41669 IsSafeToMoveShuffle(N0, SrcOpcode) &&
41670 IsSafeToMoveShuffle(N1, SrcOpcode)) {
41676 if (
N.getNumOperands() == 3) {
41677 Res = DAG.
getNode(Opc,
DL, ShuffleVT, Op00, Op10,
N.getOperand(2));
41679 Res = DAG.
getNode(Opc,
DL, ShuffleVT, Op00, Op10);
41692 IsSafeToMoveShuffle(N0, SrcOpcode) &&
41693 IsSafeToMoveShuffle(N1, SrcOpcode)) {
41699 DAG.
getNode(SrcOpcode,
DL, OpDstVT, Res));
41714 MVT VT = V.getSimpleValueType();
41722 if (!Src1.
isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
41731 Res = DAG.
getNode(SrcOpc0,
DL, SrcVT0, Res);
41736 if (SrcVT0 == MVT::v4f64) {
41738 if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
41770 using namespace SDPatternMatch;
41772 MVT VT =
N.getSimpleValueType();
41775 unsigned Opcode =
N.getOpcode();
41786 if (VT == MVT::v2f64 && Src.hasOneUse() &&
41803 EVT SrcVT = Src.getValueType();
41813 for (
unsigned i = 0; i != Scale; ++i)
41814 DemandedMask[i] = i;
41816 {BC}, 0, BC, DemandedMask, {}, 0,
41819 true, DAG, Subtarget))
41821 DAG.getBitcast(SrcVT, Res));
41858 Src.getValueType().getScalarType() == Src.getOperand(0).getValueType())
41864 Src.getValueType() ==
41865 Src.getOperand(0).getValueType().getScalarType() &&
41866 TLI.
isTypeLegal(Src.getOperand(0).getValueType()))
41874 User->getValueSizeInBits(0).getFixedValue() >
41891 bool NoReplaceExtract = Src.hasOneUse();
41893 if (NoReplaceExtract) {
41906 if (SrcVT == MVT::i16 && Src.getOpcode() ==
ISD::TRUNCATE &&
41907 Src.hasOneUse() && Src.getOperand(0).hasOneUse()) {
41909 SDValue TruncIn = Src.getOperand(0);
41933 LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
41951 isa<ConstantSDNode>(TruncIn.
getOperand(1)) &&
41959 unsigned Offset = ShiftAmt / 8;
41979 MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
41994 if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 ||
41995 SrcVT == MVT::v4i32) &&
42021 auto *LN = cast<LoadSDNode>(N0);
42035 auto *LN = cast<MemSDNode>(N0);
42038 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
42041 LN->getMemoryVT(), LN->getMemOperand());
42070 if (
auto *
C = dyn_cast<ConstantSDNode>(N0.
getOperand(0))) {
42076 ConstantVec[0] =
const_cast<ConstantInt *
>(
C->getConstantIntValue());
42083 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
42098 SDValue In = V.getOperand(1);
42100 In.getValueSizeInBits() /
42123 if ((EltBits % SrcBits) == 0 && SrcBits >= 32) {
42125 APInt BlendMask =
N.getConstantOperandAPInt(2).zextOrTrunc(NumElts);
42145 LHS.getOperand(1) !=
RHS.getOperand(1) &&
42150 "BLENDI decode mismatch");
42151 MVT ShufVT =
LHS.getSimpleValueType();
42156 ShufVT, {MaskLHS, MaskRHS}, ByteMask,
42157 true, DAG,
DL, Subtarget)) {
42159 LHS.getOperand(0), NewMask);
42161 RHS.getOperand(0), NewMask);
42163 DAG.getBitcast(VT, NewLHS),
42164 DAG.getBitcast(VT, NewRHS),
N.getOperand(2));
42175 if (VT == MVT::v4f32) {
42176 bool Updated =
false;
42180 for (
int i = 0; i != 2; ++i) {
42189 Mask[Ofs + 0] = SubScaledMask[Mask[Ofs + 0] % 4] + (i * 4);
42190 Mask[Ofs + 1] = SubScaledMask[Mask[Ofs + 1] % 4] + (i * 4);
42197 for (
int &M : Mask)
42214 EVT SrcVT = Src.getValueType();
42228 uint64_t Mask =
N->getConstantOperandVal(2);
42231 if ((Mask & 0x0A) == 0x0A &&
42236 if ((Mask & 0xA0) == 0xA0 &&
42241 if (NewLHS || NewRHS)
42243 NewRHS ? NewRHS :
RHS,
42254 EVT SrcVT =
LHS.getOperand(0).getValueType();
42255 if (
RHS.isUndef() || SrcVT ==
RHS.getOperand(0).getValueType()) {
42259 N->getOperand(2)));
42269 auto FindSubVector128 = [&](
unsigned Idx) {
42275 return SubOps[
Idx & 1];
42276 unsigned NumElts = Src.getValueType().getVectorNumElements();
42278 Src.getOperand(1).getValueSizeInBits() == 128 &&
42279 Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
42280 return Src.getOperand(1);
42284 unsigned Imm =
N.getConstantOperandVal(2);
42285 if (
SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
42286 if (
SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
42302 switch (V.getOpcode()) {
42311 MVT InnerVT = V.getSimpleValueType();
42316 Res = DAG.
getNode(V.getOpcode(),
DL, InnerVT, Res, V.getOperand(1));
42325 assert(Mask.size() == 4);
42352 return DAG.
getNode(Opcode,
DL, VT, N0, SclVec);
42359 assert(VT == MVT::v4f32 &&
"INSERTPS ValueType must be MVT::v4f32");
42362 unsigned InsertPSMask =
N.getConstantOperandVal(2);
42363 unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
42364 unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
42365 unsigned ZeroMask = InsertPSMask & 0xF;
42368 if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.
isUndef())
42373 if ((ZeroMask & (1u << DstIdx)) && !Op1.
isUndef())
42380 APInt KnownUndef1, KnownZero1;
42383 if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) {
42385 InsertPSMask |= (1u << DstIdx);
42390 int M = TargetMask1[SrcIdx];
42391 assert(0 <= M && M < 8 &&
"Shuffle index out of range");
42392 InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
42393 Op1 = Ops1[M < 4 ? 0 : 1];
42401 APInt KnownUndef0, KnownZero0;
42404 bool Updated =
false;
42405 bool UseInput00 =
false;
42406 bool UseInput01 =
false;
42407 for (
int i = 0; i != 4; ++i) {
42408 if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
42413 if (KnownUndef0[i] || KnownZero0[i]) {
42415 InsertPSMask |= (1u << i);
42421 int M = TargetMask0[i];
42422 if (M != i && M != (i + 4))
42426 UseInput00 |= (0 <= M && M < 4);
42427 UseInput01 |= (4 <= M);
42432 if (UseInput00 && !UseInput01) {
42435 }
else if (!UseInput00 && UseInput01) {
42449 auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
42450 if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
42452 MemIntr->getBasePtr(),
42453 MemIntr->getMemOperand());
42470 SDValue Ops[] = {
N.getOperand(0),
N.getOperand(2)};
42484 assert(Mask.size() == NumElts &&
"Unexpected shuffle mask size");
42487 MVT MaskVT =
N.getOperand(1).getSimpleValueType();
42490 for (
int &M : Mask)
42491 M = (M < 0 ? M : M & (Mask.size() - 1));
42507 for (
int &M : Mask)
42508 M = (M < (int)NumElts ? M : (M - (NumElts / 2)));
42532 return N.getOperand(0);
42536 switch (
N.getOpcode()) {
42547 int DMask[] = {0, 1, 2, 3};
42549 DMask[DOffset + 0] = DOffset + 1;
42550 DMask[DOffset + 1] = DOffset + 0;
42561 if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
42564 V.getOpcode() !=
N.getOpcode() &&
42565 V.hasOneUse() && V.getOperand(0).hasOneUse()) {
42573 for (
int i = 0; i < 4; ++i) {
42574 WordMask[i + NOffset] = Mask[i] + NOffset;
42575 WordMask[i + VOffset] = VMask[i] + VOffset;
42579 for (
int i = 0; i < 8; ++i)
42580 MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
42581 if (
ArrayRef<int>(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
42582 ArrayRef<int>(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
42609 int ParitySrc[2] = {-1, -1};
42610 unsigned Size = Mask.size();
42611 for (
unsigned i = 0; i !=
Size; ++i) {
42617 if ((M %
Size) != i)
42621 int Src = M /
Size;
42622 if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src)
42624 ParitySrc[i % 2] = Src;
42628 if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1])
42631 Op0Even = ParitySrc[0] == 0;
42648 EVT VT =
N->getValueType(0);
42670 if (!V1->
hasOneUse() || !V2->hasOneUse())
42678 if ((V2->getOperand(0) !=
LHS || V2->getOperand(1) !=
RHS) &&
42679 (V2->getOperand(0) !=
RHS || V2->getOperand(1) !=
LHS))
42683 LHS = V2->getOperand(0);
RHS = V2->getOperand(1);
42713 MVT VT =
N->getSimpleValueType(0);
42721 SDValue FMAdd = Op0, FMSub = Op1;
42738 bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd;
42757 MVT VT =
N->getSimpleValueType(0);
42763 return DAG.
getNode(Opc,
DL, VT, Opnd0, Opnd1, Opnd2);
42790 if (!Subtarget.
hasAVX2() || !isa<ShuffleVectorSDNode>(
N))
42793 EVT VT =
N->getValueType(0);
42820 auto *SVOp = cast<ShuffleVectorSDNode>(
N);
42821 for (
int Elt : SVOp->getMask())
42822 Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));
42846 int HalfIdx1, HalfIdx2;
42849 (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
42859 HalfIdx2,
false, DAG,
true);
42865 if (
auto *Shuf = dyn_cast<ShuffleVectorSDNode>(
N))
42872 EVT VT =
N->getValueType(0);
42881 VT,
SDValue(
N, 0), dl, DAG, Subtarget,
true))
42933 SDValue Mask =
Op.getOperand(MaskIndex);
42934 if (!Mask.hasOneUse())
42938 APInt MaskUndef, MaskZero;
42947 auto *Load = dyn_cast<LoadSDNode>(BC);
42948 if (!Load || !Load->getBasePtr().hasOneUse())
42955 Type *CTy =
C->getType();
42961 unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();
42962 if (NumCstElts != NumElts && NumCstElts != (NumElts * 2))
42964 unsigned Scale = NumCstElts / NumElts;
42967 bool Simplified =
false;
42969 for (
unsigned i = 0; i != NumCstElts; ++i) {
42970 Constant *Elt =
C->getAggregateElement(i);
42971 if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) {
42984 SDValue LegalCV = LowerConstantPool(CV, TLO.
DAG);
42996 unsigned Opc =
Op.getOpcode();
42997 EVT VT =
Op.getValueType();
43003 APInt LHSUndef, LHSZero;
43004 APInt RHSUndef, RHSZero;
43014 KnownZero = LHSZero | RHSZero;
43019 APInt LHSUndef, LHSZero;
43020 APInt RHSUndef, RHSZero;
43035 APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
43039 APInt DemandedRHSElts = DemandedSrcElts & ~LHSZero;
43049 LHS.getValueType() ==
RHS.getValueType() &&
43050 LHS.getValueType().getScalarType() == MVT::i8 &&
43051 "Unexpected PSADBW types");
43055 unsigned NumSrcElts =
LHS.getValueType().getVectorNumElements();
43061 if (NewLHS || NewRHS) {
43062 NewLHS = NewLHS ? NewLHS :
LHS;
43063 NewRHS = NewRHS ? NewRHS :
RHS;
43081 unsigned UseOpc = Use->getOpcode();
43082 return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL ||
43083 UseOpc == X86ISD::VSRA) &&
43084 Use->getOperand(0) != Amt;
43087 APInt AmtUndef, AmtZero;
43091 Depth + 1, AssumeSingleUse))
43112 Src, DemandedElts, TLO.
DAG,
Depth + 1))
43122 APInt LHSUndef, LHSZero;
43123 APInt RHSUndef, RHSZero;
43139 KnownZero = LHSZero;
43144 APInt LHSUndef, LHSZero;
43145 APInt RHSUndef, RHSZero;
43158 auto *Amt = cast<ConstantSDNode>(
Op.getOperand(1));
43159 assert(Amt->getAPIntValue().ult(NumElts) &&
"Out of range shift amount");
43160 unsigned ShiftAmt = Amt->getZExtValue();
43170 unsigned C1 = Src.getConstantOperandVal(1);
43172 int Diff = ShiftAmt - C1;
43181 Op, TLO.
DAG.
getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
43185 APInt DemandedSrc = DemandedElts.
lshr(ShiftAmt);
43190 KnownUndef <<= ShiftAmt;
43191 KnownZero <<= ShiftAmt;
43197 auto *Amt = cast<ConstantSDNode>(
Op.getOperand(1));
43198 assert(Amt->getAPIntValue().ult(NumElts) &&
"Out of range shift amount");
43199 unsigned ShiftAmt = Amt->getZExtValue();
43209 unsigned C1 = Src.getConstantOperandVal(1);
43211 int Diff = ShiftAmt - C1;
43220 Op, TLO.
DAG.
getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
43224 APInt DemandedSrc = DemandedElts.
shl(ShiftAmt);
43239 auto GetDemandedMasks = [&](
SDValue Op,
bool Invert =
false) {
43245 APInt OpElts = DemandedElts;
43250 for (
int I = 0;
I != NumElts; ++
I) {
43251 if (!DemandedElts[
I])
43253 if (UndefElts[
I]) {
43258 }
else if ((Invert && !EltBits[
I].isAllOnes()) ||
43259 (!Invert && !EltBits[
I].
isZero())) {
43260 OpBits |= Invert ? ~EltBits[
I] : EltBits[
I];
43265 return std::make_pair(OpBits, OpElts);
43267 APInt BitsLHS, EltsLHS;
43268 APInt BitsRHS, EltsRHS;
43269 std::tie(BitsLHS, EltsLHS) = GetDemandedMasks(
RHS);
43270 std::tie(BitsRHS, EltsRHS) = GetDemandedMasks(
LHS,
true);
43272 APInt LHSUndef, LHSZero;
43273 APInt RHSUndef, RHSZero;
43286 if (NewLHS || NewRHS) {
43287 NewLHS = NewLHS ? NewLHS :
LHS;
43288 NewRHS = NewRHS ? NewRHS :
RHS;
43300 EVT SrcVT = Src.getValueType();
43301 APInt SrcUndef, SrcZero;
43313 APInt DemandedLHS, DemandedRHS;
43316 APInt LHSUndef, LHSZero;
43320 APInt RHSUndef, RHSZero;
43334 if (NewN0 || NewN1) {
43335 NewN0 = NewN0 ? NewN0 : N0;
43336 NewN1 = NewN1 ? NewN1 : N1;
43350 APInt DemandedLHS, DemandedRHS;
43353 APInt LHSUndef, LHSZero;
43357 APInt RHSUndef, RHSZero;
43366 if (N0 != N1 && !DemandedElts.
isAllOnes()) {
43371 if (NewN0 || NewN1) {
43372 NewN0 = NewN0 ? NewN0 : N0;
43373 NewN1 = NewN1 ? NewN1 : N1;
43384 MVT SrcVT = Src.getSimpleValueType();
43386 APInt SrcUndef, SrcZero;
43399 DemandedElts, TLO.
DAG, Subtarget,
SDLoc(
Op)))
43404 APInt SelUndef, SelZero;
43406 SelZero, TLO,
Depth + 1))
43410 APInt LHSUndef, LHSZero;
43412 LHSZero, TLO,
Depth + 1))
43415 APInt RHSUndef, RHSZero;
43417 RHSZero, TLO,
Depth + 1))
43420 KnownZero = LHSZero & RHSZero;
43421 KnownUndef = LHSUndef & RHSUndef;
43427 APInt DemandedUpperElts = DemandedElts;
43437 if (DemandedElts == 1 &&
Op.getValue(1).use_empty() &&
isTypeLegal(SVT)) {
43439 auto *Mem = cast<MemSDNode>(
Op);
43441 Mem->getMemOperand());
43449 MVT SrcVT = Src.getSimpleValueType();
43451 if (DemandedElts == 1) {
43454 else if (Src.getValueType() != VT)
43461 APInt SrcUndef, SrcZero;
43469 Src, SrcElts, TLO.
DAG,
Depth + 1))
43497 DemandedElts.
lshr(NumElts / 2) == 0) {
43499 unsigned ExtSizeInBits = SizeInBits / 2;
43503 ExtSizeInBits = SizeInBits / 4;
43510 if (Src.getValueSizeInBits() > ExtSizeInBits)
43516 TLO.
DAG,
DL, ExtSizeInBits));
43520 auto *MemIntr = cast<MemIntrinsicSDNode>(
Op);
43524 SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
43527 MemIntr->getMemOperand());
43531 TLO.
DAG,
DL, ExtSizeInBits));
43535 auto *MemIntr = cast<MemIntrinsicSDNode>(
Op);
43536 EVT MemVT = MemIntr->getMemoryVT();
43541 MemIntr->getBasePtr(), MemIntr->getMemOperand());
43545 TLO.
DAG,
DL, ExtSizeInBits));
43554 TLO.
DAG,
DL, ExtSizeInBits));
43582 if (VT == MVT::v4f64 || VT == MVT::v4i64) {
43598 unsigned LoMask =
Op.getConstantOperandVal(2) & 0xF;
43602 unsigned EltIdx = (LoMask & 0x1) * (NumElts / 2);
43603 unsigned SrcIdx = (LoMask & 0x2) >> 1;
43617 unsigned Scale = SizeInBits / ExtSizeInBits;
43619 MVT SrcVT =
SrcOp.getSimpleValueType();
43620 unsigned SrcExtSize =
43675 "Unsupported vector size");
43699 APInt OpUndef, OpZero;
43707 if (OpMask.
size() != (
unsigned)NumElts ||
43709 return VT.getSizeInBits() != V.getValueSizeInBits() ||
43710 !V.getValueType().isVector();
43714 KnownZero = OpZero;
43715 KnownUndef = OpUndef;
43718 int NumSrcs = OpInputs.
size();
43719 for (
int i = 0; i != NumElts; ++i)
43720 if (!DemandedElts[i])
43732 for (
int Src = 0; Src != NumSrcs; ++Src)
43737 for (
int Src = 0; Src != NumSrcs; ++Src) {
43742 int Lo = Src * NumElts;
43744 for (
int i = 0; i != NumElts; ++i)
43745 if (DemandedElts[i]) {
43746 int M = OpMask[i] -
Lo;
43747 if (0 <= M && M < NumElts)
43752 APInt SrcUndef, SrcZero;
43769 for (
int i = 0; i != NumElts; ++i)
43770 if (DemandedElts[i])
43771 DemandedMask[i] = i;
43776 true,
true, TLO.
DAG,
43788 unsigned Depth)
const {
43789 EVT VT =
Op.getValueType();
43791 unsigned Opc =
Op.getOpcode();
43796 MVT SrcVT = Src.getSimpleValueType();
43818 bool Is32BitAVX512 = !Subtarget.is64Bit() && Subtarget.
hasAVX512();
43820 DemandedMaskLHS = DemandedMask;
43822 DemandedMaskRHS = DemandedMask;
43825 KnownLHS, TLO,
Depth + 1))
43828 KnownRHS, TLO,
Depth + 1))
43832 KnownRHS = KnownRHS.
trunc(32);
43842 LHS, DemandedMaskLHS, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43844 RHS, DemandedMaskRHS, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
43845 if (DemandedLHS || DemandedRHS) {
43846 DemandedLHS = DemandedLHS ? DemandedLHS :
LHS;
43847 DemandedRHS = DemandedRHS ? DemandedRHS :
RHS;
43859 Known, TLO,
Depth + 1))
43863 OriginalDemandedElts, Known2, TLO,
Depth + 1))
43868 OriginalDemandedElts, TLO))
43884 APInt DemandedMask = OriginalDemandedBits.
lshr(ShAmt);
43893 int Diff = ShAmt - Shift2Amt;
43906 unsigned NumSignBits =
43909 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
43916 Known.
Zero <<= ShAmt;
43917 Known.
One <<= ShAmt;
43925 Op0, DemandedMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1)) {
43941 APInt DemandedMask = OriginalDemandedBits << ShAmt;
43956 Op0, DemandedMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1)) {
43972 APInt DemandedMask = OriginalDemandedBits << ShAmt;
43981 unsigned NumSignBits =
43983 if (ShAmt < NumSignBits)
44013 Op0, DemandedMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1)) {
44028 Sel, SignMask, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
44030 LHS, OriginalDemandedBits, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
44032 RHS, OriginalDemandedBits, OriginalDemandedElts, TLO.
DAG,
Depth + 1);
44034 if (NewSel || NewLHS || NewRHS) {
44035 NewSel = NewSel ? NewSel : Sel;
44036 NewLHS = NewLHS ? NewLHS :
LHS;
44037 NewRHS = NewRHS ? NewRHS :
RHS;
44039 NewSel, NewLHS, NewRHS));
44046 auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(1));
44050 if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
44051 unsigned Idx = CIdx->getZExtValue();
44056 APInt DemandedVecBits = OriginalDemandedBits.
trunc(VecBitWidth);
44057 if (DemandedVecBits == 0)
44060 APInt KnownUndef, KnownZero;
44063 KnownZero, TLO,
Depth + 1))
44068 KnownVec, TLO,
Depth + 1))
44072 Vec, DemandedVecBits, DemandedVecElts, TLO.
DAG,
Depth + 1))
44085 auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
44089 unsigned Idx = CIdx->getZExtValue();
44090 if (!OriginalDemandedElts[
Idx])
44094 APInt DemandedVecElts(OriginalDemandedElts);
44097 KnownVec, TLO,
Depth + 1))
44102 APInt DemandedSclBits = OriginalDemandedBits.
zext(NumSclBits);
44117 APInt DemandedLHS, DemandedRHS;
44123 KnownLHS, TLO,
Depth + 1))
44126 KnownRHS, TLO,
Depth + 1))
44131 Op.getOperand(0), SignMask, DemandedLHS, TLO.
DAG,
Depth + 1);
44133 Op.getOperand(1), SignMask, DemandedRHS, TLO.
DAG,
Depth + 1);
44134 if (DemandedOp0 || DemandedOp1) {
44135 SDValue Op0 = DemandedOp0 ? DemandedOp0 :
Op.getOperand(0);
44136 SDValue Op1 = DemandedOp1 ? DemandedOp1 :
Op.getOperand(1);
44144 MVT SrcVT = Src.getSimpleValueType();
44155 Src->hasOneUse()) {
44175 MVT SrcVT = Src.getSimpleValueType();
44180 if (OriginalDemandedBits.
countr_zero() >= NumElts)
44191 APInt KnownUndef, KnownZero;
44207 if (KnownSrc.
One[SrcBits - 1])
44209 else if (KnownSrc.
Zero[SrcBits - 1])
44214 Src, DemandedSrcBits, DemandedElts, TLO.
DAG,
Depth + 1))
44224 "Illegal vector type for X86ISD::TESTP");
44229 bool AssumeSingleUse = (Op0 == Op1) &&
Op->isOnlyUserOf(Op0.
getNode());
44231 AssumeSingleUse) ||
44238 OriginalDemandedElts, Known2, TLO,
Depth + 1))
44241 OriginalDemandedElts, Known, TLO,
Depth + 1))
44254 if (
auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
44256 uint64_t Val1 = Cst1->getZExtValue();
44257 uint64_t MaskedVal1 = Val1 & 0xFFFF;
44265 unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
44266 unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
44292 if (LengthBits.
isZero())
44302 unsigned DemandedBitsLZ = OriginalDemandedBits.
countl_zero();
44329 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
44336 unsigned Opc =
Op.getOpcode();
44337 EVT VT =
Op.getValueType();
44344 auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
44347 !DemandedElts[CIdx->getZExtValue()])
44355 unsigned ShAmt =
Op.getConstantOperandVal(1);
44359 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
44367 return Op.getOperand(0);
44374 return Op.getOperand(1);
44406 APInt ShuffleUndef, ShuffleZero;
44410 ShuffleUndef, ShuffleZero, DAG,
Depth,
false)) {
44414 if (ShuffleMask.
size() == (
unsigned)NumElts &&
44416 return VT.getSizeInBits() == V.getValueSizeInBits();
44421 if (DemandedElts.
isSubsetOf(ShuffleUndef | ShuffleZero))
44426 for (
int i = 0; i != NumElts; ++i) {
44427 int M = ShuffleMask[i];
44428 if (!DemandedElts[i] || ShuffleUndef[i])
44430 int OpIdx = M / NumElts;
44431 int EltIdx = M % NumElts;
44432 if (M < 0 || EltIdx != i) {
44437 if (IdentityOp == 0)
44441 "Multiple identity shuffles detected");
44443 if (IdentityOp != 0)
44457 switch (
Op.getOpcode()) {
44471 assert(0 <= M.value() && M.value() < (
int)(Ops.
size() * NumElts) &&
44472 "Shuffle mask index out of range");
44473 DemandedSrcElts[M.value() / NumElts].setBit(M.value() % NumElts);
44476 if (!DemandedSrcElts[
Op.index()].isZero() &&
44493 switch (
Op.getOpcode()) {
44513 switch (
Op->getConstantOperandVal(0)) {
44514 case Intrinsic::x86_sse2_pmadd_wd:
44515 case Intrinsic::x86_avx2_pmadd_wd:
44516 case Intrinsic::x86_avx512_pmaddw_d_512:
44517 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
44518 case Intrinsic::x86_avx2_pmadd_ub_sw:
44519 case Intrinsic::x86_avx512_pmaddubs_w_512:
44528 const APInt &DemandedElts,
44531 unsigned Depth)
const {
44533 unsigned Opc =
Op.getOpcode();
44549 bool AllowTruncate) {
44550 switch (Src.getOpcode()) {
44552 if (!AllowTruncate)
44556 return Src.getOperand(0).getValueSizeInBits() ==
Size;
44566 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
44592 EVT SrcVT = Src.getValueType();
44593 if (SrcVT != MVT::v4i1)
44596 switch (Src.getOpcode()) {
44598 if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
44600 cast<CondCodeSDNode>(Src.getOperand(2))->get() ==
ISD::SETLT) {
44601 SDValue Op0 = Src.getOperand(0);
44626 switch (Src.getOpcode()) {
44636 Src.getOpcode(),
DL, SExtVT,
44642 DL, SExtVT, Src.getOperand(0),
44658 EVT SrcVT = Src.getValueType();
44676 bool PreferMovMsk = Src.getOpcode() ==
ISD::TRUNCATE && Src.hasOneUse() &&
44677 (Src.getOperand(0).getValueType() == MVT::v16i8 ||
44678 Src.getOperand(0).getValueType() == MVT::v32i8 ||
44679 Src.getOperand(0).getValueType() == MVT::v64i8);
44683 if (Src.getOpcode() ==
ISD::SETCC && Src.hasOneUse() &&
44684 cast<CondCodeSDNode>(Src.getOperand(2))->get() ==
ISD::SETLT &&
44686 EVT CmpVT = Src.getOperand(0).getValueType();
44689 (EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64))
44690 PreferMovMsk =
true;
44702 SubSrcOps.
size() >= 2) {
44703 SDValue LowerOp = SubSrcOps[0];
44727 bool PropagateSExt =
false;
44732 SExtVT = MVT::v2i64;
44735 SExtVT = MVT::v4i32;
44738 if (Subtarget.
hasAVX() &&
44740 SExtVT = MVT::v4i64;
44741 PropagateSExt =
true;
44745 SExtVT = MVT::v8i16;
44753 SExtVT = MVT::v8i32;
44754 PropagateSExt =
true;
44758 SExtVT = MVT::v16i8;
44765 SExtVT = MVT::v32i8;
44771 if (Subtarget.hasBWI())
44773 SExtVT = MVT::v64i8;
44778 SExtVT = MVT::v64i8;
44787 if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
44790 if (SExtVT == MVT::v8i16) {
44805 EVT SrcVT =
Op.getValueType();
44807 "Expected a vXi1 vector");
44809 "Expected a constant build vector");
44814 if (!In.isUndef() && (In->getAsZExtVal() & 0x1))
44833 EVT DstVT =
N->getValueType(0);
44835 EVT SrcVT =
Op.getValueType();
44837 if (!
Op.hasOneUse())
44857 LHS.getOperand(0).getValueType() == DstVT)
44862 RHS.getOperand(0).getValueType() == DstVT)
44884 auto CreateMMXElement = [&](
SDValue V) {
44887 if (V.getValueType().isFloatingPoint()) {
44888 if (Subtarget.
hasSSE1() && !isa<ConstantFPSDNode>(V)) {
44907 if (
Splat.isUndef())
44922 unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
44931 for (
unsigned i = 0; i != NumElts; ++i)
44936 while (Ops.
size() > 1) {
44937 unsigned NumOps = Ops.
size();
44938 unsigned IntrinOp =
44939 (NumOps == 2 ? Intrinsic::x86_mmx_punpckldq
44940 : (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd
44941 : Intrinsic::x86_mmx_punpcklbw));
44944 for (
unsigned i = 0; i != NumOps; i += 2)
44946 Ops[i], Ops[i + 1]);
44960 unsigned Depth = 0) {
44965 unsigned Opc = V.getOpcode();
44969 SDValue Src = V.getOperand(0);
44970 EVT SrcVT = Src.getValueType();
44976 auto *
C = cast<ConstantSDNode>(V);
44979 if (
C->isAllOnes())
44985 SDValue Src = V.getOperand(0);
44990 Subtarget,
Depth + 1))
44998 SDValue Src = V.getOperand(0);
45000 Src.getScalarValueSizeInBits());
45003 Subtarget,
Depth + 1))
45015 Subtarget,
Depth + 1))
45017 Subtarget,
Depth + 1))
45018 return DAG.
getNode(Opc,
DL, VT, N0, N1);
45023 SDValue Src0 = V.getOperand(0);
45024 if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||
45025 ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))
45028 if (
auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
45050 EVT VT =
N->getValueType(0);
45067 if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.
isScalarInteger() &&
45077 if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.
isScalarInteger() &&
45122 if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.
hasAVX512() &&
45134 auto *BCast = cast<MemIntrinsicSDNode>(N0);
45136 unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
45138 if (MemSize >= 32) {
45146 SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
45149 MemVT, BCast->getMemOperand());
45158 if (VT == MVT::x86mmx) {
45178 (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) &&
45180 bool LowUndef =
true, AllUndefOrZero =
true;
45183 LowUndef &=
Op.isUndef() || (i >= e/2);
45186 if (AllUndefOrZero) {
45199 (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 ||
45200 SrcVT == MVT::v8i8))
45233 if (
auto *
C = dyn_cast<ConstantSDNode>(N0)) {
45234 if (
C->isAllOnes())
45255 SDValue MovmskIn = Src.getOperand(0);
45261 if (MovMskElts <= NumElts &&
45269 if (
EVT(CmpVT) == VT)
45274 unsigned NumConcats = NumElts / MovMskElts;
45303 if (!((Subtarget.
hasSSE1() && VT == MVT::f32) ||
45304 (Subtarget.
hasSSE2() && VT == MVT::f64) ||
45305 (Subtarget.hasFP16() && VT == MVT::f16) ||
45318 !isa<ConstantSDNode>(LogicOp0.
getOperand(0))) {
45327 !isa<ConstantSDNode>(LogicOp1.
getOperand(0))) {
45339 Op0 =
Mul.getOperand(0);
45340 Op1 =
Mul.getOperand(1);
45349 Op.getOperand(0).getScalarValueSizeInBits() <= 8)
45352 auto *BV = dyn_cast<BuildVectorSDNode>(
Op);
45389 unsigned &LogBias,
const SDLoc &
DL,
45405 if (Subtarget.hasVNNI() && !Subtarget.hasVLX())
45471 if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
45481 EVT SrcVT = Src.getValueType();
45483 if (SrcSVT != ExtractVT || (SrcVT.
getSizeInBits() % 128) != 0)
45493 SrcVT =
Lo.getValueType();
45496 assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
45497 (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
45498 "Unexpected value type");
45518 if (ExtractVT == MVT::i8) {
45521 {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
45526 MinPos = DAG.
getBitcast(MVT::v8i16, MinPos);
45546 if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
45547 ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
45553 if (!
Match && ExtractVT == MVT::i1)
45565 EVT MatchVT =
Match.getValueType();
45567 unsigned MaxElts = Subtarget.
hasInt256() ? 32 : 16;
45571 if (ExtractVT == MVT::i1) {
45597 while (NumElts > MaxElts) {
45608 Movmsk = DAG.
getZExtOrTrunc(Movmsk,
DL, NumElts > 32 ? MVT::i64 : MVT::i32);
45611 unsigned MatchSizeInBits =
Match.getValueSizeInBits();
45612 if (!(MatchSizeInBits == 128 ||
45613 (MatchSizeInBits == 256 && Subtarget.
hasAVX())))
45620 if (
Match.getValueType().getVectorNumElements() < 2)
45631 MatchSizeInBits =
Match.getValueSizeInBits();
45646 assert((NumElts <= 32 || NumElts == 64) &&
45647 "Not expecting more than 64 elements");
45649 MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
45679 if (!Subtarget.hasVNNI() && !Subtarget.hasAVXVNNI())
45685 if (ExtractVT != MVT::i32)
45717 unsigned StageBias;
45725 if (Stages > StageBias) {
45728 for (
unsigned i = Stages - StageBias; i > 0; --i) {
45730 for (
unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
45731 Mask[j] = MaskEnd + j;
45757 if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
45802 for(
unsigned i = Stages - 3; i > 0; --i) {
45804 for(
unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
45805 Mask[j] = MaskEnd + j;
45835 "Only EXTRACT_VECTOR_ELT supported so far");
45838 EVT VT =
N->getValueType(0);
45841 return Use->getOpcode() == ISD::STORE ||
45842 Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
45843 Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
45846 auto *LoadVec = dyn_cast<LoadSDNode>(SrcVec);
45857 DAG.
getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
45858 LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
45878 EVT VT =
N->getValueType(0);
45879 EVT SrcVT = Src.getValueType();
45885 if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(
Idx))
45888 const APInt &IdxC =
N->getConstantOperandAPInt(1);
45889 if (IdxC.
uge(NumSrcElts))
45897 EVT SrcOpVT =
SrcOp.getValueType();
45901 unsigned Offset = IdxC.
urem(Scale) * SrcEltBits;
45914 auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
45916 if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
45917 VT.
getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
45919 MemIntr->getBasePtr(),
45920 MemIntr->getPointerInfo(),
45921 MemIntr->getOriginalAlign(),
45922 MemIntr->getMemOperand()->getFlags());
45936 if (IdxC.
ult(Scale)) {
45964 auto GetLegalExtract = [&Subtarget, &DAG, &dl](
SDValue Vec,
EVT VecVT,
45967 if ((VecVT.is256BitVector() || VecVT.is512BitVector()) &&
45968 (VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 ||
45969 VecSVT == MVT::i64)) {
45971 unsigned NumEltsPerLane = 128 / EltSizeInBits;
45972 unsigned LaneOffset = (
Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;
45973 unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits();
45976 Idx &= (NumEltsPerLane - 1);
45978 if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) &&
45984 if ((VecVT == MVT::v8i16 && Subtarget.
hasSSE2()) ||
45985 (VecVT == MVT::v16i8 && Subtarget.
hasSSE41())) {
46006 if (Mask.size() != NumSrcElts) {
46007 if ((NumSrcElts % Mask.size()) == 0) {
46009 int Scale = NumSrcElts / Mask.size();
46011 Mask = std::move(ScaledMask);
46012 }
else if ((Mask.size() % NumSrcElts) == 0) {
46015 int Scale = Mask.size() / NumSrcElts;
46016 int Lo = Scale * ExtractIdx;
46017 int Hi = Scale * (ExtractIdx + 1);
46018 for (
int i = 0, e = (
int)Mask.size(); i != e; ++i)
46019 if (i <
Lo ||
Hi <= i)
46023 while (Mask.size() > NumSrcElts &&
46025 Mask = std::move(WidenedMask);
46032 if (Mask.size() == NumSrcElts) {
46036 unsigned Scale = Mask.size() / NumSrcElts;
46042 ExtractIdx = Mask[ScaledIdx];
46046 "Failed to widen vector type");
46058 ExtractIdx = ExtractIdx % Mask.size();
46059 if (
SDValue V = GetLegalExtract(
SrcOp, ExtractVT, ExtractIdx))
46090 if (OpVT != MVT::f32 && OpVT != MVT::f64)
46102 if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&
46115 "Unexpected cond type for combine");
46192 "Reduction doesn't end in an extract from index 0");
46204 auto WidenToV16I8 = [&](
SDValue V,
bool ZeroExtend) {
46205 if (V.getValueType() == MVT::v4i8) {
46206 if (ZeroExtend && Subtarget.
hasSSE41()) {
46223 if (VT != MVT::i8 || NumElts < 4 || !
isPowerOf2_32(NumElts))
46237 Rdx = WidenToV16I8(Rdx,
false);
46242 Rdx = DAG.
getNode(Opc,
DL, MVT::v8i16, Rdx,
46244 {4, 5, 6, 7, -1, -1, -1, -1}));
46245 Rdx = DAG.
getNode(Opc,
DL, MVT::v8i16, Rdx,
46247 {2, 3, -1, -1, -1, -1, -1, -1}));
46248 Rdx = DAG.
getNode(Opc,
DL, MVT::v8i16, Rdx,
46250 {1, -1, -1, -1, -1, -1, -1, -1}));
46256 if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
46257 Rdx = WidenToV16I8(Rdx,
true);
46269 if (VT == MVT::i8) {
46273 VecVT =
Lo.getValueType();
46276 assert(VecVT == MVT::v16i8 &&
"v16i8 reduction expected");
46279 MVT::v16i8,
DL, Rdx, Rdx,
46280 {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
46292 if (Opc ==
ISD::ADD && NumElts >= 4 && EltSizeInBits >= 16 &&
46303 Rdx = WidenToV16I8(Rdx,
true);
46320 VecVT =
Lo.getValueType();
46345 if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.
hasSSSE3()) ||
46346 ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.
hasSSE3())) {
46353 if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.
hasSSSE3()) &&
46354 !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.
hasSSE3()))
46359 for (
unsigned i = 0; i != ReductionSteps; ++i)
46360 Rdx = DAG.
getNode(HorizOpcode,
DL, VecVT, Rdx, Rdx);
46375 SDValue InputVector =
N->getOperand(0);
46376 SDValue EltIdx =
N->getOperand(1);
46377 auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);
46380 EVT VT =
N->getValueType(0);
46381 SDLoc dl(InputVector);
46387 if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
46392 APInt UndefVecElts;
46399 if (UndefVecElts[
Idx])
46408 if (Src.getValueType().getScalarType() == MVT::i1 &&
46429 "Vector type mismatch");
46442 if (VT == MVT::i64 && SrcVT == MVT::v1i64 &&
46449 if (VT == MVT::i32 && SrcVT == MVT::v2i32 &&
46482 N, InputVector.
getValueType(), InputVector, CIdx->getZExtValue(),
46497 bool IsVar = !CIdx;
46499 unsigned ResNo = InputVector.
getResNo();
46500 auto IsBoolExtract = [&BoolExtracts, &ResNo, &IsVar](
SDNode *
Use) {
46502 Use->getOperand(0).getResNo() == ResNo &&
46503 Use->getValueType(0) == MVT::i1) {
46505 IsVar |= !isa<ConstantSDNode>(
Use->getOperand(1));
46511 if (
all_of(InputVector->
users(), IsBoolExtract) &&
46512 (IsVar || BoolExtracts.
size() > 1)) {
46566 if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
46582 if (NumElts > EltSizeInBits) {
46587 assert((NumElts % EltSizeInBits) == 0 &&
"Unexpected integer scale");
46588 unsigned Scale = NumElts / EltSizeInBits;
46590 bool UseBroadcast = Subtarget.
hasInt256() &&
46597 for (
unsigned i = 0; i != Scale; ++i) {
46598 int Offset = UseBroadcast ? (i * EltSizeInBits) : 0;
46602 }
else if (Subtarget.
hasAVX2() && NumElts < EltSizeInBits &&
46603 (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
46608 assert((EltSizeInBits % NumElts) == 0 &&
"Unexpected integer scale");
46610 (NumElts * EltSizeInBits) / NumElts);
46621 for (
unsigned i = 0; i != NumElts; ++i) {
46622 int BitIdx = (i % EltSizeInBits);
46652 EVT VT =
LHS.getValueType();
46653 EVT CondVT =
Cond.getValueType();
46659 assert(CondVT.
isVector() &&
"Vector select expects a vector selector!");
46669 if (TValIsAllZeros && FValIsAllZeros) {
46686 if (!TValIsAllOnes && !FValIsAllZeros &&
Cond.hasOneUse() &&
46694 if (TValIsAllZeros || FValIsAllOnes) {
46697 cast<CondCodeSDNode>(
CC)->
get(),
Cond.getOperand(0).getValueType());
46701 TValIsAllOnes = FValIsAllOnes;
46702 FValIsAllZeros = TValIsAllZeros;
46711 if (TValIsAllOnes && FValIsAllZeros)
46718 if (TValIsAllOnes) {
46725 if (FValIsAllZeros) {
46732 if (TValIsAllZeros) {
46753 unsigned Opcode =
N->getOpcode();
46758 EVT VT =
N->getValueType(0);
46785 auto *TrueC = dyn_cast<ConstantSDNode>(
LHS);
46786 auto *FalseC = dyn_cast<ConstantSDNode>(
RHS);
46787 if (!TrueC || !FalseC)
46791 EVT VT =
N->getValueType(0);
46798 if (
Cond.getValueType() != MVT::i1)
46805 const APInt &TrueVal = TrueC->getAPIntValue();
46806 const APInt &FalseVal = FalseC->getAPIntValue();
46809 if ((TrueVal.isAllOnes() || FalseVal.isAllOnes()) &&
46817 APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
46823 ((VT == MVT::i32 || VT == MVT::i64) &&
46824 (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) {
46829 if (TrueVal.slt(FalseVal)) {
46838 if (!AbsDiff.
isOne())
46842 if (!FalseC->isZero())
46868 EVT VT =
N->getValueType(0);
46891 if (VT == MVT::v32i8 && !Subtarget.
hasAVX2())
46899 if (BitWidth < 8 || BitWidth > 64)
46914 if (OnlyUsedAsSelectCond(
Cond)) {
46931 Cond, U->getOperand(1), U->getOperand(2));
46942 N->getOperand(1),
N->getOperand(2));
46966 EVT MaskVT = Mask.getValueType();
46969 "Mask must be zero/all-bits");
46971 if (
X.getValueType() != MaskVT ||
Y.getValueType() != MaskVT)
46977 return N->getOpcode() ==
ISD::SUB &&
N->getOperand(1) == V &&
46982 if (IsNegV(
Y.getNode(),
X))
46984 else if (IsNegV(
X.getNode(),
Y))
47033 Cond.getOperand(0).getValueType());
47035 Cond.getOperand(1), NewCC);
47060 EVT VT =
LHS.getValueType();
47061 EVT CondVT =
Cond.getValueType();
47070 (!CondConstantVector || CondVT.
getScalarType() == MVT::i8) &&
47073 DL, DAG, Subtarget))
47098 for (
int i = 0; i != NumElts; ++i) {
47101 if (CondMask[i] < NumElts) {
47102 LHSMask[i] =
isUndefOrZero(LHSMask[i]) ? 0x80 : LHSMask[i];
47106 RHSMask[i] =
isUndefOrZero(RHSMask[i]) ? 0x80 : RHSMask[i];
47128 bool IsStrict =
Cond->isStrictFPOpcode();
47130 cast<CondCodeSDNode>(
Cond.getOperand(IsStrict ? 3 : 2))->get();
47134 unsigned Opcode = 0;
47273 DL, {
N->getValueType(0), MVT::Other},
47288 Cond.getOpcode() ==
ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) {
47327 Op.hasOneUse() &&
Op.getOperand(0).hasOneUse() &&
47332 bool SelectableLHS = SelectableOp(
LHS,
RHS);
47333 bool SelectableRHS = SelectableOp(
RHS,
LHS);
47334 if (SelectableLHS || SelectableRHS) {
47335 EVT SrcVT = SelectableLHS ?
LHS.getOperand(0).getValueType()
47336 :
RHS.getOperand(0).getValueType();
47354 Cond.hasOneUse()) {
47355 EVT CondVT =
Cond.getValueType();
47379 if (
LHS == Cond0 &&
RHS == Cond1) {
47404 cast<CondCodeSDNode>(InnerSetCC.
getOperand(2))->get();
47472 DL, VT,
LHS.getOperand(0),
LHS.getOperand(1));
47484 DL, VT,
RHS.getOperand(0),
RHS.getOperand(1));
47512 Cond.getScalarValueSizeInBits(),
47514 Cond.hasOneUse()) {
47516 Cond.getOperand(0).getOperand(1));
47524 Cond.hasOneUse()) {
47545 LHS.getOperand(0).getValueType() == IntVT)) &&
47547 RHS.getOperand(0).getValueType() == IntVT))) {
47551 LHS =
LHS.getOperand(0);
47556 RHS =
RHS.getOperand(0);
47572 cast<CondCodeSDNode>(
Cond.getOperand(2))->get() ==
ISD::SETEQ &&
47573 Cond.getOperand(0).getValueType() == VT) {
47577 if (
C &&
C->getAPIntValue().isPowerOf2()) {
47588 bool CanShiftBlend =
47590 (Subtarget.
hasAVX2() && EltBitWidth == 64) ||
47591 (Subtarget.hasXOP()));
47592 if (CanShiftBlend &&
47594 return C->getAPIntValue().isPowerOf2();
47600 auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i));
47602 MaskVal->getAPIntValue().exactLogBase2());
47627 (Cmp.getOpcode() ==
X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
47633 if (!Cmp.hasOneUse())
47648 SDValue CmpLHS = Cmp.getOperand(0);
47649 SDValue CmpRHS = Cmp.getOperand(1);
47660 auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
47664 APInt Addend = OpRHSC->getAPIntValue();
47668 auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
47672 APInt Comparison = CmpRHSC->getAPIntValue();
47673 APInt NegAddend = -Addend;
47677 if (Comparison != NegAddend) {
47678 APInt IncComparison = Comparison + 1;
47679 if (IncComparison == NegAddend) {
47681 Comparison = IncComparison;
47683 }
else if (
CC ==
X86::COND_LE && !Comparison.isMaxSignedValue()) {
47684 Comparison = IncComparison;
47688 APInt DecComparison = Comparison - 1;
47689 if (DecComparison == NegAddend) {
47691 Comparison = DecComparison;
47693 }
else if (
CC ==
X86::COND_L && !Comparison.isMinSignedValue()) {
47694 Comparison = DecComparison;
47702 if (Comparison == NegAddend) {
47705 auto *AN = cast<AtomicSDNode>(CmpLHS.
getNode());
47710 AN->getMemOperand());
47719 if (!Comparison.isZero())
47746 if (!Cmp.hasOneUse())
47754 Src = Cmp.getOperand(0);
47758 if (Src.getOpcode() !=
ISD::SRA || !Src.hasOneUse())
47760 Src = Src.getOperand(0);
47765 Src = Cmp.getOperand(1);
47767 Src = Cmp.getOperand(0);
47776 MVT SrcVT = Src.getSimpleValueType();
47781 if (Src.getOpcode() ==
ISD::SHL) {
47783 Src = Src.getOperand(0);
47813 (Cmp.getOpcode() ==
X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
47822 SDValue Op1 = Cmp.getOperand(0);
47823 SDValue Op2 = Cmp.getOperand(1);
47828 bool checkAgainstTrue =
false;
47830 if ((
C = dyn_cast<ConstantSDNode>(Op1)))
47832 else if ((
C = dyn_cast<ConstantSDNode>(Op2)))
47837 if (
C->getZExtValue() == 1) {
47838 needOppositeCond = !needOppositeCond;
47839 checkAgainstTrue =
true;
47840 }
else if (
C->getZExtValue() != 0)
47844 bool truncatedToBoolWithAnd =
false;
47858 truncatedToBoolWithAnd =
true;
47869 if (checkAgainstTrue && !truncatedToBoolWithAnd)
47872 "Invalid use of SETCC_CARRY!");
47877 if (needOppositeCond)
47893 Op =
Op.getOperand(0);
47901 bool FValIsFalse =
true;
47906 needOppositeCond = !needOppositeCond;
47907 FValIsFalse =
false;
47915 if (needOppositeCond)
47941 switch (
Cond->getOpcode()) {
47942 default:
return false;
47949 SetCC0 =
Cond->getOperand(0);
47950 SetCC1 =
Cond->getOperand(1);
47972 bool FoundAndLSB =
false;
47998 !isa<ConstantSDNode>(CarryOp1.
getOperand(1))) {
48011 }
else if (FoundAndLSB) {
48018 return getBT(Carry, BitNo,
DL, DAG);
48143 assert(VT == MVT::i32 &&
"Expected i32 EFLAGS comparison result");
48150 if ((EltBits == 32 || EltBits == 64) && Subtarget.
hasAVX()) {
48156 }
else if (EltBits == 16) {
48190 if (Src0 && Src1) {
48213 unsigned CmpOpcode = EFLAGS.
getOpcode();
48216 auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.
getOperand(1));
48219 const APInt &CmpVal = CmpConstant->getAPIntValue();
48236 "Unexpected MOVMSK operand");
48242 NumElts <= CmpBits && CmpVal.
isMask(NumElts);
48243 if (!IsAnyOf && !IsAllOf)
48265 if ((BCNumEltBits == 32 || BCNumEltBits == 64) &&
48266 BCNumEltBits > NumEltBits &&
48285 EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
48301 if (IsAllOf && Subtarget.
hasSSE41() && IsOneUse) {
48319 LHS.getOperand(0),
LHS.getOperand(1));
48321 RHS.getOperand(0),
RHS.getOperand(1));
48341 if (IsAnyOf && CmpBits == 8 && VecOp1.
isUndef()) {
48355 if (CmpBits >= 16 && Subtarget.
hasInt256() &&
48356 (IsAnyOf || (SignExt0 && SignExt1))) {
48361 Result.getValueType().getVectorNumElements() <= NumElts) {
48363 Result.getOperand(0), Result.getOperand(1));
48367 Result = DAG.getBitcast(MVT::v32i8, Result);
48369 unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
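// Annotation (not in the original source): CmpMask is presumably the value
// the MOVMSK result is compared against -- 0 for the "any_of" form (compare
// against zero) and all-ones for the "all_of" form (every lane's sign bit
// set).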
48370 if (!SignExt0 || !SignExt1) {
48372 "Only perform v16i16 signmasks for any_of patterns");
48399 if (NumElts <= CmpBits &&
48401 ShuffleMask, DAG) &&
48403 ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits() &&
48418 if (NumElts <= CmpBits && Subtarget.hasAVX() &&
48419 !Subtarget.preferMovmskOverVTest() && IsOneUse &&
48420 (NumEltBits == 32 || NumEltBits == 64)) {
48467 SDValue FalseOp = N->getOperand(0);
48468 SDValue TrueOp = N->getOperand(1);
48473 if (TrueOp == FalseOp)
48493 if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
48496 if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
48505 if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
48511 unsigned ShAmt = TrueC->getAPIntValue().logBase2();
48519 if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
48524 FalseC->getValueType(0), Cond);
48532 if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
48533 APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
48535 "Implicit constant truncation");
48537 bool isFastMultiplier = false;
48538 if (Diff.ult(10)) {
48548 isFastMultiplier = true;
48553 if (isFastMultiplier) {
48564 if (FalseC->getAPIntValue() != 0)
48593 (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
48594 !isa<ConstantSDNode>(Cond.getOperand(0))) {
48597 CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
48603 CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
48623 auto *Sub1C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
48624 if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) {
48625 EVT CondVT = Cond->getValueType(0);
48626 EVT OuterVT = N->getValueType(0);
48665 SDValue LOps[] = {FalseOp, TrueOp,
48689 if (Const == Cond.getOperand(0))
48690 Const = Cond.getOperand(1);
48693 if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
48694 Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
48697 Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
48698 EVT VT = N->getValueType(0);
48715 EVT VT = N->getOperand(0).getValueType();
48719 assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");
48720 unsigned SignBits[2] = {1, 1};
48721 bool IsPositive[2] = {false, false};
48722 for (unsigned i = 0; i < 2; i++) {
48729 bool AllPositive = IsPositive[0] && IsPositive[1];
48730 unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
48732 if (MinSignBits >= 25)
48733 Mode = ShrinkMode::MULS8;
48735 else if (AllPositive && MinSignBits >= 24)
48736 Mode = ShrinkMode::MULU8;
48738 else if (MinSignBits >= 17)
48739 Mode = ShrinkMode::MULS16;
48741 else if (AllPositive && MinSignBits >= 16)
48742 Mode = ShrinkMode::MULU16;
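// Annotation (not in the original source): the thresholds above appear to
// pick the narrowest multiply that cannot overflow -- e.g. >= 25 sign bits on
// a 32-bit element leaves at most 8 significant bits, so a signed 8-bit
// multiply (MULS8) is safe; the *U* variants additionally require both
// operands to be known non-negative.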
48788 if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
48797 EVT VT = N->getOperand(0).getValueType();
48799 if ((NumElts % 2) != 0)
48811 if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8)
48821 ReducedVT, NewN0, NewN1);
48827 for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
48828 ShuffleMask[2 * i] = i;
48829 ShuffleMask[2 * i + 1] = i + NumElts;
48835 for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
48836 ShuffleMask[2 * i] = i + NumElts / 2;
48837 ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
48848 auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
48858 auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) {
48873 return combineMulShlAddOrSub(5, 1, true);
48876 return combineMulShlAddOrSub(5, 2, true);
48879 return combineMulShlAddOrSub(5, 3, true);
48883 combineMulShlAddOrSub(5, 2, true));
48886 return combineMulShlAddOrSub(9, 1, true);
48889 return combineMulShlAddOrSub(9, 2, true);
48892 return combineMulShlAddOrSub(9, 3, true);
48895 return combineMulShlAddOrSub(3, 2, true);
48898 return combineMulShlAddOrSub(3, 3, false);
48901 return combineMulMulAddOrSub(5, 5, true);
48904 return combineMulMulAddOrSub(9, 3, true);
48908 combineMulMulAddOrSub(9, 3, true));
48918 if (ScaleShift >= 1 && ScaleShift < 4) {
48919 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
48940 if (Subtarget.isPMADDWDSlow())
48943 EVT VT = N->getValueType(0);
48956 if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI())
49001 if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)
49005 if (Src.getScalarValueSizeInBits() < 16 && !Subtarget.hasSSE41()) {
49013 N->isOnlyUserOf(Op.getNode())) {
49015 if (Src.getScalarValueSizeInBits() == 16)
49020 N->isOnlyUserOf(Op.getNode())) {
49026 SDValue ZeroN0 = GetZeroableOp(N0);
49027 SDValue ZeroN1 = GetZeroableOp(N1);
49028 if (!ZeroN0 && !ZeroN1)
49030 N0 = ZeroN0 ? ZeroN0 : N0;
49031 N1 = ZeroN1 ? ZeroN1 : N1;
49050 EVT VT = N->getValueType(0);
49090 EVT VT = N->getValueType(0);
49102 if (VT != MVT::i64 && VT != MVT::i32 &&
49132 int64_t SignMulAmt = C.getSExtValue();
49133 assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
49134 uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
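// Annotation (not in the original source): the code below appears to split
// AbsMulAmt into a factor of 3, 5 or 9 (the scales LEA can encode) times a
// second factor, so e.g. x*45 can become two LEAs (x*9, then *5); a negative
// SignMulAmt is presumably handled by negating the result afterwards.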
49137 if (VT == MVT::i64 || VT == MVT::i32) {
49138 if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
49141 if (SignMulAmt < 0)
49149 if ((AbsMulAmt % 9) == 0) {
49151 MulAmt2 = AbsMulAmt / 9;
49152 } else if ((AbsMulAmt % 5) == 0) {
49154 MulAmt2 = AbsMulAmt / 5;
49155 } else if ((AbsMulAmt % 3) == 0) {
49157 MulAmt2 = AbsMulAmt / 3;
49163 (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
49165 if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() &&
49166 N->user_begin()->getOpcode() == ISD::ADD))
49188 if (SignMulAmt < 0)
49190 } else if (!Subtarget.slowLEA())
49201 if (SignMulAmt < 0)
49209 if (SignMulAmt < 0)
49213 } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2) &&
49214 (!VT.isVector() || Subtarget.fastImmVectorShift())) {
49222 } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2) &&
49223 (!VT.isVector() || Subtarget.fastImmVectorShift())) {
49231 } else if (SignMulAmt >= 0 && VT.isVector() &&
49232 Subtarget.fastImmVectorShift()) {
49233 uint64_t AbsMulAmtLowBit = AbsMulAmt & (-AbsMulAmt);
49235 std::optional<unsigned> Opc;
49237 ShiftAmt1 = AbsMulAmt - AbsMulAmtLowBit;
49240 ShiftAmt1 = AbsMulAmt + AbsMulAmtLowBit;
49251 NewMul = DAG.getNode(*Opc, DL, VT, Shift1, Shift2);
49270 "SRL or SRA node is required here!");
49276 SDValue ShiftOperand = N->getOperand(0);
49281 EVT VT = N->getValueType(0);
49294 unsigned ExtOpc = LHS.getOpcode();
49296 RHS.getOpcode() != ExtOpc)
49300 LHS = LHS.getOperand(0);
49301 RHS = RHS.getOperand(0);
49304 EVT MulVT = LHS.getValueType();
49312 return DAG.getNode(ExtOpc, DL, VT, Mulh);
49354 bool MaskOK = false;
49376 if (MaskOK && Mask != 0)
49433 for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
49436 if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
49440 if (SraConst.eq(ShlConst))
49442 if (SraConst.ult(ShlConst))
49497 auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
49498 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
49499 if (!ShiftC || !AndC)
49505 APInt MaskVal = AndC->getAPIntValue();
49514 APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
49517 if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
49518 (OldMaskSize > 32 && NewMaskSize <= 32)) {
49529 unsigned Opcode = N->getOpcode();
49533 EVT VT = N->getValueType(0);
49558 ShuffleOps[0].getValueType().is256BitVector() &&
49588 if (IsShuf0 || IsShuf1) {
49591 ScaledMask0.assign({0, 1});
49595 ScaledMask1.assign({0, 1});
49599 int PostShuffle[4] = {-1, -1, -1, -1};
49605 if (!LHS || LHS == Src) {
49609 if (!RHS || RHS == Src) {
49616 if (FindShuffleOpAndIdx(ScaledMask0[0], PostShuffle[0], Ops0) &&
49617 FindShuffleOpAndIdx(ScaledMask0[1], PostShuffle[1], Ops0) &&
49618 FindShuffleOpAndIdx(ScaledMask1[0], PostShuffle[2], Ops1) &&
49619 FindShuffleOpAndIdx(ScaledMask1[1], PostShuffle[3], Ops1)) {
49640 [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
49642 [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
49649 if ((Op00 == Op11) && (Op01 == Op10)) {
49653 if ((Op00 == Op10) && (Op01 == Op11)) {
49654 const int Map[4] = {0, 2, 1, 3};
49656 {Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]],
49657 Map[ScaledMask1[1]]});
49674 unsigned Opcode = N->getOpcode();
49676 "Unexpected pack opcode");
49678 EVT VT = N->getValueType(0);
49683 unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
49686 "Unexpected PACKSS/PACKUS input type");
49691 APInt UndefElts0, UndefElts1;
49702 unsigned NumSrcElts = NumDstElts / 2;
49703 unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
49704 unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
49706 APInt Undefs(NumDstElts, 0);
49708 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
49709 for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
49710 unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
49711 auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
49712 auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);
49714 if (UndefElts[SrcIdx]) {
49715 Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
49719 APInt &Val = EltBits[SrcIdx];
49730 if (Val.isIntN(DstBitsPerElt))
49731 Val = Val.trunc(DstBitsPerElt);
49737 Bits[Lane * NumDstEltsPerLane + Elt] = Val;
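// Annotation (not in the original source): this loop appears to constant-fold
// PACKSS/PACKUS by walking destination elements per 128-bit lane, picking the
// matching element from the first or second source and saturating/truncating
// it to the narrower destination width, with Undefs tracking lanes that stay
// undefined.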
49755 if (Not0 && Not1) {
49773 if (Subtarget.hasVLX())
49799 assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
49827 "Unexpected horizontal add/sub opcode");
49830 MVT VT = N->getSimpleValueType(0);
49835 if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
49836 LHS.getOpcode() == RHS.getOpcode() &&
49837 LHS.getValueType() == RHS.getValueType() &&
49838 N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) {
49847 LHS0.isUndef() ? LHS1 : LHS0,
49848 RHS0.isUndef() ? RHS1 : RHS0);
49875 "Unexpected shift opcode");
49876 EVT VT = N->getValueType(0);
49892 EltBits[0].getZExtValue(), DAG);
49906 unsigned Opcode = N->getOpcode();
49909 "Unexpected shift opcode");
49911 EVT VT = N->getValueType(0);
49916 "Unexpected value type");
49925 unsigned ShiftVal = N->getConstantOperandVal(1);
49926 if (ShiftVal >= NumBitsPerElt) {
49929 ShiftVal = NumBitsPerElt - 1;
49949 unsigned NewShiftVal = Amt0 + Amt1;
49950 if (NewShiftVal >= NumBitsPerElt) {
49955 NewShiftVal = NumBitsPerElt - 1;
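// Annotation (not in the original source): when two per-element shifts by
// immediates are merged, the summed amount can exceed the element width; for
// logical shifts the result would presumably become zero, while for
// arithmetic shifts the amount is clamped to NumBitsPerElt - 1 so the sign
// bit is still replicated.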
49968 return MergeShifts(N0.getOperand(0), ShiftVal, 1);
49971 if (LogicalShift && (ShiftVal % 8) == 0) {
49981 if (Opcode == X86ISD::VSRAI && NumBitsPerElt == 32 && ShiftVal == 31 &&
50000 auto TryConstantFold = [&](SDValue V) {
50008 "Unexpected shift value type");
50012 for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
50013 APInt &Elt = EltBits[i];
50029 if (N->isOnlyUserOf(N0.getNode())) {
50030 if (SDValue C = TryConstantFold(N0))
50059 EVT VT = N->getValueType(0);
50060 unsigned Opcode = N->getOpcode();
50064 "Unexpected vector insertion");
50117 if (VT == MVT::f32 || VT == MVT::f64 ||
50118 (VT == MVT::f16 && Subtarget.hasFP16())) {
50119 bool ExpectingFlags = false;
50121 for (const SDNode *U : N->users()) {
50122 if (ExpectingFlags)
50125 switch (U->getOpcode()) {
50130 ExpectingFlags = true;
50140 if (!ExpectingFlags) {
50165 N->getSimpleValueType(0));
50172 MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
50174 if (is64BitFP && !Subtarget.is64Bit()) {
50181 MVT::v2f64, OnesOrZeroesF);
50194 return OneBitOfTruth;
50204 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");
50206 MVT VT = N->getSimpleValueType(0);
50236 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");
50238 EVT VT = N->getValueType(0);
50245 auto GetNot = [&DAG](SDValue V) {
50250 if (!SVN || !SVN->hasOneUse() || !SVN->isSplat() ||
50251 !SVN->getOperand(1).isUndef()) {
50258 if (!isa<ConstantSDNode>(IVEN.getOperand(2)) ||
50365 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
50377 EVT VT = N.getValueType();
50390 switch (N.getOpcode()) {
50406 default: llvm_unreachable("Unexpected input node for FP logic conversion");
50424 "Unexpected bit opcode");
50436 if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
50437 (Subtarget.hasSSE2() && N00Type == MVT::f64) ||
50438 (Subtarget.hasFP16() && N00Type == MVT::f16)))
50456 if (!Subtarget.hasAVX() &&
50485 "Unexpected bit opcode");
50517 "Unexpected bit opcode");
50554 "Unexpected bit opcode");
50606 if (N->getValueType(0) == VT &&
50646 return DAG.getBitcast(N->getValueType(0), Shift);
50667 return Subtarget.hasBMI2() &&
50668 (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
50676 MVT VT = N->getSimpleValueType(0);
50709 MVT VT = Node->getSimpleValueType(0);
50717 for (unsigned i = 0; i < 2; i++) {
50719 auto *Ld = dyn_cast<LoadSDNode>(Node->getOperand(i));
50722 const Value *MemOp = Ld->getMemOperand()->getValue();
50730 if (auto *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
50731 if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
50732 if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
50735 if (!isa<ConstantDataArray>(Init) ||
50744 uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
50745 bool ConstantsMatch = true;
50746 for (uint64_t j = 0; j < ArrayElementCount; j++) {
50747 auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j));
50748 if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {
50749 ConstantsMatch = false;
50753 if (!ConstantsMatch)
50786 EVT VT = N->getValueType(0);
50790 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
50798 if (!Src.hasOneUse())
50803 if (!Src.getOperand(0).hasOneUse())
50805 Src = Src.getOperand(0);
50808 if (Src.getOpcode() != ISD::BITCAST || !Src.getOperand(0).hasOneUse())
50811 Src = Src.getOperand(0);
50812 EVT SrcVT = Src.getValueType();
50824 SDValue SubVec = Src.getOperand(0);
50834 auto IsLegalSetCC = [&](SDValue V) {
50837 EVT SetccVT = V.getOperand(0).getValueType();
50866 static constexpr unsigned kMaxDepth = 2;
50869 if (!Op.hasOneUse())
50874 if (Op.getOpcode() == Opc) {
50876 if (Depth++ >= kMaxDepth)
50879 for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx)
50882 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), R,
50883 Op.getOperand(1 - OpIdx));
50889 return DAG.getNode(Opc, DL, Op.getValueType(), OpMustEq, Op);
50895 return DAG.getNode(Opc, DL, Op.getValueType(), OpMustEq, Op);
50902 return DAG.getNode(Opc, DL, Op.getValueType(), OpMustEq, Op);
50909 EVT VT = N->getValueType(0);
50912 (VT != MVT::i32 && VT != MVT::i64))
50918 for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx)
50921 N->getOperand(1 - OpIdx), 0))
50950 SDNode *BrCond = *Flag->user_begin();
50953 unsigned CondNo = 2;
50983 if (BrCond != NewBrCond.getNode())
51004 SDValue SetCC0 = N->getOperand(0);
51005 SDValue SetCC1 = N->getOperand(1);
51010 auto GetCombineToOpc = [&](SDValue V) -> unsigned {
51012 unsigned Opc = Op.getOpcode();
51020 unsigned NewOpc = 0;
51024 if (!(NewOpc = GetCombineToOpc(SetCC1))) {
51026 if (!(NewOpc = GetCombineToOpc(SetCC1)))
51036 bool IsOR = N->getOpcode() == ISD::OR;
51062 {Sub.getOperand(0), Sub.getOperand(0),
51063 CFlags, SrcCC, SetCC0.getOperand(1)});
51073 EVT VT = N->getValueType(0);
51078 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
51086 if (VT == MVT::i64 && Subtarget.is64Bit() && !isa<ConstantSDNode>(N1)) {
51099 if (VT == MVT::i1) {
51103 SrcOps.size() == 1) {
51104 unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
51107 if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
51111 "Unexpected partial reduction mask");
51137 APInt MulCLowBit = MulC & (-MulC);
51139 (MulCLowBit + MulC).isPowerOf2()) {
51142 assert(MulCLowBitLog != -1 &&
51143 "Isolated lowbit is somehow not a power of 2!");
51167 DAG, DCI, Subtarget))
51211 Src.getOperand(0)->hasOneUse())
51212 Src = Src.getOperand(0);
51213 bool ContainsNOT =
false;
51217 Src = Src.getOperand(0);
51219 ContainsNOT =
true;
51221 if (Src.getOpcode() ==
ISD::SRL &&
51222 !isa<ConstantSDNode>(Src.getOperand(1))) {
51223 SDValue BitNo = Src.getOperand(1);
51224 Src = Src.getOperand(0);
51227 Src = Src.getOperand(0);
51229 ContainsNOT =
true;
51232 if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.
getSizeInBits() >= 32))
51246 auto GetDemandedMasks = [&](
SDValue Op) {
51257 for (
int I = 0;
I != NumElts; ++
I) {
51258 if (UndefElts[
I]) {
51263 }
else if (!EltBits[
I].
isZero()) {
51271 APInt Bits0, Elts0;
51272 APInt Bits1, Elts1;
51273 std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
51274 std::tie(Bits1, Elts1) = GetDemandedMasks(N0);
51287 if (NewN0 || NewN1)
51289 NewN1 ? NewN1 : N1);
51306 return M.isZero() || M.isAllOnes();
51314 for (
unsigned i = 0; i != Scale; ++i) {
51317 int VecIdx = Scale *
Idx + i;
51318 ShuffleMask[VecIdx] = EltBits[i].isZero() ?
SM_SentinelZero : VecIdx;
51322 {SrcVec}, 0, SrcVec, ShuffleMask, {}, 1,
51325 true, DAG, Subtarget))
51343 MVT VT =
N->getSimpleValueType(0);
51345 if (!VT.
isVector() || (EltSizeInBits % 8) != 0)
51355 if (!(Subtarget.hasXOP() ||
useVPTERNLOG(Subtarget, VT) ||
51360 APInt UndefElts0, UndefElts1;
51371 for (
unsigned i = 0, e = EltBits0.
size(); i != e; ++i) {
51373 if (UndefElts0[i] || UndefElts1[i])
51375 if (EltBits0[i] != ~EltBits1[i])
51382 MVT OpSVT = EltSizeInBits <= 32 ? MVT::i32 : MVT::i64;
51446 EVT VT =
N->getValueType(0);
51460 EVT MaskVT = Mask.getValueType();
51477 if (Subtarget.hasVLX())
51497 EVT VT = Cmp.getOperand(0).getValueType();
51522 auto isORCandidate = [](SDValue N) {
51523 return (N->getOpcode() == ISD::OR && N->hasOneUse());
51529 if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
51530 !isORCandidate(N->getOperand(0)))
51534 auto isSetCCCandidate = [](SDValue N) {
51539 N->getOperand(1).getValueType().bitsGE(MVT::i32);
51542 SDNode *OR = N->getOperand(0).getNode();
51548 while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
51549 (isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
51552 LHS = OR->getOperand(0);
51553 RHS = OR->getOperand(1);
51557 if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
51558 !isORCandidate(SDValue(OR, 0)))
51575 while (!ORNodes.empty()) {
51577 LHS = OR->getOperand(0);
51578 RHS = OR->getOperand(1);
51597 if (NotOp == And1_R)
51599 if (NotOp != And1_L)
51651 bool ZeroSecondOpOnly = false) {
51657 Y = Y.getOperand(0);
51663 EFLAGS = Y.getOperand(1);
51674 auto *ConstantX = dyn_cast<ConstantSDNode>(X);
51675 if (ConstantX && !ZeroSecondOpOnly) {
51690 !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
51713 if (ZeroSecondOpOnly)
51725 !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
51755 !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
51775 EVT ZVT = Z.getValueType();
51834 bool IsSub = N->getOpcode() == ISD::SUB;
51837 EVT VT = N->getValueType(0);
51865 if (auto *N1C = dyn_cast<ConstantSDNode>(N1)) {
51867 bool N1COdd = N1C->getZExtValue() & 1;
51868 if (IsSub ? N1COdd : !N1COdd)
51893 EVT VT = N->getValueType(0);
51898 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
51907 if (VT == MVT::i1) {
51911 SrcOps.size() == 1) {
51912 unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
51915 if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
51919 "Unexpected partial reduction mask");
51941 DAG, DCI, Subtarget))
51957 if ((VT == MVT::i32 || VT == MVT::i64) &&
51965 if (auto *CN = dyn_cast<ConstantSDNode>(N1)) {
51966 uint64_t Val = CN->getZExtValue();
51967 if (Val == 1 || Val == 2 || Val == 3 || Val == 4 || Val == 7 || Val == 8) {
51987 unsigned HalfElts = NumElts / 2;
52024 for (int I = 0; I != NumElts; ++I)
52025 if (!EltBits[I].isAllOnes())
52030 if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
52054 EVT ResultType = N->getValueType(0);
52055 if (ResultType != MVT::i8 && ResultType != MVT::i1)
52076 if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
52080 if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
52096 if (SetCCResultType != ResultType)
52110 EVT VT = N->getValueType(0);
52120 case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
52124 case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break;
52167 EVT InVT = In.getValueType();
52171 "Unexpected types for truncate operation");
52207 unsigned NumSrcBits = In.getScalarValueSizeInBits();
52208 assert(NumSrcBits > NumDstBits &&
"Unexpected types for truncate operation");
52210 APInt SignedMax, SignedMin;
52238 EVT InVT = In.getValueType();
52246 InVT == MVT::v16i32 && VT == MVT::v16i8) {
52250 DL, DAG, Subtarget);
52251 assert(Mid &&
"Failed to pack!");
52262 bool PreferAVX512 = ((Subtarget.
hasAVX512() && InSVT == MVT::i32) ||
52263 (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
52270 (SVT == MVT::i8 || SVT == MVT::i16) &&
52271 (InSVT == MVT::i16 || InSVT == MVT::i32)) {
52274 if (SVT == MVT::i8 && InSVT == MVT::i32) {
52278 assert(Mid &&
"Failed to pack!");
52281 assert(V &&
"Failed to pack!");
52283 }
else if (SVT == MVT::i8 || Subtarget.
hasSSE41())
52294 Subtarget.
hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI()) &&
52295 (SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) {
52296 unsigned TruncOpc = 0;
52311 ResElts *= NumConcats;
52313 ConcatOps[0] = SatVal;
52335 auto *Ld = cast<LoadSDNode>(N);
52338 SDValue Chain = Ld->getChain();
52351 auto MatchingBits = [](
const APInt &Undefs,
const APInt &UserUndefs,
52356 if (UserUndefs[
I] || Bits[
I] != UserBits[
I])
52365 auto *UserLd = dyn_cast<MemSDNode>(
User);
52366 if (
User !=
N && UserLd &&
52370 UserLd->getChain() == Chain && !
User->hasAnyUseOfValue(1) &&
52371 User->getValueSizeInBits(0).getFixedValue() >
52373 EVT UserVT =
User->getValueType(0);
52374 SDValue UserPtr = UserLd->getBasePtr();
52379 if (UserC && UserPtr !=
Ptr) {
52383 APInt Undefs, UserUndefs;
52390 UserUndefs, UserBits)) {
52391 if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
52409 auto *Ld = cast<LoadSDNode>(
N);
52411 EVT MemVT = Ld->getMemoryVT();
52422 ((Ld->isNonTemporal() && !Subtarget.
hasInt256() &&
52423 Ld->getAlign() >=
Align(16)) ||
52425 *Ld->getMemOperand(), &
Fast) &&
52431 unsigned HalfOffset = 16;
52432 SDValue Ptr1 = Ld->getBasePtr();
52438 DAG.
getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
52439 Ld->getOriginalAlign(),
52442 Ld->getPointerInfo().getWithOffset(HalfOffset),
52443 Ld->getOriginalAlign(),
52459 SDValue IntLoad = DAG.
getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
52460 Ld->getPointerInfo(),
52461 Ld->getOriginalAlign(),
52473 SDValue Chain = Ld->getChain();
52475 auto *UserLd = dyn_cast<MemSDNode>(
User);
52476 if (
User !=
N && UserLd &&
52478 UserLd->getChain() == Chain && UserLd->getBasePtr() ==
Ptr &&
52479 UserLd->getMemoryVT().getSizeInBits() == MemVT.
getSizeInBits() &&
52480 !
User->hasAnyUseOfValue(1) &&
52481 User->getValueSizeInBits(0).getFixedValue() >
52495 unsigned AddrSpace = Ld->getAddressSpace();
52502 return DAG.
getExtLoad(Ext, dl, RegVT, Ld->getChain(), Cast,
52503 Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
52522 auto *BV = dyn_cast<BuildVectorSDNode>(V);
52526 int TrueIndex = -1;
52528 for (
unsigned i = 0; i < NumElts; ++i) {
52532 auto *ConstNode = dyn_cast<ConstantSDNode>(
Op);
52535 if (ConstNode->getAPIntValue().countr_one() >= 1) {
52537 if (TrueIndex >= 0)
52554 if (TrueMaskElt < 0)
52562 if (TrueMaskElt != 0) {
52582 assert(ML->isUnindexed() && "Unexpected indexed masked load!");
52596 EVT VT = ML->getValueType(0);
52600 if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
52607 ML->getPointerInfo().getWithOffset(
Offset),
52608 Alignment,
ML->getMemOperand()->getFlags());
52616 return DCI.
CombineTo(
ML, Insert, Load.getValue(1),
true);
52622 assert(
ML->isUnindexed() &&
"Unexpected indexed masked load!");
52627 EVT VT =
ML->getValueType(0);
52636 if (LoadFirstElt && LoadLastElt) {
52638 ML->getMemOperand());
52640 ML->getPassThru());
52650 if (
ML->getPassThru().isUndef())
52659 VT,
DL,
ML->getChain(),
ML->getBasePtr(),
ML->getOffset(),
ML->getMask(),
52660 DAG.
getUNDEF(VT),
ML->getMemoryVT(),
ML->getMemOperand(),
52661 ML->getAddressingMode(),
ML->getExtensionType());
52663 ML->getPassThru());
52671 auto *Mld = cast<MaskedLoadSDNode>(
N);
52674 if (Mld->isExpandingLoad())
52690 SDValue Mask = Mld->getMask();
52691 if (Mask.getScalarValueSizeInBits() != 1) {
52692 EVT VT = Mld->getValueType(0);
52703 VT,
SDLoc(
N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),
52704 NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),
52705 Mld->getAddressingMode(), Mld->getExtensionType());
52733 if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
52762 return ScalarStore;
52767 if (Mask.getScalarValueSizeInBits() != 1) {
52810 StoredVal = DAG.
getBitcast(NewVT, StoredVal);
52819 if (VT == MVT::v1i1 && VT == StVT && Subtarget.
hasAVX512() &&
52832 if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
52837 Ops[0] = StoredVal;
52845 if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
52846 VT == MVT::v64i1) && VT == StVT && TLI.
isTypeLegal(VT) &&
52849 if (!DCI.
isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
52851 StoredVal->
ops().slice(0, 32));
52854 StoredVal->
ops().slice(32, 32));
52879 if ((VT == MVT::f16 || VT == MVT::bf16 || VT == MVT::f32 || VT == MVT::f64) &&
52889 SignMask = ~SignMask;
52930 MVT NTVT = Subtarget.hasSSE4A()
52932 : (TLI.
isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
52964 auto IsExtractedElement = [](
SDValue V) {
52966 V = V.getOperand(0);
52967 unsigned Opc = V.getOpcode();
52970 V.getOperand(0).hasOneUse())
52971 return V.getOperand(0);
52974 if (
SDValue Extract = IsExtractedElement(StoredVal)) {
52979 MVT SrcVT = Src.getSimpleValueType();
53028 if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
53029 Subtarget.hasCF() && St->
isSimple()) {
53039 auto *Ld = dyn_cast<LoadSDNode>(St->
getChain());
53040 if (!Ld || !Ld->isSimple() || Ld->getBasePtr() != St->
getBasePtr())
53043 bool InvertCC =
false;
53074 bool NoImplicitFloatOps =
F.hasFnAttribute(Attribute::NoImplicitFloat);
53076 !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.
hasSSE2();
53078 if (!F64IsLegal || Subtarget.is64Bit())
53081 if (VT == MVT::i64 && isa<LoadSDNode>(St->
getValue()) &&
53084 auto *Ld = cast<LoadSDNode>(St->
getValue());
53097 Ld->getBasePtr(), Ld->getMemOperand());
53111 if (VT == MVT::i64 &&
53131 auto *St = cast<MemIntrinsicSDNode>(
N);
53133 SDValue StoredVal =
N->getOperand(1);
53135 EVT MemVT = St->getMemoryVT();
53166 bool IsCommutative,
53168 bool ForceHorizOp) {
53170 if (
LHS.isUndef() ||
RHS.isUndef())
53182 MVT VT =
LHS.getSimpleValueType();
53184 "Unsupported vector type for horizontal add/sub");
53189 bool UseSubVector =
false;
53191 Op.getOperand(0).getValueType().is256BitVector() &&
53193 Op =
Op.getOperand(0);
53194 UseSubVector =
true;
53204 if (!UseSubVector && SrcOps.
size() <= 2 &&
53210 if (UseSubVector && SrcOps.
size() == 1 &&
53214 ShuffleMask.
assign(Mask.begin(), Mask.end());
53226 GetShuffle(
LHS,
A,
B, LMask);
53232 GetShuffle(
RHS,
C,
D, RMask);
53235 unsigned NumShuffles = (LMask.
empty() ? 0 : 1) + (RMask.
empty() ? 0 : 1);
53236 if (NumShuffles == 0)
53239 if (LMask.
empty()) {
53241 for (
unsigned i = 0; i != NumElts; ++i)
53245 if (RMask.
empty()) {
53247 for (
unsigned i = 0; i != NumElts; ++i)
53269 if (!(
A ==
C &&
B ==
D))
53272 PostShuffleMask.
clear();
53282 unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
53283 unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
53284 assert((NumEltsPer128BitChunk % 2 == 0) &&
53285 "Vector type should have an even number of elements in each lane");
53286 for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
53287 for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
53289 int LIdx = LMask[i + j], RIdx = RMask[i + j];
53290 if (LIdx < 0 || RIdx < 0 ||
53291 (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
53292 (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
53297 if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) &&
53298 !((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative))
53303 int Base = LIdx & ~1u;
53304 int Index = ((Base % NumEltsPer128BitChunk) / 2) +
53305 ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));
53310 if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk))
53311 Index += NumEltsPer64BitChunk;
53312 PostShuffleMask[i + j] = Index;
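// Annotation (not in the original source): Index appears to remap each pair
// of adjacent source elements onto the lane layout produced by HADD/HSUB --
// within each 128-bit chunk the low 64 bits come from the first operand's
// pairs and the high 64 bits from the second operand's, hence the
// NumEltsPer64BitChunk adjustment.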
53319 bool IsIdentityPostShuffle =
53321 if (IsIdentityPostShuffle)
53322 PostShuffleMask.clear();
53332 return User->getOpcode() == HOpcode &&
User->getValueType(0) == VT;
53340 if (!ForceHorizOp &&
53342 (NumShuffles < 2 || !IsIdentityPostShuffle),
53354 EVT VT =
N->getValueType(0);
53355 unsigned Opcode =
N->getOpcode();
53359 auto MergableHorizOp = [
N](
unsigned HorizOpcode) {
53360 return N->hasOneUse() &&
53362 (
N->user_begin()->getOperand(0).getOpcode() == HorizOpcode ||
53363 N->user_begin()->getOperand(1).getOpcode() == HorizOpcode);
53369 if ((Subtarget.
hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
53370 (Subtarget.
hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
53375 PostShuffleMask, MergableHorizOp(HorizOpcode))) {
53377 if (!PostShuffleMask.
empty())
53379 DAG.
getUNDEF(VT), PostShuffleMask);
53386 if (Subtarget.
hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
53387 VT == MVT::v16i16 || VT == MVT::v8i32)) {
53392 PostShuffleMask, MergableHorizOp(HorizOpcode))) {
53398 {
LHS,
RHS}, HOpBuilder);
53399 if (!PostShuffleMask.
empty())
53401 DAG.
getUNDEF(VT), PostShuffleMask);
53426 EVT VT =
N->getValueType(0);
53429 int CombineOpcode =
53431 auto combineConjugation = [&](
SDValue &r) {
53434 if (XOR->getOpcode() ==
ISD::XOR && XOR.hasOneUse()) {
53437 APInt ConjugationInt32 =
APInt(32, 0x80000000);
53438 APInt ConjugationInt64 =
APInt(64, 0x8000000080000000ULL);
53455 if (combineConjugation(Res))
53458 if (combineConjugation(Res))
53467 auto AllowContract = [&DAG](
const SDNodeFlags &Flags) {
53469 Flags.hasAllowContract();
53472 auto HasNoSignedZero = [&DAG](
const SDNodeFlags &Flags) {
53474 Flags.hasNoSignedZeros();
53476 auto IsVectorAllNegativeZero = [&DAG](
SDValue Op) {
53479 return Bits.getBitWidth() == 32 && Bits.isConstant() &&
53480 Bits.getConstant() == AI;
53483 if (
N->getOpcode() !=
ISD::FADD || !Subtarget.hasFP16() ||
53484 !AllowContract(
N->getFlags()))
53487 EVT VT =
N->getValueType(0);
53488 if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
53494 SDValue FAddOp1, MulOp0, MulOp1;
53495 auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
53496 &IsVectorAllNegativeZero,
53497 &HasNoSignedZero](
SDValue N) ->
bool {
53511 HasNoSignedZero(Op0->
getFlags())) ||
53512 IsVectorAllNegativeZero(Op0->
getOperand(2)))) {
53522 if (GetCFmulFrom(
LHS))
53524 else if (GetCFmulFrom(
RHS))
53535 DAG.
getNode(NewOp,
SDLoc(
N), CVT, MulOp0, MulOp1, FAddOp1,
N->getFlags());
53553 EVT VT =
N->getValueType(0);
53555 EVT SrcVT = Src.getValueType();
53558 if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 ||
53559 SrcVT != MVT::v2f32)
53577 unsigned SrcOpcode = Src.getOpcode();
53580 EVT VT =
N->getValueType(0);
53581 EVT SrcVT = Src.getValueType();
53588 unsigned Opcode =
Op.getOpcode();
53591 Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
53605 return DAG.
getNode(SrcOpcode,
DL, VT, Trunc0, Trunc1);
53609 if (!Src.hasOneUse())
53620 switch (SrcOpcode) {
53627 return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
53634 SDValue Op0 = Src.getOperand(0);
53635 SDValue Op1 = Src.getOperand(1);
53638 return TruncateArithmetic(Op0, Op1);
53666 EVT InVT = Src.getValueType();
53680 auto IsSext = [&DAG](
SDValue V) {
53683 auto IsZext = [&DAG](
SDValue V) {
53687 bool IsSigned = IsSext(
LHS) && IsSext(
RHS);
53688 bool IsUnsigned = IsZext(
LHS) && IsZext(
RHS);
53689 if (!IsSigned && !IsUnsigned)
53693 auto isOpTruncateFree = [](
SDValue Op) {
53696 return Op.getOperand(0).getScalarValueSizeInBits() <= 16;
53699 bool IsTruncateFree = isOpTruncateFree(
LHS) && isOpTruncateFree(
RHS);
53706 if (IsUnsigned && !IsTruncateFree && Subtarget.
hasInt256() &&
53708 (InSizeInBits % 16) == 0) {
53740 if (ScalarVT != MVT::i16 || NumElems < 8 || !
isPowerOf2_32(NumElems))
53803 for (unsigned i = 0; i != NumElems; ++i) {
53814 auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
53815 auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
53816 auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
53817 auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
53818 if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
53820 unsigned IdxN00 = ConstN00Elt->getZExtValue();
53821 unsigned IdxN01 = ConstN01Elt->getZExtValue();
53822 unsigned IdxN10 = ConstN10Elt->getZExtValue();
53823 unsigned IdxN11 = ConstN11Elt->getZExtValue();
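// Annotation (not in the original source): the index checks below appear to
// require that each output element i is built from the two adjacent inputs
// 2*i and 2*i+1 of the same source vectors, which matches the per-pair
// (a0*b0 + a1*b1) shape that PMADDWD computes.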
53825 if (IdxN00 > IdxN10) {
53830 if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
53831 IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
53842 if (ZExtIn != N00In || SExtIn != N01In ||
53843 ZExtIn != N10In || SExtIn != N11In)
53847 auto ExtractVec = [&DAG, &
DL, NumElems](
SDValue &Ext) {
53848 EVT ExtVT = Ext.getValueType();
53855 ExtractVec(ZExtIn);
53856 ExtractVec(SExtIn);
53862 EVT InVT = Ops[0].getValueType();
53864 "Unexpected scalar element type");
53876 EVT VT =
N->getValueType(0);
53898 if (Src.getOpcode() ==
ISD::BITCAST && VT == MVT::i32) {
53899 SDValue BCSrc = Src.getOperand(0);
53914 EVT VT =
N->getValueType(0);
53942 return N->getOperand(0);
53948 unsigned ScalarSize =
N->getValueType(0).getScalarSizeInBits();
53951 EVT VT =
Op->getValueType(0);
53957 unsigned Opc =
Op.getOpcode();
53962 if (!
Op.getOperand(1).isUndef())
53965 if (NegOp0.getValueType() == VT)
53967 cast<ShuffleVectorSDNode>(
Op)->getMask());
53980 NegInsVal,
Op.getOperand(2));
54003 for (
unsigned I = 0, E = EltBits.
size();
I < E;
I++)
54004 if (!UndefElts[
I] && !EltBits[
I].isSignMask())
54089 EVT OrigVT =
N->getValueType(0);
54124 bool LegalOperations,
54127 unsigned Depth)
const {
54134 EVT VT =
Op.getValueType();
54136 unsigned Opc =
Op.getOpcode();
54148 !(SVT == MVT::f32 || SVT == MVT::f64) ||
54154 if (!Flags.hasNoSignedZeros())
54160 for (
int i = 0; i != 3; ++i)
54162 Op.getOperand(i), DAG, LegalOperations, ForCodeSize,
Depth + 1);
54164 bool NegA = !!NewOps[0];
54165 bool NegB = !!NewOps[1];
54166 bool NegC = !!NewOps[2];
54175 NewOps[i] =
Op.getOperand(i);
54192 MVT VT =
N->getSimpleValueType(0);
54205 unsigned IntOpcode;
54206 switch (
N->getOpcode()) {
54238 "Invalid opcode for combing with CTLZ");
54239 if (Subtarget.hasFastLZCNT())
54242 EVT VT =
N->getValueType(0);
54243 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32 &&
54244 (VT != MVT::i64 || !Subtarget.is64Bit()))
54260 } else if (N->getOpcode() == ISD::SUB) {
54269 auto *C = dyn_cast<ConstantSDNode>(OpSizeTM1);
54277 if (VT == MVT::i8) {
54296 EVT VT =
N->getValueType(0);
54300 if (Subtarget.
hasSSE1() && !Subtarget.
hasSSE2() && VT == MVT::v4i32) {
54320 DAG, DCI, Subtarget))
54366 auto *N1C = dyn_cast<ConstantSDNode>(N1);
54367 auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.
getOperand(1));
54368 if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) {
54386 EVT VT =
N->getValueType(0);
54391 EVT SrcVT = Src.getValueType();
54398 for (
unsigned I = 0;
I != NumElts; ++
I)
54399 ReverseMask[
I] = (NumElts - 1) -
I;
54413 unsigned Opcode =
N->getOpcode();
54416 EVT VT =
N->getValueType(0);
54437 EVT VT =
N->getValueType(0);
54466 if (V.getValueType().isVector())
54476 EVT VT =
N->getValueType(0);
54480 if (!((VT == MVT::f32 && Subtarget.
hasSSE1()) ||
54481 (VT == MVT::f64 && Subtarget.
hasSSE2()) ||
54482 (VT == MVT::v4f32 && Subtarget.
hasSSE1() && !Subtarget.
hasSSE2())))
54485 auto isAllOnesConstantFP = [](
SDValue V) {
54486 if (V.getSimpleValueType().isVector())
54488 auto *
C = dyn_cast<ConstantFPSDNode>(V);
54489 return C &&
C->getConstantFPValue()->isAllOnesValue();
54525 return N->getOperand(1);
54542 return N->getOperand(1);
54546 return N->getOperand(0);
54565 unsigned NewOp = 0;
54566 switch (
N->getOpcode()) {
54573 N->getOperand(0),
N->getOperand(1));
54578 EVT VT =
N->getValueType(0);
54579 if (Subtarget.useSoftFloat() ||
isSoftF16(VT, Subtarget))
54584 if (!((Subtarget.
hasSSE1() && VT == MVT::f32) ||
54585 (Subtarget.
hasSSE2() && VT == MVT::f64) ||
54586 (Subtarget.hasFP16() && VT == MVT::f16) ||
54598 return DAG.
getNode(MinMaxOp,
DL, VT, Op0, Op1,
N->getFlags());
54603 return DAG.
getNode(MinMaxOp,
DL, VT, Op0, Op1,
N->getFlags());
54605 return DAG.
getNode(MinMaxOp,
DL, VT, Op1, Op0,
N->getFlags());
54639 return DAG.
getSelect(
DL, VT, IsOp0Nan, Op1, MinOrMax);
54644 EVT VT =
N->getValueType(0);
54653 MVT InVT = In.getSimpleValueType();
54657 LoadSDNode *LN = cast<LoadSDNode>(
N->getOperand(0));
54679 EVT VT =
N->getValueType(0);
54682 SDValue In =
N->getOperand(IsStrict ? 1 : 0);
54683 MVT InVT = In.getSimpleValueType();
54695 DAG.
getNode(
N->getOpcode(), dl, {VT, MVT::Other},
54696 {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
54718 MVT VT =
N->getSimpleValueType(0);
54750 EVT SrcVT = Src.getValueType();
54758 APInt Undefs0, Undefs1;
54767 for (
int I = 0;
I != NumElts; ++
I)
54768 ResultBits.
push_back(~EltBits0[
I] & EltBits1[
I]);
54778 for (
APInt &Elt : EltBits0)
54794 auto GetDemandedMasks = [&](
SDValue Op,
bool Invert =
false) {
54803 for (
int I = 0;
I != NumElts; ++
I) {
54804 if (UndefElts[
I]) {
54809 }
else if ((Invert && !EltBits[
I].isAllOnes()) ||
54810 (!Invert && !EltBits[
I].
isZero())) {
54818 APInt Bits0, Elts0;
54819 APInt Bits1, Elts1;
54820 std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
54821 std::tie(Bits1, Elts1) = GetDemandedMasks(N0,
true);
54877 SDValue Src =
N->getOperand(IsStrict ? 1 : 0);
54879 if (
N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
54890 LoadSDNode *LN = cast<LoadSDNode>(
N->getOperand(IsStrict ? 1 : 0));
54895 N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
54896 {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});
54918 EVT DstVT =
N->getValueType(0);
54922 EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
54924 if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
54928 SDValue IntermediateBitwidthOp;
54931 IntermediateBitwidthOp = N0;
54943 if (!isa<ConstantSDNode>(CMovOp0.
getNode()) ||
54944 !isa<ConstantSDNode>(CMovOp1.
getNode()))
54950 if (IntermediateBitwidthOp) {
54951 unsigned IntermediateOpc = IntermediateBitwidthOp.
getOpcode();
54952 CMovOp0 = DAG.
getNode(IntermediateOpc,
DL, DstVT, CMovOp0);
54953 CMovOp1 = DAG.
getNode(IntermediateOpc,
DL, DstVT, CMovOp1);
54959 EVT CMovVT = DstVT;
54961 if (DstVT == MVT::i16) {
54970 if (CMovVT != DstVT)
54983 EVT VT =
N->getValueType(0);
54986 EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
55030 EVT VT = Ext->getValueType(0);
55031 if (VT != MVT::i64)
55041 bool NSW =
Add->getFlags().hasNoSignedWrap();
55042 bool NUW =
Add->getFlags().hasNoUnsignedWrap();
55048 if ((Sext && !NSW) || (!Sext && !NUW))
55054 auto *AddOp1C = dyn_cast<ConstantSDNode>(AddOp1);
55063 bool HasLEAPotential =
false;
55064 for (
auto *
User : Ext->users()) {
55066 HasLEAPotential =
true;
55070 if (!HasLEAPotential)
55074 int64_t AddC = Sext ? AddOp1C->getSExtValue() : AddOp1C->getZExtValue();
55081 Flags.setNoSignedWrap(NSW);
55082 Flags.setNoUnsignedWrap(NUW);
55104 unsigned ExtendOpcode = Extend->
getOpcode();
55111 if (!isa<ConstantSDNode>(CMovOp0.
getNode()) ||
55112 !isa<ConstantSDNode>(CMovOp1.
getNode()))
55116 if (TargetVT != MVT::i32 && TargetVT != MVT::i64)
55121 if (VT != MVT::i16 && !(ExtendOpcode ==
ISD::SIGN_EXTEND && VT == MVT::i32))
55126 EVT ExtendVT = TargetVT;
55128 ExtendVT = MVT::i32;
55130 CMovOp0 = DAG.
getNode(ExtendOpcode,
DL, ExtendVT, CMovOp0);
55131 CMovOp1 = DAG.
getNode(ExtendOpcode,
DL, ExtendVT, CMovOp1);
55137 if (ExtendVT != TargetVT)
55138 Res = DAG.
getNode(ExtendOpcode,
DL, TargetVT, Res);
55148 EVT VT =
N->getValueType(0);
55157 if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
55158 SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
55193 EVT VT =
N->getValueType(0);
55201 bool ReplaceOtherUses = !N0.
hasOneUse();
55204 if (ReplaceOtherUses) {
55223 DAG, DCI, Subtarget))
55248 "ConstantFP build vector expected");
55260 EVT VT = V.getValueType();
55262 for (
const SDValue &
Op : V->op_values()) {
55263 if (
auto *Cst = dyn_cast<ConstantFPSDNode>(
Op)) {
55284 for (
const SDValue &
Op : V->op_values()) {
55285 if (
auto *Cst = dyn_cast<ConstantFPSDNode>(
Op)) {
55286 if (Cst->isNegative())
55298 EVT VT =
N->getValueType(0);
55300 bool IsStrict =
N->isTargetOpcode()
55302 :
N->isStrictFPOpcode();
55309 SDValue A =
N->getOperand(IsStrict ? 1 : 0);
55310 SDValue B =
N->getOperand(IsStrict ? 2 : 1);
55311 SDValue C =
N->getOperand(IsStrict ? 3 : 2);
55316 if (!IsStrict && Flags.hasAllowReassociation() &&
55323 if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
55325 !(ScalarVT == MVT::f16 && Subtarget.hasFP16()) &&
55326 !(ScalarVT == MVT::bf16 && Subtarget.hasAVX10_2()))
55329 auto invertIfNegative = [&DAG, &TLI, &DCI](
SDValue &V) {
55341 SDValue Vec = V.getOperand(0);
55343 Vec, DAG, LegalOperations, CodeSize)) {
55345 NegV, V.getOperand(1));
55361 bool NegA = invertIfNegative(A);
55362 bool NegB = invertIfNegative(B);
55363 bool NegC = invertIfNegative(C);
55365 if (!NegA && !NegB && !NegC)
55368 unsigned NewOpcode =
55374 assert(
N->getNumOperands() == 4 &&
"Shouldn't be greater than 4");
55375 return DAG.
getNode(NewOpcode, dl, {VT, MVT::Other},
55376 {
N->getOperand(0),
A,
B,
C});
55378 if (
N->getNumOperands() == 4)
55379 return DAG.
getNode(NewOpcode, dl, VT,
A,
B,
C,
N->getOperand(3));
55380 return DAG.
getNode(NewOpcode, dl, VT,
A,
B,
C);
55389 EVT VT =
N->getValueType(0);
55402 if (
N->getNumOperands() == 4)
55403 return DAG.
getNode(NewOpcode, dl, VT,
N->getOperand(0),
N->getOperand(1),
55404 NegN2,
N->getOperand(3));
55405 return DAG.
getNode(NewOpcode, dl, VT,
N->getOperand(0),
N->getOperand(1),
55414 EVT VT =
N->getValueType(0);
55422 bool ReplaceOtherUses = !N0.
hasOneUse();
55425 if (ReplaceOtherUses) {
55442 DAG, DCI, Subtarget))
55494 EVT VT =
N->getValueType(0);
55495 EVT OpVT =
LHS.getValueType();
55503 if (VT == MVT::i1) {
55551 LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
55553 EVT SrcVT =
LHS.getOperand(0).getValueType();
55572 if (
auto *
C = dyn_cast<ConstantSDNode>(
RHS)) {
55573 const APInt &CInt =
C->getAPIntValue();
55606 if (IsSEXT0 && IsVZero1) {
55608 "Unexpected operand type");
55617 "Unexpected condition code!");
55627 bool CanMakeSigned =
false;
55653 RHS, DAG,
false,
true))
55684 NewCC,
DL, DAG, Subtarget))
55686 return DAG.
getSetCC(
DL, VT, LHSOut, RHSOut, NewCC);
55760 if (Subtarget.
hasSSE1() && !Subtarget.
hasSSE2() && VT == MVT::v4i32 &&
55761 LHS.getValueType() == MVT::v4f32)
55781 MVT SrcVT = Src.getSimpleValueType();
55782 MVT VT =
N->getSimpleValueType(0);
55786 assert(VT == MVT::i32 && NumElts <= NumBits &&
"Unexpected MOVMSK types");
55795 for (unsigned Idx = 0; Idx != NumElts; ++Idx)
55796 if (!UndefElts[Idx] && EltBits[Idx].isNegative())
55805 Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
55843 MVT ShiftVT = SrcVT;
55844 SDValue ShiftLHS = Src.getOperand(0);
55845 SDValue ShiftRHS = Src.getOperand(1);
55849 ShiftLHS = DAG.
getBitcast(ShiftVT, ShiftLHS);
55850 ShiftRHS = DAG.
getBitcast(ShiftVT, ShiftRHS);
55853 ShiftLHS, ShiftAmt, DAG);
55855 ShiftRHS, ShiftAmt, DAG);
55864 if (
N->isOnlyUserOf(Src.getNode())) {
55870 UndefElts, EltBits)) {
55872 for (
unsigned Idx = 0;
Idx != NumElts; ++
Idx) {
55873 if (!UndefElts[
Idx] && EltBits[
Idx].isNegative())
55897 MVT VT =
N->getSimpleValueType(0);
55911 auto *
MemOp = cast<X86MaskedGatherScatterSDNode>(
N);
55915 if (Mask.getScalarValueSizeInBits() != 1) {
55933 if (
auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
55934 SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
55935 Gather->getMask(),
Base, Index, Scale } ;
55937 Gather->getMemoryVT(),
DL, Ops,
55938 Gather->getMemOperand(),
55939 Gather->getIndexType(),
55940 Gather->getExtensionType());
55942 auto *Scatter = cast<MaskedScatterSDNode>(GorS);
55943 SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
55944 Scatter->getMask(),
Base, Index, Scale };
55946 Scatter->getMemoryVT(),
DL,
55947 Ops, Scatter->getMemOperand(),
55948 Scatter->getIndexType(),
55949 Scatter->isTruncatingStore());
55955 auto *GorS = cast<MaskedGatherScatterSDNode>(
N);
55956 SDValue Index = GorS->getIndex();
55958 SDValue Scale = GorS->getScale();
55962 unsigned IndexWidth = Index.getScalarValueSizeInBits();
55971 if (
auto *BV = dyn_cast<BuildVectorSDNode>(Index)) {
55974 EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
55986 Index.getOperand(0).getScalarValueSizeInBits() <= 32 &&
55988 EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
55999 if (Index.getOpcode() ==
ISD::ADD &&
56000 Index.getValueType().getVectorElementType() == PtrVT &&
56001 isa<ConstantSDNode>(Scale)) {
56003 if (
auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) {
56007 if (UndefElts.
none()) {
56009 APInt Adder =
C->getAPIntValue() * ScaleAmt;
56013 Index = Index.getOperand(0);
56025 Index.getOperand(1),
Splat);
56028 Index.getOperand(0),
Splat);
56036 unsigned IndexWidth = Index.getScalarValueSizeInBits();
56039 if (IndexWidth != 32 && IndexWidth != 64) {
56040 MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32;
56041 EVT IndexVT = Index.getValueType().changeVectorElementType(EltVT);
56048 SDValue Mask = GorS->getMask();
56049 if (Mask.getScalarValueSizeInBits() != 1) {
56066 SDValue EFLAGS =
N->getOperand(1);
56079 SDValue EFLAGS =
N->getOperand(3);
56088 N->getOperand(1),
Cond, Flags);
56108 EVT VT =
N->getValueType(0);
56109 bool IsStrict =
N->isStrictFPOpcode();
56111 SDValue Op0 =
N->getOperand(IsStrict ? 1 : 0);
56121 if (
auto *BV = dyn_cast<BuildVectorSDNode>(Op0.
getOperand(1))) {
56133 SourceConst = DAG.
getNode(
N->getOpcode(),
DL, {VT, MVT::Other},
56134 {N->getOperand(0), SDValue(BV, 0)});
56170 if (SrcWidth % DestWidth != 0)
56176 unsigned NumElts = VecWidth / DestWidth;
56182 return DAG.
getNode(
N->getOpcode(),
DL,
N->getValueType(0), NewExtElt);
56187 bool IsStrict =
N->isStrictFPOpcode();
56188 SDValue Op0 =
N->getOperand(IsStrict ? 1 : 0);
56189 EVT VT =
N->getValueType(0);
56202 if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
56208 (Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
56209 : ScalarSize < 32 ? MVT::i32
56215 {
N->getOperand(0),
P});
56231 {
N->getOperand(0),
P});
56242 {
N->getOperand(0), Op0});
56254 bool IsStrict =
N->isStrictFPOpcode();
56259 SDValue Op0 =
N->getOperand(IsStrict ? 1 : 0);
56260 EVT VT =
N->getValueType(0);
56273 if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
56279 (Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
56280 : ScalarSize < 32 ? MVT::i32
56286 {
N->getOperand(0),
P});
56300 {
N->getOperand(0),
P});
56310 if (NumSignBits >= (
BitWidth - 31)) {
56311 EVT TruncVT = MVT::i32;
56319 {
N->getOperand(0), Trunc});
56324 assert(InVT == MVT::v2i64 &&
"Unexpected VT!");
56330 {
N->getOperand(0), Shuf});
56337 if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
56342 if (VT == MVT::f16 || VT == MVT::f128)
56347 if (Subtarget.hasDQI() && VT != MVT::f80)
56351 Op0.
hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) {
56352 std::pair<SDValue, SDValue> Tmp =
56373 if (!Subtarget.hasAVX10_2())
56377 EVT SrcVT =
N->getOperand(0).getValueType();
56378 EVT DstVT =
N->getValueType(0);
56381 if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
56386 N->getOperand(0), V2F32Value);
56398 assert(Flags.getValueType() == MVT::i32 &&
"Unexpected VT!");
56402 switch (
User->getOpcode()) {
56433 assert(Flags.getValueType() == MVT::i32 &&
"Unexpected VT!");
56437 switch (
User->getOpcode()) {
56472 EVT VT =
Op.getValueType();
56483 Op.hasOneUse() && isa<ConstantSDNode>(
Op.getOperand(1)) &&
56486 const APInt &ShAmt =
Op.getConstantOperandAPInt(1);
56492 if (Mask.isSignedIntN(32)) {
56509 Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
56510 SDValue BoolVec = Src.getOperand(0);
56511 unsigned ShAmt = 0;
56534 EVT SrcVT = Src.getValueType();
56545 Op =
Op.getOperand(0);
56550 EVT OpVT =
Op.getValueType();
56564 switch (
Op.getOpcode()) {
56569 if (isa<ConstantSDNode>(
Op.getOperand(1)))
56595 Op = DAG.
getNode(NewOpc, dl, VTs, Op0, Op1);
56603 return Op.getValue(1);
56610 "Expected X86ISD::ADD or X86ISD::SUB");
56615 MVT VT =
LHS.getSimpleValueType();
56619 if (IsSub &&
isOneConstant(
N->getOperand(1)) && !
N->hasAnyUseOfValue(0))
56624 if (!
N->hasAnyUseOfValue(1)) {
56637 if (GenericAddSub->hasOneUse() &&
56638 GenericAddSub->user_begin()->isOnlyUserOf(
N))
56645 MatchGeneric(
LHS,
RHS,
false);
56657 SDValue BorrowIn =
N->getOperand(2);
56660 MVT VT =
N->getSimpleValueType(0);
56668 !
N->hasAnyUseOfValue(1))
56670 LHS.getOperand(1), BorrowIn);
56680 SDValue CarryIn =
N->getOperand(2);
56681 auto *LHSC = dyn_cast<ConstantSDNode>(
LHS);
56682 auto *RHSC = dyn_cast<ConstantSDNode>(
RHS);
56692 if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() &&
56697 EVT VT =
N->getValueType(0);
56710 if (LHSC && RHSC && !LHSC->isZero() && !
N->hasAnyUseOfValue(1)) {
56712 APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue();
56719 MVT VT =
N->getSimpleValueType(0);
56726 if (
LHS.getOpcode() ==
ISD::ADD && RHSC && RHSC->isZero() &&
56727 !
N->hasAnyUseOfValue(1))
56729 LHS.getOperand(1), CarryIn);
56737 using namespace SDPatternMatch;
56782 APInt Idx0L, Idx0H, Idx1L, Idx1H;
56783 SDValue Vec0L, Vec0H, Vec1L, Vec1H;
56799 if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 ||
56800 Idx1H != 2 * i + 3)
56808 Mul.getValueType().getVectorNumElements() != 2 * e)
56812 if (
Mul != Vec0L ||
Mul != Vec1L ||
Mul != Vec0H ||
Mul != Vec1H)
56819 Mode == ShrinkMode::MULU16)
56829 EVT InVT = Ops[0].getValueType();
56847 using namespace SDPatternMatch;
56890 SDValue N00In, N01In, N10In, N11In;
56891 APInt IdxN00, IdxN01, IdxN10, IdxN11;
56903 if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i ||
56904 IdxN11 != 2 * i + 1)
56924 if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In)
56930 EVT OpVT = Ops[0].getValueType();
56932 "Unexpected scalar element type");
56982 if (!Op0HiZero || !Op1HiZero)
56987 for (
int i = 0; i != (int)NumElts; ++i) {
56988 Mask.push_back(2 * i);
56989 Mask.push_back(2 * (i + NumElts));
57011 auto isSuitableCmov = [](
SDValue V) {
57014 if (!isa<ConstantSDNode>(V.getOperand(0)) ||
57015 !isa<ConstantSDNode>(V.getOperand(1)))
57018 (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
57019 V.getConstantOperandAPInt(1).isSignedIntN(32));
57024 SDValue OtherOp =
N->getOperand(1);
57025 if (!isSuitableCmov(Cmov))
57027 if (!isSuitableCmov(Cmov))
57035 EVT VT =
N->getValueType(0);
57045 !isa<ConstantSDNode>(OtherOp.
getOperand(0)) &&
57047 auto *MemNode = dyn_cast<MemSDNode>(Use);
57048 return MemNode && MemNode->getBasePtr().getNode() == N;
57071 EVT VT =
N->getValueType(0);
57128 if (Subtarget.hasVNNI() && Subtarget.
useAVX512Regs() && VT == MVT::v16i32) {
57129 using namespace SDPatternMatch;
57130 SDValue Accum, Lo0, Lo1, Hi0, Hi1;
57166 assert(
Cond.getResNo() == 1 &&
"Unexpected result number");
57180 if (!(TrueOp ==
X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp ==
X))
57194 (FalseOp ==
Cond.getValue(0) || TrueOp ==
Cond.getValue(0)) &&
57215 EVT VT =
N->getValueType(0);
57216 auto *Op0C = dyn_cast<ConstantSDNode>(Op0);
57223 APInt NewImm = Op0C->getAPIntValue() - 1;
57256 cast<MemSDNode>(
N)->getMemoryVT(),
57257 cast<MemSDNode>(
N)->getMemOperand());
57263 EVT VT =
N->getValueType(0);
57268 auto IsNonOpaqueConstant = [&](
SDValue Op) {
57324 unsigned Opcode =
N->getOpcode();
57326 "Unknown PCMP opcode");
57330 MVT VT =
N->getSimpleValueType(0);
57343 APInt LHSUndefs, RHSUndefs;
57350 for (
unsigned I = 0;
I != NumElts; ++
I) {
57352 Results[
I] = (LHSBits[
I] == RHSBits[
I]) ? Ones : Zero;
57354 bool AnyUndef = LHSUndefs[
I] || RHSUndefs[
I];
57355 Results[
I] = (!AnyUndef && LHSBits[
I].sgt(RHSBits[
I])) ? Ones : Zero;
57368static std::optional<unsigned>
57370 unsigned NumSignificantBitsRHS) {
57372 assert(SVT == MVT::f32 &&
"Only tested for float so far");
57375 "Only PCMPEQ/PCMPGT currently supported");
57382 if (FPPrec >= NumSignificantBitsLHS && FPPrec >= NumSignificantBitsRHS)
57385 return std::nullopt;
57395 assert(Subtarget.
hasAVX() &&
"AVX assumed for concat_vectors");
57408 unsigned NumOps = Ops.
size();
57432 (EltSizeInBits >= 32 &&
57448 Op0.
getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT())
57497 return Op.getOpcode() == Op0.
getOpcode() &&
Op.hasOneUse();
57517 bool AllConstants =
true;
57518 bool AllSubs =
true;
57521 if (isa<LoadSDNode>(BC0) &&
all_of(SubOps, [&](
SDValue SubOp) {
57525 for (
unsigned I = 0, E = SubOps.size();
I != E; ++
I) {
57535 return AllConstants || AllSubs;
57541 (EltSizeInBits >= 32 || Subtarget.
hasInt256()) &&
57542 (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
57545 for (
int M : cast<ShuffleVectorSDNode>(Ops[0])->getMask()) {
57546 M = M >= NumSubElts ? M + NumSubElts : M;
57549 for (
int M : cast<ShuffleVectorSDNode>(Ops[1])->getMask()) {
57551 M = (M >= NumSubElts ? M + NumSubElts : M) + NumSubElts;
57555 ConcatSubOperand(VT, Ops, 1), NewMask);
57563 if (VT == MVT::v4f64 || VT == MVT::v4i64)
57565 ConcatSubOperand(VT, Ops, 0),
57566 ConcatSubOperand(VT, Ops, 0));
57568 if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.
hasInt256()))
57571 DL, VT, ConcatSubOperand(VT, Ops, 0),
57581 ConcatSubOperand(VT, Ops, 0));
57591 ConcatSubOperand(VT, Ops, 0),
57592 ConcatSubOperand(VT, Ops, 1), Op0.
getOperand(2));
57599 if (!IsSplat && EltSizeInBits >= 32 &&
57609 ConcatSubOperand(VT, Ops, 0),
57610 ConcatSubOperand(VT, Ops, 1));
57620 ConcatSubOperand(VT, Ops, 0), Op0.
getOperand(1));
57624 if (!IsSplat && EltSizeInBits == 32 &&
57636 if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
57637 uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
57638 uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
57641 ConcatSubOperand(VT, Ops, 0),
57655 ConcatSubOperand(SrcVT, Ops, 0),
57656 ConcatSubOperand(SrcVT, Ops, 1));
57660 if (!IsSplat && NumOps == 2 &&
57665 for (
unsigned i = 0; i != NumOps; ++i) {
57670 for (
int M : SubMask) {
57672 M += i * NumSrcElts;
57676 if (ConcatMask.
size() == (NumOps * NumSrcElts)) {
57678 Ops[1].getOperand(1), DAG,
DL);
57691 for (
unsigned i = 0; i != NumOps; ++i) {
57696 for (
int M : SubMask) {
57698 int Src = M < NumSrcElts ? 0 : 2;
57699 M += M < NumSrcElts ? 0 : NumSrcElts;
57702 if (Ops[0].getOperand(Src) != Ops[i].getOperand(Src))
57703 M += i * NumSrcElts;
57708 if (ConcatMask.
size() == (NumOps * NumSrcElts)) {
57710 Ops[1].getOperand(0), DAG,
DL);
57712 Ops[1].getOperand(2), DAG,
DL);
57722 assert(NumOps == 2 && "Bad concat_vectors operands");
57723 unsigned Imm0 = Ops[0].getConstantOperandVal(2);
57724 unsigned Imm1 = Ops[1].getConstantOperandVal(2);
57726 if ((Imm0 & 0x88) == 0 && (Imm1 & 0x88) == 0) {
57727 int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3), (int)(Imm1 & 0x03),
57728 (int)((Imm1 >> 4) & 0x3)};
57731 Ops[0].getOperand(1), DAG, DL);
57733 Ops[1].getOperand(1), DAG, DL);
57745 unsigned Imm0 = Ops[0].getConstantOperandVal(2);
57746 unsigned Imm1 = Ops[1].getConstantOperandVal(2);
57747 unsigned Imm = ((Imm0 & 1) << 0) | ((Imm0 & 2) << 1) | 0x08 |
57748 ((Imm1 & 1) << 4) | ((Imm1 & 2) << 5) | 0x80;
57750 Ops[0].getOperand(1), DAG, DL);
57752 Ops[1].getOperand(1), DAG, DL);
57760 EVT SrcVT = Ops[0].getOperand(0).getValueType();
57762 SrcVT == Ops[1].getOperand(0).getValueType() &&
57768 ConcatSubOperand(NewSrcVT, Ops, 0));
57776 if (!IsSplat && NumOps == 2 &&
57779 (EltSizeInBits >= 32 || Subtarget.
useBWIRegs())))) {
57780 EVT SrcVT = Ops[0].getOperand(0).getValueType();
57782 SrcVT == Ops[1].getOperand(0).getValueType()) {
57785 ConcatSubOperand(NewSrcVT, Ops, 0));
57793 if (VT == MVT::v4i64 && !Subtarget.
hasInt256() &&
57795 return Op.getConstantOperandAPInt(1) == 32;
57801 {8, 0, 8, 2, 8, 4, 8, 6});
57804 {1, 8, 3, 8, 5, 8, 7, 8});
57815 (EltSizeInBits >= 32 || Subtarget.
useBWIRegs()))) &&
57817 return Op0.getOperand(1) == Op.getOperand(1);
57820 ConcatSubOperand(VT, Ops, 0), Op0.
getOperand(1));
57828 return Op0.getOperand(1) == Op.getOperand(1);
57831 ConcatSubOperand(VT, Ops, 0), Op0.
getOperand(1));
57841 ConcatSubOperand(VT, Ops, 0),
57842 ConcatSubOperand(VT, Ops, 1));
57848 (Subtarget.
hasInt256() || VT == MVT::v8i32) &&
57849 (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
57852 ConcatSubOperand(VT, Ops, 0),
57853 ConcatSubOperand(VT, Ops, 1));
57857 unsigned MaxSigBitsLHS = 0, MaxSigBitsRHS = 0;
57858 for (
unsigned I = 0;
I != NumOps; ++
I) {
57860 std::max(MaxSigBitsLHS,
57863 std::max(MaxSigBitsRHS,
57865 if (MaxSigBitsLHS == EltSizeInBits && MaxSigBitsRHS == EltSizeInBits)
57877 if (std::optional<unsigned> CastOpc =
57884 bool IsAlwaysSignaling;
57901 ConcatSubOperand(VT, Ops, 0));
57909 return Op0.getOperand(2) == Op.getOperand(2);
57912 ConcatSubOperand(VT, Ops, 0),
57913 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
57921 (EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
57923 ConcatSubOperand(VT, Ops, 0),
57924 ConcatSubOperand(VT, Ops, 1));
57933 if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
57937 ConcatSubOperand(VT, Ops, 0),
57938 ConcatSubOperand(VT, Ops, 1));
57945 ConcatSubOperand(VT, Ops, 0),
57946 ConcatSubOperand(VT, Ops, 1));
57956 ConcatSubOperand(VT, Ops, 0),
57957 ConcatSubOperand(VT, Ops, 1));
57968 ConcatSubOperand(SrcVT, Ops, 0),
57969 ConcatSubOperand(SrcVT, Ops, 1));
57977 return Op0.getOperand(2) == Op.getOperand(2);
57980 ConcatSubOperand(VT, Ops, 0),
57981 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
57986 uint64_t Mask0 = Ops[0].getConstantOperandVal(2);
57987 uint64_t Mask1 = Ops[1].getConstantOperandVal(2);
57990 Mask0 = (Mask0 << 8) | Mask0;
57991 Mask1 = (Mask1 << 8) | Mask1;
57998 return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1),
57999 ConcatSubOperand(VT, Ops, 0));
58003 if (!IsSplat && Subtarget.hasAVX512() &&
58006 (EltSizeInBits >= 32 || Subtarget.hasBWI())) {
58007 EVT SelVT = Ops[0].getOperand(0).getValueType();
58014 ConcatSubOperand(VT, Ops, 1),
58015 ConcatSubOperand(VT, Ops, 2));
58021 (EltSizeInBits >= 32 || Subtarget.hasInt256()) &&
58022 IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {
58023 EVT SelVT = Ops[0].getOperand(0).getValueType();
58028 ConcatSubOperand(VT, Ops, 1),
58029 ConcatSubOperand(VT, Ops, 2));
58041 *FirstLd->getMemOperand(), &Fast) &&
58053 for (unsigned I = 0; I != NumOps; ++I) {
58060 EltBits.append(OpEltBits);
58084 auto *Mem = cast<MemSDNode>(Op0);
58115 EVT VT = N->getValueType(0);
58116 EVT SrcVT = N->getOperand(0).getValueType();
58124 for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
58127 Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits);
58128 if (I == (E - 1)) {
58154 MVT OpVT = N->getSimpleValueType(0);
58160 SDValue SubVec = N->getOperand(1);
58162 uint64_t IdxVal = N->getConstantOperandVal(2);
58195 Ins.getOperand(1).getValueSizeInBits().getFixedValue() <=
58199 Ins.getOperand(1), N->getOperand(2));
58224 if (ExtIdxVal != 0) {
58229 for (int i = 0; i != VecNumElts; ++i)
58232 for (int i = 0; i != SubVecNumElts; ++i)
58233 Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
58251 if (SubVectorOps.size() == 2 &&
58275 auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
58277 SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
58280 MemIntr->getMemoryVT(),
58281 MemIntr->getMemOperand());
58290 auto *VecLd = dyn_cast<LoadSDNode>(Vec);
58291 auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
58292 if (VecLd && SubLd &&
58310 SDValue Sel = Ext->getOperand(0);
58318 MVT VT = Ext->getSimpleValueType(0);
58326 MVT WideVT = Ext->getOperand(0).getSimpleValueType();
58329 "Unexpected vector type with legal operations");
58333 unsigned ExtIdx = Ext->getConstantOperandVal(1);
58334 if (SelElts % CastedElts == 0) {
58337 ExtIdx *= (SelElts / CastedElts);
58338 } else if (CastedElts % SelElts == 0) {
58341 unsigned IndexDivisor = CastedElts / SelElts;
58342 if (ExtIdx % IndexDivisor != 0)
58344 ExtIdx /= IndexDivisor;
58346 llvm_unreachable("Element count of simple vector types are not divisible?");
58350 unsigned NarrowElts = SelElts / NarrowingFactor;
58372 if (!N->getValueType(0).isSimple())
58375 MVT VT = N->getSimpleValueType(0);
58377 unsigned IdxVal = N->getConstantOperandVal(1);
58389 auto isConcatenatedNot = [](SDValue V) {
58393 SDValue NotOp = V->getOperand(0);
58396 if (isConcatenatedNot(InVecBC.getOperand(0)) ||
58457 cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)
58461 if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) {
58465 unsigned NumSubVecs = InSizeInBits / SizeInBits;
58469 unsigned SubVecIdx = IdxVal / NumSubElts;
58474 SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
58475 if (Src.getValueSizeInBits() == InSizeInBits) {
58476 unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
58477 unsigned SrcEltIdx = SrcSubVecIdx * NumSubElts;
58484 auto IsExtractFree = [](SDValue V) {
58485 if (V.hasOneUse()) {
58495 return V.isUndef();
58502 if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
58523 Subtarget.hasVLX())) &&
58524 (VT == MVT::v4i32 || VT == MVT::v4f32)) {
58526 if (Src.getValueType().getScalarSizeInBits() == 32)
58532 (SizeInBits == 128 || SizeInBits == 256) &&
58535 if (Ext.getValueSizeInBits() > SizeInBits)
58538 return DAG.getNode(ExtOp, DL, VT, Ext);
58547 return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
58549 if (IdxVal == 0 && InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
58550 (SizeInBits == 128 || SizeInBits == 256)) {
58554 return DAG.getNode(InOpcode, DL, VT, Ext);
58557 if (SizeInBits == 128 || SizeInBits == 256) {
58558 switch (InOpcode) {
58606 DAG.getTargetConstant(M, DL, MVT::i8));
58618 DAG.getNode(InOpcode, DL, InVecVT, Src0, Mask, Src1);
58641 using namespace SDPatternMatch;
58642 EVT VT = N->getValueType(0);
58650 if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse() &&
58656 Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
58657 Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
58660 Src.getOperand(1));
58664 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
58665 auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
58666 if (Op.getValueType() != MVT::i64)
58669 if (Op.getOpcode() == Opc &&
58670 Op.getOperand(0).getScalarValueSizeInBits() <= 32)
58671 return Op.getOperand(0);
58673 if (auto *Ld = dyn_cast<LoadSDNode>(Op))
58674 if (Ld->getExtensionType() == Ext &&
58675 Ld->getMemoryVT().getScalarSizeInBits() <= 32)
58698 if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST) {
58701 if (SrcOp.getValueType() == MVT::f64)
58705 if (SrcOp.getValueType() == MVT::x86mmx)
58709 if (VT == MVT::v4i32) {
58714 m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc))))))
58726 unsigned BroadcastSizeInBits =
58727 User->getValueSizeInBits(0).getFixedValue();
58728 if (BroadcastSizeInBits == SizeInBits)
58730 if (BroadcastSizeInBits > SizeInBits)
58738 switch (Src.getOpcode()) {
58742 if (auto *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
58749 Amt->getZExtValue(), DAG);
58755 if (auto *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(2))) {
58761 Amt->getAPIntValue().urem(Src.getScalarValueSizeInBits());
58766 return DAG.getNode(Src.getOpcode(), DL, VT, SrcVec0, SrcVec1,
58808 LHS.getOperand(0).getValueType() == MVT::v4i32) {
58811 LHS.getOperand(0), { 0, -1, 1, -1 });
58818 RHS.getOperand(0).getValueType() == MVT::v4i32) {
58821 RHS.getOperand(0), { 0, -1, 1, -1 });
58832 MVT VT = N->getSimpleValueType(0);
58835 unsigned Opc = N->getOpcode();
58838 "Unexpected PMADD opcode");
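// Constant folding for the PMADD nodes: each destination element is the sum of the
// products of two adjacent source elements, with the operands widened by sign or zero
// extension depending on whether the node is PMADDWD or PMADDUBSW.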
58847 APInt LHSUndefs, RHSUndefs;
58849 unsigned SrcEltBits = LHS.getScalarValueSizeInBits();
58854 for (unsigned I = 0, E = LHSBits.size(); I != E; I += 2) {
58855 APInt LHSLo = LHSBits[I + 0], LHSHi = LHSBits[I + 1];
58856 APInt RHSLo = RHSBits[I + 0], RHSHi = RHSBits[I + 1];
58857 LHSLo = IsPMADDWD ? LHSLo.sext(DstEltBits) : LHSLo.zext(DstEltBits);
58858 LHSHi = IsPMADDWD ? LHSHi.sext(DstEltBits) : LHSHi.zext(DstEltBits);
58860 APInt Hi = LHSHi * RHSHi.sext(DstEltBits);
58862 Result.push_back(Res);
58878 EVT VT = N->getValueType(0);
58880 unsigned Opcode = N->getOpcode();
58881 unsigned InOpcode = In.getOpcode();
58888 auto *Ld = cast<LoadSDNode>(In);
58889 if (Ld->isSimple()) {
58890 MVT SVT = In.getSimpleValueType().getVectorElementType();
58897 Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
58898 MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
58906 if (Opcode == InOpcode)
58907 return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
58914 In.getOperand(0).getOperand(0).getValueSizeInBits() ==
58915 In.getValueSizeInBits())
58916 return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
58925 EVT EltVT = In.getOperand(0).getValueType();
58927 for (unsigned I = 0; I != NumElts; ++I)
58928 Elts[I * Scale] = In.getOperand(I);
58945 EVT VT = N->getValueType(0);
58957 SDValue Src = N->getOperand(0).getOperand(0);
58958 uint64_t Amt = N->getConstantOperandVal(1) +
58959 N->getOperand(0).getConstantOperandVal(1);
58960 EVT SrcVT = Src.getValueType();
58982 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C())
58988 if (N->getValueType(0) != MVT::f32 ||
58989 N->getOperand(0).getOperand(0).getValueType() != MVT::f32)
58994 N->getOperand(0).getOperand(0));
59005 EVT VT = N->getValueType(0);
59006 bool IsStrict = N->isStrictFPOpcode();
59007 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
59008 EVT SrcVT = Src.getValueType();
59013 !IsStrict && Src.getOperand(0).getValueType() == VT)
59014 return Src.getOperand(0);
59019 assert(!IsStrict && "Strict FP doesn't support BF16");
59033 if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
59036 if (Subtarget.hasFP16())
59056 unsigned NumConcats = 8 / NumElts;
59066 std::max(4U, NumElts));
59070 {N->getOperand(0), Src});
59077 assert(NumElts == 2 && "Unexpected size");
59103 "Unknown broadcast load type");
59106 if (N->hasAnyUseOfValue(1))
59109 auto *MemIntrin = cast<MemIntrinsicSDNode>(N);
59112 SDValue Chain = MemIntrin->getChain();
59113 EVT VT = N->getSimpleValueType(0);
59114 EVT MemVT = MemIntrin->getMemoryVT();
59119 if (User != N && User->getOpcode() == N->getOpcode() &&
59120 cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
59121 cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
59122 cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
59124 !User->hasAnyUseOfValue(1) &&
59137 if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
59140 bool IsStrict = N->isStrictFPOpcode();
59141 EVT VT = N->getValueType(0);
59142 SDValue Src = N->getOperand(IsStrict ? 1 : 0);
59143 EVT SrcVT = Src.getValueType();
59153 if (Subtarget.hasFP16()) {
59159 Src.getNumOperands() == 2) {
59161 SDValue Op0 = Src.getOperand(0);
59162 SDValue Op1 = Src.getOperand(1);
59169 int Mask[8] = {0, 1, 2, 3, 8, 9, 10, 11};
59171 assert(IsOp0Strict && "Op0 must be strict node");
59175 Cvt0 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
59177 Cvt1 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
59205 {N->getOperand(0), Src, Rnd});
59231 LoadSDNode *LN = cast<LoadSDNode>(Src.getNode());
59249 unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
59263 bool MadeChange = false, CastReturnVal = false;
59265 for (const SDValue &Arg : N->op_values()) {
59268 Args.push_back(DAG.getBitcast(MVT::x86mmx, Arg));
59270 Args.push_back(Arg);
59274 if (VTs.NumVTs > 0 && VTs.VTs[0] == MVT::v1i64) {
59276 NewVTArr[0] = MVT::x86mmx;
59279 CastReturnVal = true;
59284 if (CastReturnVal) {
59286 for (unsigned i = 0, e = Result->getNumValues(); i != e; ++i)
59288 Returns[0] = DAG.getBitcast(MVT::v1i64, Returns[0]);
59300 unsigned IntNo = N->getConstantOperandVal(0);
59314 unsigned IntNo = N->getConstantOperandVal(1);
59328 unsigned IntNo = N->getConstantOperandVal(1);
59340 switch (N->getOpcode()) {
59539 return Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64);
59569 if (VT == MVT::i16) {
59590 return Subtarget.hasNDD();
59603 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
59604 if (IsCFProtectionSupported) {
59643 EVT VT = Op.getValueType();
59644 bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
59645 isa<ConstantSDNode>(Op.getOperand(1));
59651 if (VT != MVT::i16 && !Is8BitMulByConstant)
59655 if (!Op.hasOneUse())
59660 auto *Ld = cast<LoadSDNode>(Load);
59661 auto *St = cast<StoreSDNode>(User);
59662 return Ld->getBasePtr() == St->getBasePtr();
59668 if (!Op.hasOneUse())
59673 auto *Ld = cast<AtomicSDNode>(Load);
59674 auto *St = cast<AtomicSDNode>(User);
59675 return Ld->getBasePtr() == St->getBasePtr();
59679 if (!Op.hasOneUse())
59682 EVT VT = User->getValueType(0);
59684 (VT == MVT::i32 || VT == MVT::i64));
59687 bool Commute = false;
59688 switch (Op.getOpcode()) {
59689 default: return false;
59706 if (Subtarget.hasZU() && IsFoldableZext(Op) &&
59707 (isa<ConstantSDNode>(Op.getOperand(0)) ||
59708 isa<ConstantSDNode>(Op.getOperand(1))))
59722 (!Commute || !isa<ConstantSDNode>(N0) ||
59723 (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
59726 ((Commute && !isa<ConstantSDNode>(N1)) ||
59727 (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
59729 if (IsFoldableAtomicRMW(N0, Op) ||
59730 (Commute && IsFoldableAtomicRMW(N1, Op)))
59751 S = S.substr(Piece.size());
59764 if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
59769 if (AsmPieces.size() == 3)
59781 const std::string &AsmStr = IA->getAsmString();
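// The pattern matching below recognizes hand-written byte-swap inline asm - a single
// "bswap"/"bswapl"/"bswapq" of operand 0, the "rorw $8"/"rorl $16" sequences used for 16-
// and 32-bit swaps, and the 32-bit "bswap %eax; bswap %edx; xchgl %eax, %edx" i64 idiom -
// so the asm blob can be replaced with a generic byte-swap the optimizer understands.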
59789 SplitString(AsmStr, AsmPieces, ";\n");
59791 switch (AsmPieces.size()) {
59792 default: return false;
59799 if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
59800 matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
59801 matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
59802 matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
59803 matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
59804 matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
59812 IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
59813 (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
59814 matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
59816 StringRef ConstraintsStr = IA->getConstraintString();
59825 IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
59826 matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
59827 matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
59828 matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
59830 StringRef ConstraintsStr = IA->getConstraintString();
59839 if (Constraints.size() >= 2 &&
59840 Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
59841 Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
59843 if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
59844 matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
59845 matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
59891 if (Constraint.size() == 1) {
59892 switch (Constraint[0]) {
59929 else if (Constraint.size() == 2) {
59930 switch (Constraint[0]) {
59934 if (Constraint[1] != 's')
59938 switch (Constraint[1]) {
59952 switch (Constraint[1]) {
59972 Value *CallOperandVal = Info.CallOperandVal;
59975 if (!CallOperandVal)
59979 switch (*Constraint) {
60009 switch (Constraint[1]) {
60041 switch (Constraint[1]) {
60066 if (auto *C = dyn_cast<ConstantInt>(Info.CallOperandVal))
60067 if (C->getZExtValue() <= 31)
60071 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60072 if (C->getZExtValue() <= 63)
60076 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60077 if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
60081 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60082 if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
60086 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60087 if (C->getZExtValue() <= 3)
60091 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60092 if (C->getZExtValue() <= 0xff)
60097 if (isa<ConstantFP>(CallOperandVal))
60101 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60102 if ((C->getSExtValue() >= -0x80000000LL) &&
60103 (C->getSExtValue() <= 0x7fffffffLL))
60107 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
60108 if (C->getZExtValue() <= 0xffffffff)
60160 std::vector<SDValue> &Ops,
60163 char ConstraintLetter = Constraint[0];
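// Immediate operands are only accepted when they fit the x86 constraint ranges checked
// below: 'I' 0..31, 'J' 0..63, 'K' signed 8-bit, 'L' 0xff/0xffff (plus 0xffffffff in
// 64-bit mode), 'M' 0..3, 'N' 0..255, 'O' 0..127, and the wider signed/unsigned 32-bit
// forms further down.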
60164 switch (ConstraintLetter) {
60167 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60168 if (C->getZExtValue() <= 31) {
60170 Op.getValueType());
60176 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60177 if (C->getZExtValue() <= 63) {
60179 Op.getValueType());
60185 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60186 if (isInt<8>(C->getSExtValue())) {
60188 Op.getValueType());
60194 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60195 if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
60196 (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
60198 Op.getValueType());
60204 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60205 if (C->getZExtValue() <= 3) {
60207 Op.getValueType());
60213 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60214 if (C->getZExtValue() <= 255) {
60216 Op.getValueType());
60222 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60223 if (C->getZExtValue() <= 127) {
60225 Op.getValueType());
60232 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60234 C->getSExtValue())) {
60245 assert(Constraint[1] == 's');
60248 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
60250 BA->getValueType(0)));
60254 isa<ConstantSDNode>(Op->getOperand(1))) {
60255 Offset = cast<ConstantSDNode>(Op->getOperand(1))->getSExtValue();
60256 Op = Op->getOperand(0);
60258 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
60266 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
60268 C->getZExtValue())) {
60270 Op.getValueType());
60280 if (auto *CST = dyn_cast<ConstantSDNode>(Op)) {
60281 bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
60286 : CST->getSExtValue();
60295 !(isa<BlockAddressSDNode>(Op) || isa<BasicBlockSDNode>(Op)))
60300 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
60310 if (Result.getNode()) {
60311 Ops.push_back(Result);
60351 return Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32();
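// getRegForInlineAsmConstraint maps single-letter register constraints to a concrete X86
// register class, chosen from the requested value type and the available subtarget
// features (AVX-512/VLX selects the extended *X classes, BWI enables the 32/64-bit mask
// classes, and so on).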
60354std::pair<unsigned, const TargetRegisterClass *>
60360 if (Constraint.size() == 1) {
60362 switch (Constraint[0]) {
60366 if (Subtarget.is64Bit())
60367 return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
60368 assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
60369 "Expecting 64, 32 or 16 bit subtarget");
60370 return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
60377 if (VT == MVT::v1i1 || VT == MVT::i1)
60378 return std::make_pair(0U, &X86::VK1RegClass);
60379 if (VT == MVT::v8i1 || VT == MVT::i8)
60380 return std::make_pair(0U, &X86::VK8RegClass);
60381 if (VT == MVT::v16i1 || VT == MVT::i16)
60382 return std::make_pair(0U, &X86::VK16RegClass);
60384 if (Subtarget.hasBWI()) {
60385 if (VT == MVT::v32i1 || VT == MVT::i32)
60386 return std::make_pair(0U, &X86::VK32RegClass);
60387 if (VT == MVT::v64i1 || VT == MVT::i64)
60388 return std::make_pair(0U, &X86::VK64RegClass);
60392 if (Subtarget.is64Bit()) {
60393 if (VT == MVT::i8 || VT == MVT::i1)
60395 ? &X86::GR8RegClass
60396 : &X86::GR8_NOREX2RegClass);
60397 if (VT == MVT::i16)
60399 ? &X86::GR16RegClass
60400 : &X86::GR16_NOREX2RegClass);
60401 if (VT == MVT::i32 || VT == MVT::f32)
60403 ? &X86::GR32RegClass
60404 : &X86::GR32_NOREX2RegClass);
60405 if (VT != MVT::f80 && !VT.isVector())
60407 ? &X86::GR64RegClass
60408 : &X86::GR64_NOREX2RegClass);
60414 if (VT == MVT::i8 || VT == MVT::i1)
60415 return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
60416 if (VT == MVT::i16)
60417 return std::make_pair(0U, &X86::GR16_ABCDRegClass);
60418 if (VT == MVT::i32 || VT == MVT::f32 ||
60419 (!VT.isVector() && !Subtarget.is64Bit()))
60420 return std::make_pair(0U, &X86::GR32_ABCDRegClass);
60421 if (VT != MVT::f80 && !VT.isVector())
60422 return std::make_pair(0U, &X86::GR64_ABCDRegClass);
60426 if (VT == MVT::i8 || VT == MVT::i1)
60428 ? &X86::GR8RegClass
60429 : &X86::GR8_NOREX2RegClass);
60430 if (VT == MVT::i16)
60432 ? &X86::GR16RegClass
60433 : &X86::GR16_NOREX2RegClass);
60434 if (VT == MVT::i32 || VT == MVT::f32 ||
60435 (!VT.isVector() && !Subtarget.is64Bit()))
60437 ? &X86::GR32RegClass
60438 : &X86::GR32_NOREX2RegClass);
60439 if (VT != MVT::f80 && !VT.isVector())
60441 ? &X86::GR64RegClass
60442 : &X86::GR64_NOREX2RegClass);
60445 if (VT == MVT::i8 || VT == MVT::i1)
60446 return std::make_pair(0U, &X86::GR8_NOREXRegClass);
60447 if (VT == MVT::i16)
60448 return std::make_pair(0U, &X86::GR16_NOREXRegClass);
60449 if (VT == MVT::i32 || VT == MVT::f32 ||
60450 (!VT.isVector() && !Subtarget.is64Bit()))
60451 return std::make_pair(0U, &X86::GR32_NOREXRegClass);
60452 if (VT != MVT::f80 && !VT.isVector())
60453 return std::make_pair(0U, &X86::GR64_NOREXRegClass);
60459 return std::make_pair(0U, &X86::RFP32RegClass);
60461 return std::make_pair(0U, &X86::RFP64RegClass);
60462 if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
60463 return std::make_pair(0U, &X86::RFP80RegClass);
60466 if (!Subtarget.hasMMX()) break;
60467 return std::make_pair(0U, &X86::VR64RegClass);
60470 if (!Subtarget.hasSSE1()) break;
60471 bool VConstraint = (Constraint[0] == 'v');
60477 if (VConstraint && Subtarget.hasFP16())
60478 return std::make_pair(0U, &X86::FR16XRegClass);
60482 if (VConstraint && Subtarget.hasVLX())
60483 return std::make_pair(0U, &X86::FR32XRegClass);
60484 return std::make_pair(0U, &X86::FR32RegClass);
60487 if (VConstraint && Subtarget.hasVLX())
60488 return std::make_pair(0U, &X86::FR64XRegClass);
60489 return std::make_pair(0U, &X86::FR64RegClass);
60491 if (Subtarget.is64Bit()) {
60492 if (VConstraint && Subtarget.hasVLX())
60493 return std::make_pair(0U, &X86::VR128XRegClass);
60494 return std::make_pair(0U, &X86::VR128RegClass);
60499 if (!Subtarget.hasFP16())
60502 return std::make_pair(0U, &X86::VR128XRegClass);
60503 return std::make_pair(0U, &X86::VR128RegClass);
60505 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60508 return std::make_pair(0U, &X86::VR128XRegClass);
60509 return std::make_pair(0U, &X86::VR128RegClass);
60517 if (VConstraint && Subtarget.hasVLX())
60518 return std::make_pair(0U, &X86::VR128XRegClass);
60519 return std::make_pair(0U, &X86::VR128RegClass);
60522 if (!Subtarget.hasFP16())
60525 return std::make_pair(0U, &X86::VR256XRegClass);
60526 return std::make_pair(0U, &X86::VR256RegClass);
60528 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60531 return std::make_pair(0U, &X86::VR256XRegClass);
60532 return std::make_pair(0U, &X86::VR256RegClass);
60539 if (VConstraint && Subtarget.hasVLX())
60540 return std::make_pair(0U, &X86::VR256XRegClass);
60542 return std::make_pair(0U, &X86::VR256RegClass);
60545 if (!Subtarget.hasFP16())
60548 return std::make_pair(0U, &X86::VR512RegClass);
60549 return std::make_pair(0U, &X86::VR512_0_15RegClass);
60551 if (!Subtarget.hasBF16())
60554 return std::make_pair(0U, &X86::VR512RegClass);
60555 return std::make_pair(0U, &X86::VR512_0_15RegClass);
60564 return std::make_pair(0U, &X86::VR512RegClass);
60565 return std::make_pair(0U, &X86::VR512_0_15RegClass);
60569 } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
60570 switch (Constraint[1]) {
60578 if (!Subtarget.hasMMX()) break;
60579 return std::make_pair(0U, &X86::VR64RegClass);
60581 if (!Subtarget.hasSSE1()) break;
60586 if (!Subtarget.hasFP16())
60588 return std::make_pair(X86::XMM0, &X86::FR16XRegClass);
60591 return std::make_pair(X86::XMM0, &X86::FR32RegClass);
60594 return std::make_pair(X86::XMM0, &X86::FR64RegClass);
60596 if (!Subtarget.hasFP16())
60598 return std::make_pair(X86::XMM0, &X86::VR128RegClass);
60600 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60602 return std::make_pair(X86::XMM0, &X86::VR128RegClass);
60610 return std::make_pair(X86::XMM0, &X86::VR128RegClass);
60613 if (!Subtarget.hasFP16())
60615 return std::make_pair(X86::YMM0, &X86::VR256RegClass);
60617 if (!Subtarget.hasBF16() || !Subtarget.hasVLX())
60619 return std::make_pair(X86::YMM0, &X86::VR256RegClass);
60627 return std::make_pair(X86::YMM0, &X86::VR256RegClass);
60630 if (!Subtarget.hasFP16())
60632 return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
60634 if (!Subtarget.hasBF16())
60636 return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
60644 return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
60651 if (VT == MVT::v1i1 || VT == MVT::i1)
60652 return std::make_pair(0U, &X86::VK1WMRegClass);
60653 if (VT == MVT::v8i1 || VT == MVT::i8)
60654 return std::make_pair(0U, &X86::VK8WMRegClass);
60655 if (VT == MVT::v16i1 || VT == MVT::i16)
60656 return std::make_pair(0U, &X86::VK16WMRegClass);
60658 if (Subtarget.hasBWI()) {
60659 if (VT == MVT::v32i1 || VT == MVT::i32)
60660 return std::make_pair(0U, &X86::VK32WMRegClass);
60661 if (VT == MVT::v64i1 || VT == MVT::i64)
60662 return std::make_pair(0U, &X86::VK64WMRegClass);
60666 } else if (Constraint.size() == 2 && Constraint[0] == 'j') {
60667 switch (Constraint[1]) {
60671 if (VT == MVT::i8 || VT == MVT::i1)
60672 return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
60673 if (VT == MVT::i16)
60674 return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
60675 if (VT == MVT::i32 || VT == MVT::f32)
60676 return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
60677 if (VT != MVT::f80 && !VT.isVector())
60678 return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
60681 if (VT == MVT::i8 || VT == MVT::i1)
60682 return std::make_pair(0U, &X86::GR8RegClass);
60683 if (VT == MVT::i16)
60684 return std::make_pair(0U, &X86::GR16RegClass);
60685 if (VT == MVT::i32 || VT == MVT::f32)
60686 return std::make_pair(0U, &X86::GR32RegClass);
60687 if (VT != MVT::f80 && !VT.isVector())
60688 return std::make_pair(0U, &X86::GR64RegClass);
60694 return std::make_pair(0U, &X86::GR32RegClass);
60698 std::pair<Register, const TargetRegisterClass*> Res;
60705 if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
60707 if (Constraint.size() == 7 && Constraint[0] == '{' &&
60708 tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
60709 Constraint[3] == '(' &&
60710 (Constraint[4] >= '0' && Constraint[4] <= '7') &&
60711 Constraint[5] == ')' && Constraint[6] == '}') {
60714 if (Constraint[4] == '7')
60715 return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
60716 return std::make_pair(X86::FP0 + Constraint[4] - '0',
60717 &X86::RFP80RegClass);
60721 if (StringRef("{st}").equals_insensitive(Constraint))
60722 return std::make_pair(X86::FP0, &X86::RFP80RegClass);
60726 if (StringRef("{flags}").equals_insensitive(Constraint))
60727 return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
60731 if (StringRef("{dirflag}").equals_insensitive(Constraint) &&
60733 return std::make_pair(X86::DF, &X86::DFCCRRegClass);
60737 if (StringRef("{fpsr}").equals_insensitive(Constraint) && VT == MVT::Other)
60738 return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
60744 if (!Subtarget.is64Bit() &&
60746 TRI->getEncodingValue(Res.first) >= 8) {
60748 return std::make_pair(0, nullptr);
60753 TRI->getEncodingValue(Res.first) & 0x10) {
60755 return std::make_pair(0, nullptr);
60762 if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
60777 return std::make_pair(0, nullptr);
60780 bool is64Bit = Subtarget.is64Bit();
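// For explicitly named general-purpose registers the class is chosen purely by the
// requested width; outside 64-bit mode the REX-only registers are excluded via the
// GR*_NOREX classes.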
60782 Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
60783 : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
60784 : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
60785 : (is64Bit ? &X86::GR64RegClass : nullptr);
60791 return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
60793 return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
60795 return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
60797 return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
60799 return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
60801 return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
60803 return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
60805 return std::make_pair(0, nullptr);
60809 return std::make_pair(DestReg, RC);
60813 return std::make_pair(0, nullptr);
60821 if (VT == MVT::f16)
60822 Res.second = &X86::FR16XRegClass;
60823 else if (VT == MVT::f32 || VT == MVT::i32)
60824 Res.second = &X86::FR32XRegClass;
60825 else if (VT == MVT::f64 || VT == MVT::i64)
60826 Res.second = &X86::FR64XRegClass;
60827 else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
60828 Res.second = &X86::VR128XRegClass;
60829 else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
60830 Res.second = &X86::VR256XRegClass;
60831 else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
60832 Res.second = &X86::VR512RegClass;
60836 Res.second = nullptr;
60839 if (VT == MVT::v1i1 || VT == MVT::i1)
60840 Res.second = &X86::VK1RegClass;
60841 else if (VT == MVT::v8i1 || VT == MVT::i8)
60842 Res.second = &X86::VK8RegClass;
60843 else if (VT == MVT::v16i1 || VT == MVT::i16)
60844 Res.second = &X86::VK16RegClass;
60845 else if (VT == MVT::v32i1 || VT == MVT::i32)
60846 Res.second = &X86::VK32RegClass;
60847 else if (VT == MVT::v64i1 || VT == MVT::i64)
60848 Res.second = &X86::VK64RegClass;
60852 Res.second = nullptr;
60867 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
60872 if (!Subtarget.is64Bit())
60881void X86TargetLowering::insertCopiesSplitCSR(
60885 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
60895 RC = &X86::GR64RegClass;
60906 Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
60907 "Function should be nounwind in insertCopiesSplitCSR!");
60908 Entry->addLiveIn(*I);
60913 for (auto *Exit : Exits)
60915 TII->get(TargetOpcode::COPY), *I)
60921 return Subtarget.is64Bit();
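// KCFI check emission: memory-form calls are first unfolded through R11 so the type check
// always has a register target, and calls lowered through an indirect thunk are checked
// against R11 as well.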
60929 "Invalid call instruction for a KCFI check");
60934 switch (MBBI->getOpcode()) {
60936 case X86::CALL64m_NT:
60937 case X86::TAILJMPm64:
60938 case X86::TAILJMPm64_REX: {
60941 if (!TII->unfoldMemoryOperand(MF, *OrigCall, X86::R11, true,
60944 for (auto *NewMI : NewMIs)
60947 "Unexpected instruction after memory operand unfolding");
60948 if (OrigCall->shouldUpdateAdditionalCallInfo())
60950 MBBI->setCFIType(MF, OrigCall->getCFIType());
60960 switch (MBBI->getOpcode()) {
60962 case X86::CALL64r_NT:
60963 case X86::TAILJMPr64:
60964 case X86::TAILJMPr64_REX:
60965 assert(Target.isReg() && "Unexpected target operand for an indirect call");
60966 Target.setIsRenamable(false);
60967 TargetReg = Target.getReg();
60969 case X86::CALL64pcrel32:
60970 case X86::TAILJMPd64:
60971 assert(Target.isSymbol() && "Unexpected target operand for a direct call");
60975 "Unexpected register for an indirect thunk call");
60976 TargetReg = X86::R11;
61030 if (Subtarget.is64Bit())
61044 if (ML && ML->isInnermost() &&
unsigned const MachineRegisterInfo * MRI
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint)
static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG)
static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL, SelectionDAG &DAG)
Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR, WZR, invert(<cond>)'.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
AMDGPU Register Bank Select
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
static int matchShuffleAsBitRotate(ArrayRef< int > Mask, int NumSubElts)
Try to lower a vector shuffle as a bit rotation.
static Value * LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctlz of V before the specified instruction IP.
static Value * LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctpop of V before the specified instruction IP.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc)
Return true if node is an ISD::AND or ISD::OR of two M68k::SETcc nodes each of which has no other use...
static bool hasNonFlagsUse(SDValue Op)
return true if Op has a use that doesn't just read flags.
static bool isCMOVPseudo(MachineInstr &MI)
static SDValue combineCarryThroughADD(SDValue CCR)
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG)
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PowerPC Reduce CR logical Operation
PowerPC TLS Dynamic Call Fixup
static constexpr Register SPReg
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static StringRef substr(StringRef Str, uint64_t Len)
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static KnownBits computeKnownBitsForHorizontalOperation(const Operator *I, const APInt &DemandedElts, unsigned Depth, const SimplifyQuery &Q, const function_ref< KnownBits(const KnownBits &, const KnownBits &)> KnownBitsFunc)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static bool is64Bit(const char *name)
#define GET_EGPR_IF_ENABLED(OPC)
static unsigned getSUBriOpcode(bool IsLP64)
static SDValue convertIntLogicToFPLogic(unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If both input operands of a logic op are being cast from floating-point types or FP compares,...
static bool isNoopOrBroadcastShuffleMask(ArrayRef< int > Mask)
static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask)
static MVT widenMaskVectorType(MVT VT, const X86Subtarget &Subtarget)
Widen a mask vector type to a minimum of v8i1/v16i1 to allow use of KSHIFT and bitcast with integer t...
static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0, const SDValue &Zext1, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, SDValue X, SDValue Y, SelectionDAG &DAG, bool ZeroSecondOpOnly=false)
If this is an add or subtract where one operand is produced by a cmp+setcc, then try to convert it to...
static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp, SmallVectorImpl< SDValue > &SrcOps, SmallVectorImpl< APInt > *SrcMask=nullptr)
Helper for matching BINOP(EXTRACTELT(X,0),BINOP(EXTRACTELT(X,1),...)) style scalarized (associative) ...
static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, SDValue &Op1, bool &IsAlwaysSignaling)
Turns an ISD::CondCode into a value suitable for SSE floating-point mask CMPs.
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL)
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC)
static bool useEGPRInlineAsm(const X86Subtarget &Subtarget)
static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If a value is a scalar FP zero or a vector FP zero (potentially including undefined elements),...
static bool matchBinaryPermuteShuffle(MVT MaskVT, ArrayRef< int > Mask, const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm)
static SDValue combineSub(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isGRClass(const TargetRegisterClass &RC)
Check if RC is a general purpose register class.
static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero, SmallVectorImpl< SDValue > &Ops, SmallVectorImpl< int > &Mask, bool &IsUnary)
Calculates the shuffle mask corresponding to the target-specific opcode.
static SDValue vectorizeExtractedCast(SDValue Cast, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Given a scalar cast operation that is extracted from a vector, try to vectorize the cast op followed ...
static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, SelectionDAG &DAG)
static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG)
static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode, const SDLoc &DL, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
This is a helper function of LowerToHorizontalOp().
static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, const SDLoc &dl, SelectionDAG &DAG)
static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, ArrayRef< int > HalfMask, int HalfIdx1, int HalfIdx2, bool UndefLower, SelectionDAG &DAG, bool UseConcat=false)
Given the output values from getHalfShuffleMask(), create a half width shuffle of extracted vectors f...
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, SDValue ShAmt, int ShAmtIdx, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle vector element shifts by a splat shift amount.
static SDValue combineZext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc, bool NSW)
Given a buildvector constant, return a new vector constant with each element incremented or decrement...
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, unsigned &NumExtracts, bool &IsSubAdd)
Returns true iff BV builds a vector with the result equivalent to the result of ADDSUB/SUBADD operati...
static bool cheapX86FSETCC_SSE(ISD::CondCode SetCCOpcode)
static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower 4-lane 32-bit floating point shuffles.
static MachineBasicBlock * emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, const TargetInstrInfo *TII)
Utility function to emit xbegin specifying the start of an RTM region.
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef< SDValue > Elts, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, bool IsAfterLegalize)
Given the initializing elements 'Elts' of a vector of type 'VT', see if the elements can be replaced ...
static bool scaleShuffleElements(ArrayRef< int > Mask, unsigned NumDstElts, SmallVectorImpl< int > &ScaledMask)
static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LoadGlobalBaseReg=false, bool LocalDynamic=false)
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
This function lowers a vector truncation of 'extended sign-bits' or 'extended zero-bits' values.
static cl::opt< int > BrMergingCcmpBias("x86-br-merging-ccmp-bias", cl::init(6), cl::desc("Increases 'x86-br-merging-base-cost' in cases that the target " "supports conditional compare instructions."), cl::Hidden)
static APInt getExtractedDemandedElts(SDNode *N)
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG)
static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 32-bit integer shuffles.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If we are inverting an PTEST/TESTP operand, attempt to adjust the CC to avoid the inversion.
static unsigned getAltBitOpcode(unsigned Opcode)
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue promoteXINT_TO_FP(SDValue Op, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Insert i1-subvector to i1-vector.
static SDValue materializeVectorConstant(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Create a vector constant without a load.
static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT, ArrayRef< int > Mask, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerUINT_TO_FP_i64(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
64-bit unsigned integer to double expansion.
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT, const X86Subtarget &Subtarget)
static bool isX86CCSigned(unsigned X86CC)
Return true if the condition is an signed comparison operation.
static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG)
static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a 128-bit shuffles.
static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget)
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Do target-specific dag combines on SELECT and VSELECT nodes.
static bool isUndefOrZeroInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size is undef or ...
static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, bool IsAfterLegalize)
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue getConstVector(ArrayRef< int > Values, MVT VT, SelectionDAG &DAG, const SDLoc &dl, bool IsMask=false)
static SDValue commuteSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
static MachineInstrBuilder createPHIsForCMOVsInSinkBB(MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl)
Generate a DAG to put 128-bits into a vector > 128 bits.
static bool onlyZeroFlagUsed(SDValue Flags)
static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl)
Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineMulToPMADDWD(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerShuffleAsLanePermuteAndShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one source with a lane permutatio...
static SDValue checkSignTestSetCCCombine(SDValue Cmp, X86::CondCode &CC, SelectionDAG &DAG)
static bool isFoldableUseOfShuffle(SDNode *N)
static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, SmallVectorImpl< SDValue > &Inputs, SmallVectorImpl< int > &Mask, const SelectionDAG &DAG, unsigned Depth, bool ResolveKnownElts)
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Return (and Op, Mask) for compare instructions or (vselect Mask, Op, PreservedSrc) for others along w...
static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Truncate using inreg sign extension and X86ISD::PACKSS.
static SDValue combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool isShuffleMaskInputInPlace(int Input, ArrayRef< int > Mask)
Test whether the specified input (0 or 1) is in-place blended by the given mask.
static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits, unsigned ScalarSizeInBits, ArrayRef< int > Mask)
Test whether elements in each LaneSizeInBits lane in this shuffle mask come from multiple lanes - thi...
static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, ISD::CondCode Cond, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG)
As another special case, use PSUBUS[BW] when it's profitable.
static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each 128-bit lane.
static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static void getPackDemandedElts(EVT VT, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS, unsigned X86CC, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineADC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static std::optional< unsigned > CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS, unsigned NumSignificantBitsRHS)
static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, SelectionDAG &DAG)
static bool isShuffleFoldableLoad(SDValue V)
Helper to test for a load that can be folded with x86 shuffles.
static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
If both arms of a vector select are concatenated vectors, split the select, and concatenate the resul...
static SDValue lowerShuffleAsElementInsertion(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower insertion of a single element into a zero vector.
static SDValue combineXor(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isUnpackWdShuffleMask(ArrayRef< int > Mask, MVT VT, const SelectionDAG &DAG)
static SDValue LowerTruncateVecPack(MVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
This function lowers a vector truncation from vXi32/vXi64 to vXi8/vXi16 into X86ISD::PACKUS/X86ISD::P...
static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle case where shuffle sources are coming from the same 128-bit lane and every lane can be represe...
static SDValue getSHUFPDImmForMask(ArrayRef< int > Mask, const SDLoc &DL, SelectionDAG &DAG)
static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth)
static int getSEHRegistrationNodeSize(const Function *Fn)
static SDValue combineShuffleOfConcatUndef(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Creates an SDNode for a predicated scalar operation.
static SDValue buildFromShuffleMostly(SDValue Op, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
If a BUILD_VECTOR's source elements all apply the same bit operation and one of their operands is con...
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth=0)
Returns the negated value if the node N flips sign of FP value.
static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, ArrayRef< int > OriginalMask, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 16-lane 16-bit integer shuffles.
static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Lower atomic_load_ops into LOCK-prefixed operations.
static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 32-lane 8-bit integer shuffles.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static void computeKnownBitsForPMADDWD(SDValue LHS, SDValue RHS, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth)
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG)
static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0, int BroadcastIdx, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower broadcast of a single - truncated - integer element, coming from a scalar_to_vector/buil...
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, const SDLoc &DL, SelectionDAG &DAG, unsigned X86Opcode, bool Mode, bool isUndefLO, bool isUndefHI)
Emit a sequence of two 128-bit horizontal add/sub followed by a concat_vector.
static SDValue combineBitOpWithPACK(unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL, MVT VT, unsigned Amt=0)
static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Try to fold: and (vector_shuffle<Z,...,Z> (insert_vector_elt undef, (xor X, -1), Z),...
static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to emit a bitmask instruction for a shuffle.
static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
Check if the mask can be mapped to a preliminary shuffle (vperm 64-bit) followed by unpack 256-bit.
static bool is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each 256-bit lane.
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVectorShuffle(SelectionDAG &DAG, EVT VT, const SDLoc &dl, SDValue V1, SDValue V2, ArrayRef< int > Mask)
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerUINT_TO_FP_i32(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
32-bit unsigned integer to float expansion.
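One classic way such an expansion can work, shown here as a standalone scalar sketch rather than the actual lowering, is to OR the 32-bit value into the mantissa of 2^52 and subtract 2^52; the helper name u32ToFloatViaDouble is hypothetical and the check assumes the usual round-to-nearest behavior on an x86 host.

#include <cassert>
#include <cstdint>
#include <cstring>

// Build the double 2^52 + X bit-exactly by ORing X into the mantissa of
// 2^52, then subtract 2^52 to recover X exactly before narrowing to float.
static float u32ToFloatViaDouble(uint32_t X) {
  uint64_t Bits = 0x4330000000000000ULL | X; // bit pattern of 2^52 + X
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return static_cast<float>(D - 4503599627370496.0); // subtract 2^52
}

int main() {
  for (uint32_t X : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu})
    assert(u32ToFloatViaDouble(X) == static_cast<float>(X));
  return 0;
}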
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerTruncateVecI1(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static cl::opt< int > ExperimentalPrefInnermostLoopAlignment("x86-experimental-pref-innermost-loop-alignment", cl::init(4), cl::desc("Sets the preferable loop alignment for experiments (as log2 bytes) " "for innermost loops only. If specified, this option overrides " "alignment set by x86-experimental-pref-loop-alignment."), cl::Hidden)
static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Look for opportunities to create a VPERMV/VPERMILPV/PSHUFB variable permute from a vector of source v...
static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, const SDLoc &DL, SelectionDAG &DAG, unsigned HOpcode, SDValue V0, SDValue V1)
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower a vector shuffle as a zero or any extension.
static bool needCarryOrOverflowFlag(SDValue Flags)
static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
Returns a vector of specified type with all bits set.
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isUndefLowerHalf(ArrayRef< int > Mask)
Return true if the mask creates a vector whose lower half is undefined.
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineOrXorWithSETCC(unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue combineRedundantDWordShuffle(SDValue N, MutableArrayRef< int > Mask, const SDLoc &DL, SelectionDAG &DAG)
Search for a combinable shuffle across a chain ending in pshufd.
static SDValue getBMIMatchingOp(unsigned Opc, SelectionDAG &DAG, SDValue OpMustEq, SDValue Op, unsigned Depth)
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
Handle vector element shifts where the shift amount is a constant.
static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS, bool PackHiHalf=false)
Returns a node that packs the LHS + RHS nodes together at half width.
static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG)
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef< int > Mask, bool AllowFloatDomain, bool AllowIntDomain, SDValue V1, const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT)
static bool isConstantPowerOf2(SDValue V, unsigned EltSizeInBIts, bool AllowUndefs)
static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Given a scalar cast to FP with a cast to integer operand (almost an ftrunc), try to vectorize the cas...
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &ST)
static bool getHalfShuffleMask(ArrayRef< int > Mask, MutableArrayRef< int > HalfMask, int &HalfIdx1, int &HalfIdx2)
If the input shuffle mask results in a vector that is undefined in all upper or lower half elements a...
static cl::opt< int > BrMergingBaseCostThresh("x86-br-merging-base-cost", cl::init(2), cl::desc("Sets the cost threshold for when multiple conditionals will be merged " "into one branch versus be split in multiple branches. Merging " "conditionals saves branches at the cost of additional instructions. " "This value sets the instruction cost limit, below which conditionals " "will be merged, and above which conditionals will be split. Set to -1 " "to never merge branches."), cl::Hidden)
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, SmallVectorImpl< int > &Mask, SmallVectorImpl< SDValue > &Ops, const SelectionDAG &DAG, unsigned Depth, bool ResolveKnownElts)
static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT)
static SDValue emitLockedStackOp(SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue Chain, const SDLoc &DL)
Emit a locked operation on a stack location which does not change any memory location,...
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, bool &ForceV1Zero, bool &ForceV2Zero, unsigned &ShuffleImm, ArrayRef< int > Mask, const APInt &Zeroable)
static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower 8-lane 16-bit floating point shuffles.
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
Try to emit a blend instruction for a shuffle using bit math.
static SDValue reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If exactly one element of the mask is set for a non-extending masked load, it is a scalar load and ve...
static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, unsigned TargetOpcode, unsigned SrcReg, const X86Subtarget &Subtarget, SmallVectorImpl< SDValue > &Results)
Handles the lowering of builtin intrinsics with chain that return their value into registers EDX:EAX.
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef< int > Mask, const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm)
static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI)
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If this is a dynamic select (non-constant condition) and we can match this node with one of the varia...
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBuildVectorAsInsert(SDValue Op, const SDLoc &DL, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, unsigned EltSizeInBits, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a target shuffle mask is equivalent within each sub-lane.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Try to map a 128-bit or larger integer comparison to vector instructions before type legalization spl...
static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits, unsigned ScalarSizeInBits, ArrayRef< int > Mask)
Test whether there are elements crossing LaneSizeInBits lanes in this shuffle mask.
static SDValue FixupMMXIntrinsicTypes(SDNode *N, SelectionDAG &DAG)
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, X86::CondCode &X86CC)
Result of 'and' is compared against zero.
static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsZeroOrAnyExtend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a zero extension on any microarch.
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget, unsigned Opcode)
static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBitOpWithShift(unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Compute the horizontal sum of bytes in V for the elements of VT.
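A scalar model of a horizontal byte sum over one 64-bit element (on x86 such a sum is commonly produced by a PSADBW against zero); byteSum64 is a hypothetical helper.

#include <cassert>
#include <cstdint>

// Add up the eight bytes of a 64-bit element.
static uint64_t byteSum64(uint64_t V) {
  uint64_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += (V >> (8 * I)) & 0xFF;
  return Sum;
}

int main() {
  assert(byteSum64(0x0101010101010101ULL) == 8);
  assert(byteSum64(0xFF00000000000000ULL) == 255);
  return 0;
}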
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG)
static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned Depth=0)
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG)
static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG)
static SDValue lowerShuffleWithEXPAND(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static void computeInLaneShuffleMask(const ArrayRef< int > &Mask, int LaneSize, SmallVector< int > &InLaneMask)
Helper to compute the in-lane shuffle mask for a complete shuffle mask.
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG)
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineTESTP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT, EVT MemVT, MemSDNode *Mem, unsigned Offset, SelectionDAG &DAG)
static bool isUndefUpperHalf(ArrayRef< int > Mask)
Return true if the mask creates a vector whose upper half is undefined.
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt=0)
static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG)
Lower SRA_PARTS and friends, which return two i32 values and take a 2 x i32 value to shift plus a shi...
static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode)
static std::pair< SDValue, SDValue > getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG)
static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs reference the same FP CMP,...
static bool isVKClass(const TargetRegisterClass &RC)
Check if RC is a mask register class.
static int canLowerByDroppingElements(ArrayRef< int > Mask, bool MatchEven, bool IsSingleInput)
Check whether a compaction lowering can be done by dropping even/odd elements and compute how many ti...
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL)
Attempt to pre-truncate inputs to arithmetic ops if it will simplify the codegen.
static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower broadcast of a single element.
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl< SDValue > &Inputs, SmallVectorImpl< int > &Mask)
Removes unused/repeated shuffle source inputs and adjusts the shuffle mask.
static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue combineBitOpWithMOVMSK(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to combine a shuffle into a target-specific add-sub or mul-add-sub node.
static SDValue lowerShuffleAsLanePermuteAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Lower a vector shuffle crossing multiple 128-bit lanes as a lane permutation followed by a per-lane p...
static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Generic lowering of 8-lane i16 shuffles.
static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue In, SelectionDAG &DAG)
static bool canonicalizeShuffleMaskWithCommute(ArrayRef< int > Mask)
Helper function that returns true if the shuffle mask should be commuted to improve canonicalization.
static bool matchAsm(StringRef S, ArrayRef< const char * > Pieces)
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, const SDLoc &DL, SelectionDAG &DAG)
static SDValue splitVSETCC(EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SelectionDAG &DAG, const SDLoc &dl)
Break a VSETCC 256/512-bit vector into two new 128/256 ones and then concatenate the result back.
static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG)
Change a vector store into a pair of half-size vector stores.
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl)
Widen a vector to a larger size with the same scalar type, with the new elements either zero or undef...
static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget, unsigned Opcode)
static bool isUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size is the undef...
static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
static SDValue MatchVectorAllEqualTest(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG, X86::CondCode &X86CC)
static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit, bool isPIC)
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, SDValue And1_L, SDValue And1_R, const SDLoc &DL, SelectionDAG &DAG)
static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget, unsigned Opcode)
static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineToExtendBoolVectorInReg(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
Break a binary integer operation into 2 half sized ops and then concatenate the result back.
static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static LLVM_ATTRIBUTE_UNUSED bool isBlendOrUndef(ArrayRef< int > Mask)
Return true if every element in Mask, is an in-place blend/select mask or is undef.
static const char * getIndirectThunkSymbol(const X86Subtarget &Subtarget, unsigned Reg)
static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static unsigned getV4X86ShuffleImm(ArrayRef< int > Mask)
Get a 4-lane 8-bit shuffle immediate for a mask.
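As a standalone illustration of that immediate encoding, each lane contributes two bits (the PSHUFD/SHUFPS imm8 layout); the helper packShuffleImm4 is hypothetical and assumes every mask value is in range.

#include <cassert>
#include <cstdint>

// Pack a 4-element mask into a 2-bits-per-lane immediate: lane i selects
// source element (Imm >> (2*i)) & 3.
static uint8_t packShuffleImm4(const int Mask[4]) {
  uint8_t Imm = 0;
  for (int I = 0; I < 4; ++I) {
    assert(Mask[I] >= 0 && Mask[I] < 4 && "mask element out of range");
    Imm |= static_cast<uint8_t>(Mask[I]) << (2 * I);
  }
  return Imm;
}

int main() {
  const int Identity[4] = {0, 1, 2, 3};
  const int Reverse[4] = {3, 2, 1, 0};
  assert(packShuffleImm4(Identity) == 0xE4); // binary 11 10 01 00
  assert(packShuffleImm4(Reverse) == 0x1B);  // binary 00 01 10 11
  return 0;
}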
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static void resolveTargetShuffleFromZeroables(SmallVectorImpl< int > &Mask, const APInt &KnownUndef, const APInt &KnownZero, bool ResolveKnownZeros=true)
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Insert one bit into a mask vector, like v16i1 or v8i1.
static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower a vector shuffle by first fixing the 128-bit lanes and then shuffling each lane.
static bool isSoftF16(T VT, const X86Subtarget &Subtarget)
static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Detect vector gather/scatter index generation and convert it from being a bunch of shuffles and extra...
static bool isSingleSHUFPSMask(ArrayRef< int > Mask)
Test whether this can be lowered with a single SHUFPS instruction.
static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, X86::CondCode &CC1, SDValue &Flags, bool &isAnd)
Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
static bool isX86LogicalCmp(SDValue Op)
Return true if opcode is an X86 logical comparison.
static bool isAnyInRange(ArrayRef< int > Mask, int Low, int Hi)
Return true if the value of any element in Mask falls within the specified range (L,...
static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static cl::opt< bool > WidenShift("x86-widen-shift", cl::init(true), cl::desc("Replace narrow shifts with wider shifts."), cl::Hidden)
static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG)
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, unsigned Depth)
static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS=false)
Detect patterns of truncation with signed saturation: (truncate (smin ((smax (x, signed_min_of_dest_t...
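A scalar model of the signed-saturation pattern being matched (not the DAG matcher itself), assuming an i32-to-i8 truncation; truncSSat8 is a hypothetical helper.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Signed-saturating truncation of i32 to i8: truncate smin(smax(x, -128), 127).
static int8_t truncSSat8(int32_t X) {
  int32_t Clamped = std::min<int32_t>(std::max<int32_t>(X, INT8_MIN), INT8_MAX);
  return static_cast<int8_t>(Clamped);
}

int main() {
  assert(truncSSat8(1000) == 127);
  assert(truncSSat8(-1000) == -128);
  assert(truncSSat8(42) == 42);
  return 0;
}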
const unsigned FPStateSize
static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, unsigned &UnpackOpcode, bool IsUnary, ArrayRef< int > TargetMask, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Do target-specific dag combines on floating point negations.
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineXorSubCTLZ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl, unsigned vectorWidth)
static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned &HOpcode, SDValue &V0, SDValue &V1)
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool createShuffleMaskFromVSELECT(SmallVectorImpl< int > &Mask, SDValue Cond, bool IsBLENDV=false)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, bool AllowTruncate)
static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Helper to determine if In truncated to DstVT has the necessary signbits / leading zero bits to be tru...
static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl)
Return Mask with the necessary casting or extending for Mask according to MaskVT when lowering maskin...
static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 64-bit floating point shuffles.
static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Horizontal vector math instructions may be slower than normal math with shuffles.
static bool isFRClass(const TargetRegisterClass &RC)
Check if RC is a vector register class.
static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, bool SimpleOnly)
Generic routine to split vector shuffle into half-sized shuffles.
static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
static SDValue IsNOT(SDValue V, SelectionDAG &DAG)
static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG)
Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Emit nodes that will be selected as "test Op0,Op0", or something equivalent.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &dl)
Return a vector logical shift node.
static bool isFreeToSplitVector(SDNode *N, SelectionDAG &DAG)
static SDValue combineVPDPBUSDPattern(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineINTRINSIC_WO_CHAIN(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower 4-lane i32 vector shuffles.
static SDValue combineX86ShuffleChain(ArrayRef< SDValue > Inputs, SDValue Root, ArrayRef< int > BaseMask, int Depth, bool HasVariableMask, bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Combine an arbitrary chain of shuffles into a single instruction if possible.
static SDValue widenMaskVector(SDValue Vec, bool ZeroNewElements, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl)
Widen a mask vector to a minimum of v8i1/v16i1 to allow use of KSHIFT and bitcast with integer types.
static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, SelectionDAG &DAG)
static bool isInRange(int Val, int Low, int Hi)
Return true if Val falls within the specified range (L, H].
static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Try to combine x86 target specific shuffles.
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > splitVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG)
Helper for attempting to create an X86ISD::BT node.
static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG)
Emit Truncating Store with signed or unsigned saturation.
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, bool FillWithZeroes=false)
Widen a vector input to a vector of NVT.
static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG)
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG, bool ImmBlends=false)
Try to lower as a blend of elements from two inputs followed by a single-input permutation.
static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask, uint64_t &BitLen, uint64_t &BitIdx, const APInt &Zeroable)
const unsigned X87StateSize
static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 64-bit integer shuffles.
static bool isLegalConversion(MVT VT, bool IsSigned, const X86Subtarget &Subtarget)
static bool isUndefOrEqual(int Val, int CmpVal)
Val is the undef sentinel value or equal to the specified value.
static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static bool isTargetShuffle(unsigned Opcode)
static bool isSingleElementRepeatedMask(ArrayRef< int > Mask)
Check if the Mask consists of the same element repeated multiple times.
static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG)
static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerIntVSETCC_AVX512(SDValue Op, const SDLoc &dl, SelectionDAG &DAG)
static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, SelectionDAG &DAG)
Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, SDValue N1, ArrayRef< int > Mask, SelectionDAG &DAG)
If we are extracting two 128-bit halves of a vector and shuffling the result, match that to a 256-bit...
static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 4-lane 64-bit floating point shuffles.
static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT, ArrayRef< SDValue > Ops, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' or 'fsubadd' operation accordingly...
static SDValue lowerV8I16GeneralSingleInputShuffle(const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 shuffle lowering,...
static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG)
Try to turn tests against the signbit in the form of: XOR(TRUNCATE(SRL(X, size(X)-1)),...
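A scalar sketch of the signbit test being recognized, assuming a 32-bit value: xor'ing the shifted-down sign bit with 1 yields 1 exactly when the value is non-negative, so the whole sequence can become a plain signed comparison. The helper viaShiftXor is hypothetical.

#include <cassert>
#include <cstdint>

static bool viaShiftXor(int32_t X) {
  uint32_t SignBit = static_cast<uint32_t>(X) >> 31; // srl by size-1
  return (SignBit ^ 1u) != 0;                        // xor with 1
}

int main() {
  for (int32_t X : {0, 1, -1, INT32_MIN, INT32_MAX})
    assert(viaShiftXor(X) == (X >= 0));
  return 0;
}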
static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef< SDValue > Ops, ArrayRef< int > Mask, bool HasVariableMask, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 2-lane 64-bit floating point shuffles.
static SDValue isUpperSubvectorUndef(SDValue V, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< int > BrMergingLikelyBias("x86-br-merging-likely-bias", cl::init(0), cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely " "that all conditionals will be executed. For example for merging " "the conditionals (a == b && c > d), if its known that a == b is " "likely, then it is likely that if the conditionals are split " "both sides will be executed, so it may be desirable to increase " "the instruction cost threshold. Set to -1 to never merge likely " "branches."), cl::Hidden)
static bool clobbersFlagRegisters(const SmallVector< StringRef, 4 > &AsmPieces)
static SDValue getInvertedVectorForFMA(SDValue V, SelectionDAG &DAG)
static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp, int Idx, int ExpectedIdx)
Checks whether the vector elements referenced by two shuffle masks are equivalent.
static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Try to match a vector shuffle as an element rotation.
static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi)
Return true if Val is undef, zero or if its value falls within the specified range (L,...
static const Constant * getTargetConstantFromBasePtr(SDValue Ptr)
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Original, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to emit a blend instruction for a shuffle.
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset)
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ArrayRef< SDValue > Ops, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Helper that combines an array of subvector ops as if they were the operands of an ISD::CONCAT_VECTORS ...
static bool isUndefOrInRange(int Val, int Low, int Hi)
Return true if Val is undef or if its value falls within the specified range (L, H].
static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1, const SDLoc &DL, EVT VT)
static bool collectConcatOps(SDNode *N, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG)
static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Helper to recursively truncate vector elements in half with PACKSS/PACKUS.
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG)
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG)
static void computeKnownBitsForPMADDUBSW(SDValue LHS, SDValue RHS, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth)
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static std::pair< Value *, BitTestKind > FindSingleBitChange(Value *V)
static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG)
If we are converting a value to floating-point, try to replace scalar truncate of an extracted vector...
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef< int > Mask)
Test whether there are elements crossing 128-bit lanes in this shuffle mask.
static SDValue LowerI64IntToFP16(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 4-lane 64-bit integer shuffles.
static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Emit nodes that will be selected as "cmp Op0,Op1", or something equivalent.
static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG)
const unsigned FPStateSizeInBits
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If exactly one element of the mask is set for a non-truncating masked store, it is a vector extract a...
static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode)
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue narrowExtractedVectorSelect(SDNode *Ext, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector of a vector select and the select condition is composed of concatena...
static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
static bool isNoopShuffleMask(ArrayRef< int > Mask)
Tiny helper function to identify a no-op mask.
static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT, SDValue V1, SDValue V2)
Returns a vector_shuffle node for an unpackh operation.
static SDValue combineExtractFromVectorLoad(SDNode *N, EVT VecVT, SDValue SrcVec, uint64_t Idx, const SDLoc &dl, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If this is a zero/all-bits result that is bitwise-anded with a low bits mask.
static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a byte shift sequence.
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineX86ShuffleChainWithExtract(ArrayRef< SDValue > Inputs, SDValue Root, ArrayRef< int > BaseMask, int Depth, bool HasVariableMask, bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool isTargetShuffleVariableMask(unsigned Opcode)
static bool isLogicOp(unsigned Opcode)
static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG, bool BitwiseOnly)
static SDValue LowerBuildVectorv8i16(SDValue Op, const SDLoc &DL, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Custom lower build_vector of v8i16.
static bool matchBinaryShuffle(MVT MaskVT, ArrayRef< int > Mask, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, bool IsUnary)
static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
Try to lower as an unpack of elements from two inputs followed by a single-input permutation.
static bool canScaleShuffleElements(ArrayRef< int > Mask, unsigned NumDstElts)
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG)
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Return a vector_shuffle of the specified vector of zero or undef vector.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Attempt to use the vbroadcast instruction to generate a splat value from a splat BUILD_VECTOR which u...
static SDValue combineMulToPMULDQ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG)
Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z)). This undoes the inverse fold performed in InstCom...
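A quick standalone check of the bitwise identity behind this fold (it is just De Morgan's law), over a few sample words.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 0xFFFFFFFFu, 0x12345678u, 0x0F0F0F0Fu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals)
      for (uint32_t Z : Vals)
        assert((X & (Y | ~Z)) == (X & ~(~Y & Z))); // (or Y, ~Z) == ~(and ~Y, Z)
  return 0;
}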
static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG)
static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, const X86Subtarget &Subtarget, bool IsCommutative, SmallVectorImpl< int > &PostShuffleMask, bool ForceHorizOp)
Return 'true' if this vector operation is "horizontal" and return the operands for the horizontal ope...
static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, SmallVectorImpl< uint64_t > &RawMask, APInt &UndefElts)
static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, const X86Subtarget &Subtarget)
sext(add_nsw(x, C)) --> add(sext(x), C_sext); zext(add_nuw(x, C)) --> add(zext(x), C_zext). Promoting a...
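A concrete scalar instance of the sext case, assuming the narrow add cannot overflow (the nsw precondition): sign-extending after the add equals adding the sign-extended operands.

#include <cassert>
#include <cstdint>

int main() {
  int8_t X = 100, C = 20; // 120 fits in i8, so the narrow add is nsw
  int32_t SextAfterAdd = static_cast<int32_t>(static_cast<int8_t>(X + C));
  int32_t AddAfterSext = static_cast<int32_t>(X) + static_cast<int32_t>(C);
  assert(SextAfterAdd == AddAfterSext);
  return 0;
}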
static const Constant * getTargetConstantFromNode(LoadSDNode *Load)
static SDValue canonicalizeBitSelect(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool canCombineAsMaskOperation(SDValue V, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a vector shuffle as a dword/qword rotation.
static SDValue lowerVECTOR_COMPRESS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static bool isProfitableToUseFlagOp(SDValue Op)
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG)
ISD::FROUND is defined to round to nearest with ties rounding away from 0.
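A scalar illustration of those semantics: std::round also rounds halfway cases away from zero, so it can stand in for FROUND on a single value (unlike rint/nearbyint, which honor the current rounding mode).

#include <cassert>
#include <cmath>

int main() {
  assert(std::round(2.5) == 3.0);   // tie rounds away from zero
  assert(std::round(-2.5) == -3.0); // likewise on the negative side
  assert(std::round(2.4) == 2.0);   // non-tie rounds to nearest
  return 0;
}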
static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
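A scalar model of the unsigned-saturation pattern (not the DAG matcher itself), assuming an i32-to-u8 truncation; truncUSat8 is a hypothetical helper.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Unsigned-saturating truncation of i32 to u8: truncate umin(x, 255).
static uint8_t truncUSat8(uint32_t X) {
  return static_cast<uint8_t>(std::min<uint32_t>(X, 255u));
}

int main() {
  assert(truncUSat8(1000u) == 255);
  assert(truncUSat8(42u) == 42);
  return 0;
}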
static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If we have a shuffle of AVX/AVX512 (256/512 bit) vectors that only uses the low half of each source v...
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG)
Do a one-to-one translation of an ISD::CondCode to the X86-specific condition code,...
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable, const X86Subtarget &Subtarget)
Try to lower a vector shuffle as a bit shift (shifts in zeros).
static SDValue getFlagsOfCmpZeroFori1(SelectionDAG &DAG, const SDLoc &DL, SDValue Mask)
static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
High-level routine to lower various 512-bit x86 vector shuffles.
static SDValue LowerBuildVectorv16i8(SDValue Op, const SDLoc &DL, const APInt &NonZeroMask, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Custom lower build_vector of v16i8.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt &UndefElts, SmallVectorImpl< APInt > &EltBits, bool AllowWholeUndefs=true, bool AllowPartialUndefs=false)
static bool detectExtMul(SelectionDAG &DAG, const SDValue &Mul, SDValue &Op0, SDValue &Op1)
static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
If a vector select has an operand that is -1 or 0, try to simplify the select to a bitwise logic oper...
static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerAddSubToHorizontalOp(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Depending on uarch and/or optimizing for size, we might prefer to use a vector operation in place of ...
static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, SelectionDAG &DAG, SDValue &Addr, SDValue &Index, Align &Alignment, unsigned &Offset)
Given a masked memory load/store operation, return true if it has one mask bit set.
static SDValue reduceVMULWidth(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
When the operands of vector mul are extended from smaller size values, like i8 and i16,...
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode)
static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG)
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, unsigned ExpectedUses)
Returns true if it is possible to fold MUL and an idiom that has already been recognized as ADDSUB/SUBAD...
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &ST)
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS, unsigned &LogBias, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getSplitVectorSrc(SDValue LHS, SDValue RHS, bool AllowCommute)
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG)
The only differences between FABS and FNEG are the mask and the logic op.
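A bit-level sketch of the observation above on a single IEEE float, using a hypothetical applyMask helper: FABS is an AND that clears the sign bit, FNEG is an XOR that flips it; only the constant mask and the logic op differ.

#include <cassert>
#include <cstdint>
#include <cstring>

static float applyMask(float X, uint32_t Mask, bool IsXor) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits = IsXor ? (Bits ^ Mask) : (Bits & Mask);
  float R;
  std::memcpy(&R, &Bits, sizeof(R));
  return R;
}

int main() {
  assert(applyMask(-3.5f, 0x7FFFFFFFu, /*IsXor=*/false) == 3.5f); // FABS mask
  assert(applyMask(3.5f, 0x80000000u, /*IsXor=*/true) == -3.5f);  // FNEG mask
  return 0;
}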
ShrinkMode
Different mul shrinking modes.
static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, const SDLoc &dl)
static SDValue combineINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue canonicalizeShuffleMaskWithHorizOp(MutableArrayRef< SDValue > Ops, MutableArrayRef< int > Mask, unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG)
Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a VSELECT instruction to a vector shuffle.
static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2, MutableArrayRef< int > Mask, const APInt &Zeroable, bool &ForceV1Zero, bool &ForceV2Zero, uint64_t &BlendMask)
static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src, const SDLoc &DL)
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, EVT VT, const SDLoc &DL)
static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, const SDLoc &DL, const X86Subtarget &Subtarget)
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT, SDValue V1, SDValue V2)
Returns a vector_shuffle node for an unpackl operation.
static SDValue getScalarValueForVectorElement(SDValue V, int Idx, SelectionDAG &DAG)
Try to get a scalar value for a specific element of a vector.
static SDValue LowerZERO_EXTEND_Mask(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static unsigned getOpcodeForIndirectThunk(unsigned RPOpc)
static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Generic lowering of v16i8 shuffles.
static unsigned getSHUFPDImm(ArrayRef< int > Mask)
static bool isNullFPScalarOrVectorConst(SDValue V)
static bool hasIdenticalHalvesShuffleMask(ArrayRef< int > Mask)
Return true if a shuffle mask chooses elements identically in its top and bottom halves.
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, unsigned &PackOpcode, ArrayRef< int > TargetMask, const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned MaxStages=1)
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget)
static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Try to convert a vector reduction sequence composed of binops and shuffles into horizontal ops.
static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Lower shuffle using X86ISD::VROTLI rotations.
static SDValue lowerShuffleAsDecomposedShuffleMerge(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Generic routine to decompose a shuffle and blend into independent blends and permutes.
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT)
static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef< int > BlendMask, const APInt &DemandedElts, SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL)
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Combine: (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S) to: (brcond/cmov/setcc ....
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Optimize an EFLAGS definition used according to the condition code CC into a simpler EFLAGS value,...
static bool isBroadcastShuffleMask(ArrayRef< int > Mask)
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget)
static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineX86ShufflesRecursively(ArrayRef< SDValue > SrcOps, int SrcOpIndex, SDValue Root, ArrayRef< int > RootMask, ArrayRef< const SDNode * > SrcNodes, unsigned Depth, unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Fully generic combining of x86 shuffle instructions.
static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static StringRef getInstrStrFromOpNo(const SmallVectorImpl< StringRef > &AsmStrs, unsigned OpNo)
static bool isSequentialOrUndefOrZeroInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size,...
static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Either split a vector in halves or decompose the shuffles and the blend/unpack.
static bool canWidenShuffleElements(ArrayRef< int > Mask, SmallVectorImpl< int > &WidenedMask)
Helper function to test whether a shuffle mask could be simplified by widening the elements being shu...
static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
Break a unary integer operation into 2 half sized ops and then concatenate the result back.
static SDValue combineSext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 2-lane 64-bit integer shuffles.
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineLogicBlendIntoConditionalNegate(EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue getShuffleScalarElt(SDValue Op, unsigned Index, SelectionDAG &DAG, unsigned Depth)
Returns the scalar element that will make up the i'th element of the result of the vector shuffle.
static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable)
static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG)
Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val)
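A tiny standalone check of the fold on scalar booleans: xor'ing a comparison result with 1 is the same as evaluating the inverted condition.

#include <cassert>

int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B) {
      assert(((A < B) ^ 1) == (A >= B));  // setlt xor 1 == setge
      assert(((A == B) ^ 1) == (A != B)); // seteq xor 1 == setne
    }
  return 0;
}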
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2, unsigned &InsertPSMask, const APInt &Zeroable, ArrayRef< int > Mask, SelectionDAG &DAG)
static bool isNonZeroElementsInOrder(const APInt &Zeroable, ArrayRef< int > Mask, const EVT &VectorType, bool &IsZeroSideLeft)
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, EVT VecVT, EVT CmpVT, bool HasPT, F SToV)
Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp expansion.
static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just pre-promote its result type since...
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Try to lower a vector shuffle as a byte rotation.
static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Try to lower a shuffle as a permute of the inputs followed by an UNPCK instruction.
static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &ST)
static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, SelectionDAG &DAG)
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, const X86Subtarget &Subtarget, TargetLowering::DAGCombinerInfo &DCI)
Extracting a scalar FP value from vector element 0 is free, so extract each operand first,...
static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool isAddSubOrSubAddMask(ArrayRef< int > Mask, bool &Op0Even)
Checks if the shuffle mask takes subsequent elements alternately from two vectors.
static bool isCompletePermute(ArrayRef< int > Mask)
Return true if every element of a single input is referenced by the shuffle mask.
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, SDValue EntryEBP)
When the MSVC runtime transfers control to us, either to an outlined function or when returning to a ...
static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl< SDValue > &Results)
Handles the lowering of builtin intrinsics that read the time stamp counter (x86_rdtsc and x86_rdtscp...
static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerVectorAllEqual(const SDLoc &DL, SDValue LHS, SDValue RHS, ISD::CondCode CC, const APInt &OriginalMask, const X86Subtarget &Subtarget, SelectionDAG &DAG, X86::CondCode &X86CC)
static bool is128BitUnpackShuffleMask(ArrayRef< int > Mask, const SelectionDAG &DAG)
static bool isOrXorXorTree(SDValue X, bool Root=true)
Recursive helper for combineVectorSizedSetCCEquality() to see if we have a recognizable memcmp expans...
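The idea these two helpers recognize and emit can be checked with ordinary integers: two wide values compare equal iff the OR of the XORs of their pieces is zero. A standalone, illustrative check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Treat two 128-bit values as pairs of 64-bit halves (a0,a1) and (b0,b1).
      uint64_t a0 = 0x0123456789abcdefULL, a1 = 0xfedcba9876543210ULL;
      uint64_t b0 = a0, b1 = a1;
      // Equal iff the OR of the per-half XORs is zero.
      assert((((a0 ^ b0) | (a1 ^ b1)) == 0) == (a0 == b0 && a1 == b1));
      b1 ^= 1; // perturb one bit
      assert((((a0 ^ b0) | (a1 ^ b1)) == 0) == (a0 == b0 && a1 == b1));
      return 0;
    }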
static SDValue LowerAVXExtend(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static ConstantPoolSDNode * getTargetConstantPoolFromBasePtr(SDValue Ptr)
static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, SelectionDAG &DAG, const SDLoc &DL)
Attempt to fold vpermf128(op(),op()) -> op(vpermf128(),vpermf128()).
static bool isShuffleEquivalent(ArrayRef< int > Mask, ArrayRef< int > ExpectedMask, SDValue V1=SDValue(), SDValue V2=SDValue())
Checks whether a shuffle mask is equivalent to an explicit list of arguments.
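The equivalence check treats undef mask elements as wildcards. A minimal sketch of that rule (ignoring the optional V1/V2 operand matching the real helper performs):

    #include "llvm/ADT/ArrayRef.h"

    // Sketch: -1 (undef) matches anything; other elements must match exactly.
    static bool masksEquivalentSketch(llvm::ArrayRef<int> Mask,
                                      llvm::ArrayRef<int> Expected) {
      if (Mask.size() != Expected.size())
        return false;
      for (size_t I = 0, E = Mask.size(); I != E; ++I)
        if (Mask[I] >= 0 && Mask[I] != Expected[I])
          return false;
      return true;
    }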
static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef< int > Mask, const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Handle lowering of 8-lane 32-bit floating point shuffles.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue lowerShuffleAsByteRotateAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then permuting the elements of th...
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src, const X86Subtarget &Subtarget, SelectionDAG &DAG, bool ZeroUppers)
static void createPackShuffleMask(MVT VT, SmallVectorImpl< int > &Mask, bool Unary, unsigned NumStages=1)
Create a shuffle mask that matches the PACKSS/PACKUS truncation.
static bool isUndefOrEqualInRange(ArrayRef< int > Mask, int CmpVal, unsigned Pos, unsigned Size)
Return true if every element in Mask, beginning from position Pos and ending in Pos+Size is the undef...
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Do target-specific dag combines on floating-point adds/subs.
static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG, const SDLoc &dl)
Break an operation into 2 half sized ops and then concatenate the results.
static cl::opt< bool > MulConstantOptimization("mul-constant-optimization", cl::init(true), cl::desc("Replace 'mul x, Const' with more effective instructions like " "SHIFT, LEA, etc."), cl::Hidden)
static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld)
static bool isAnyZero(ArrayRef< int > Mask)
Return true if the value of any element in Mask is the zero sentinel value.
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue truncateVectorWithPACKUS(EVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Truncate using inreg zero extension (AND mask) and X86ISD::PACKUS.
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl< int > &Mask, APInt &KnownUndef, APInt &KnownZero)
static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS, SDValue Index, SDValue Base, SDValue Scale, SelectionDAG &DAG)
static SDValue combineSubABS(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SmallVector< int, 4 > getPSHUFShuffleMask(SDValue N)
Get the PSHUF-style mask from a PSHUF node.
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineShuffleToFMAddSub(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG)
Combine shuffle of two fma nodes into FMAddSub or FMSubAdd.
static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget)
static SDValue lowerShufflePairAsUNPCKAndPermute(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
static bool isUndefOrZero(int Val)
Val is either the undef or zero sentinel value.
SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, F Builder, bool CheckBWI=true)
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL].
static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, MachineBasicBlock *BB)
static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl)
Generate a DAG to grab 128-bits from a vector > 128 bits.
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue &X86CC)
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT, ArrayRef< int > Mask, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower a vector shuffle using the SHUFPS instruction.
static SDValue combineStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static LLVM_ATTRIBUTE_UNUSED bool isHorizOp(unsigned Opcode)
static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Lower a vector CTLZ using native supported vector CTLZ instruction.
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Extract one bit from mask vector, like v16i1 or v8i1.
static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, MVT VT, bool IsSigned, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue *Low=nullptr)
static SDValue lowerShuffleAsBlendOfPSHUFBs(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse)
Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the blend if only one input i...
static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask, uint64_t &BitLen, uint64_t &BitIdx)
static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS, SDValue Mask, SelectionDAG &DAG)
static SDValue combineAVG(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
static cl::opt< int > BrMergingUnlikelyBias("x86-br-merging-unlikely-bias", cl::init(-1), cl::desc("Decreases 'x86-br-merging-base-cost' in cases that it is unlikely " "that all conditionals will be executed. For example for merging " "the conditionals (a == b && c > d), if its known that a == b is " "unlikely, then it is unlikely that if the conditionals are split " "both sides will be executed, so it may be desirable to decrease " "the instruction cost threshold. Set to -1 to never merge unlikely " "branches."), cl::Hidden)
static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, const SDLoc &DL, EVT MemVT, MachineMemOperand *MMO, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl< int > &Mask, SmallVectorImpl< SDValue > &Ops, APInt &KnownUndef, APInt &KnownZero)
Decode a target shuffle mask and inputs and see if any values are known to be undef or zero from thei...
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Custom lower build_vector of v4i32 or v4f32.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef< int > Mask, ArrayRef< int > ExpectedMask, const SelectionDAG &DAG, SDValue V1=SDValue(), SDValue V2=SDValue())
Checks whether a target shuffle mask is equivalent to an explicit pattern.
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG)
Fold "masked merge" expressions like (m & x) | (~m & y) into the equivalent ((x ^ y) & m) ^ y patter...
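The folded form computes the same value with one fewer operation by reusing the XOR. A quick standalone check of the identity with plain integers:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (m & x) | (~m & y) selects bits of x where m is 1 and bits of y where
      // m is 0; ((x ^ y) & m) ^ y computes the same thing.
      uint32_t m = 0x0F0F0F0Fu, x = 0x12345678u, y = 0x9ABCDEF0u;
      assert(((m & x) | (~m & y)) == (((x ^ y) & m) ^ y));
      return 0;
    }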
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1)
static SDValue pushAddIntoCmovOfConsts(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
CMOV of constants requires materializing constant operands in registers.
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool Is64Bit, bool Is64BitLP64)
static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl, SelectionDAG &DAG, const X86Subtarget &Subtarget)
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx)
For an EXTRACT_VECTOR_ELT with a constant index return the real underlying vector and index.
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget)
static bool isUnaryOp(unsigned Opcode)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget)
Optimize branch condition evaluation.
static bool hasFPCMov(unsigned X86CC)
Is there a floating-point cmov for the specific X86 condition code? The current x86 ISA includes the foll...
static int getOneTrueElt(SDValue V)
If V is a build vector of boolean constants and exactly one of those constants is true,...
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, SelectionDAG &DAG)
static constexpr int Concat[]
auto IsFreeTruncation
static const unsigned FramePtr
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
static APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all-ones integer value.
opStatus next(bool nextDown)
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
unsigned popcount() const
Count the number of bits set.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
void setBit(unsigned BitPosition)
Set to 1 the bit whose position is given by "BitPosition".
APInt abs() const
Get the absolute value.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
void setSignBit()
Set the sign bit to 1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
bool eq(const APInt &RHS) const
Equality comparison.
int32_t exactLogBase2() const
void clearAllBits()
Set every bit to 0.
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
unsigned getNumSignBits() const
Computes the number of leading bits of this APInt that are equal to its sign bit.
unsigned countl_zero() const
The APInt version of std::countl_zero.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
void flipAllBits()
Toggle every bit to its opposite value.
unsigned countl_one() const
Count the number of leading one bits.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
void setAllBits()
Set every bit to 1.
bool getBoolValue() const
Convert APInt to a boolean value.
bool isMask(unsigned numBits) const
bool isMaxSignedValue() const
Determine if this is the largest signed value.
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
APInt sext(unsigned width) const
Sign extend to a new width.
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
APInt shl(unsigned shiftAmt) const
Left-shift function.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
bool isSignBitSet() const
Determine if sign bit of this APInt is set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
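A small usage sketch of the bit-set factories and extractBits listed above (assumes the standard llvm/ADT/APInt.h header; the expected values are shown in the comments):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    void apintBitsDemo() {
      APInt Lo  = APInt::getLowBitsSet(16, 4);   // 0x000F
      APInt Hi  = APInt::getHighBitsSet(16, 4);  // 0xF000
      APInt Mid = APInt::getBitsSet(16, 4, 8);   // 0x00F0 (bits [4,8))
      APInt V(16, 0xABCD);
      APInt Nib = V.extractBits(4, 8);           // 0xB (bits [8,12) of 0xABCD)
      (void)Lo; (void)Hi; (void)Mid; (void)Nib;
    }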
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
bool isOne() const
Determine if this is a value of 1.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
int64_t getSExtValue() const
Get sign extended value.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
unsigned countr_one() const
Count the number of trailing one bits.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
bool isMaxValue() const
Determine if this is the largest unsigned value.
APInt truncSSat(unsigned width) const
Truncate to new width with signed saturation.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
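For reference, the slicing helpers above behave as follows (a small sketch using llvm/ADT/ArrayRef.h; results shown in the comments):

    #include "llvm/ADT/ArrayRef.h"

    void arrayRefDemo() {
      int Data[] = {10, 20, 30, 40};
      llvm::ArrayRef<int> A(Data);
      llvm::ArrayRef<int> S = A.slice(1, 2);   // {20, 30}
      llvm::ArrayRef<int> B = A.drop_back();   // {10, 20, 30}
      bool E = A.empty();                      // false; A.size() == 4
      (void)S; (void)B; (void)E;
    }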
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
An instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
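A hedged sketch of how a backend typically inspects one of these instructions (a generic, hypothetical helper, not code from this file; only two of the operations listed above are handled):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Illustration only: dispatch on the atomicrmw operation kind.
    static bool isSignedMinMax(const AtomicRMWInst &AI) {
      switch (AI.getOperation()) {
      case AtomicRMWInst::Min:   // *p = old <signed v ? old : v
      case AtomicRMWInst::Max:   // *p = old >signed v ? old : v
        return true;
      default:
        return false;
      }
    }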
This is an SDNode representing atomic operations.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
size_type count() const
count - Returns the number of bits which are set.
bool none() const
none - Returns true if none of the bits are set.
The address of a basic block.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool getRepeatedSequence(const APInt &DemandedElts, SmallVectorImpl< SDValue > &Sequence, BitVector *UndefElements=nullptr) const
Find the shortest repeating sequence of values in the build vector.
SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
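A common usage pattern for the splat queries above, sketched under the assumption that N is some SDValue being examined (illustrative only, not code from this file):

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // Sketch: return true if N is a BUILD_VECTOR that splats one constant,
    // and report the splatted value.
    static bool isConstantSplatSketch(SDValue N, APInt &SplatValue) {
      auto *BV = dyn_cast<BuildVectorSDNode>(N);
      if (!BV)
        return false;
      APInt SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      return BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                 HasAnyUndefs);
    }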
Value * getCalledOperand() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SGT
signed greater than
Predicate getPredicate() const
Return the predicate for this instruction.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
This is the shared class of boolean and integer constants.
static bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
bool isMachineConstantPoolEntry() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Tagged union holding either a T or a Error.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type::subtype_iterator param_iterator
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasPersonalityFn() const
Check whether this function has a personality function.
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int64_t getOffset() const
const GlobalValue * getGlobal() const
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
ThreadLocalMode getThreadLocalMode() const
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
std::vector< ConstraintInfo > ConstraintInfoVector
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
bool usesWindowsCFI() const
MCSymbol * getOrCreateParentFrameOffsetSymbol(const Twine &FuncName)
MCSymbol * getOrCreateLSDASymbol(const Twine &FuncName)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
bool is32BitVector() const
Return true if this is a 32-bit vector type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
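The MVT helpers above compose as in the following sketch (the MVT header location varies across LLVM versions, e.g. llvm/CodeGenTypes/MachineValueType.h in recent trees; expected values in the comments):

    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    void mvtDemo() {
      MVT V = MVT::getVectorVT(MVT::i32, 4);   // MVT::v4i32
      MVT E = V.getVectorElementType();        // MVT::i32
      unsigned N = V.getVectorNumElements();   // 4
      MVT H = V.getHalfNumVectorElementsVT();  // MVT::v2i32
      MVT I = MVT::getIntegerVT(64);           // MVT::i64
      bool Wide = V.is128BitVector();          // true
      (void)E; (void)N; (void)H; (void)I; (void)Wide;
    }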
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
unsigned succ_size() const
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Instructions::iterator instr_iterator
succ_reverse_iterator succ_rbegin()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
succ_reverse_iterator succ_rend()
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setAdjustsStack(bool V)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
void setHasCopyImplyingStackAdjustment(bool B)
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
void moveAdditionalCallInfo(const MachineInstr *Old, const MachineInstr *New)
Move the call site info from Old to New.
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
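The MachineInstrBuilder methods above are normally chained off BuildMI. A generic, hedged sketch (target-neutral; the block, iterator, DebugLoc, TII, and registers are assumed parameters, not values from this file):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    using namespace llvm;

    // Sketch: emit a COPY of SrcReg into DstReg before iterator I.
    static void emitCopySketch(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, const TargetInstrInfo *TII,
                               Register DstReg, Register SrcReg) {
      BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), DstReg)
          .addReg(SrcReg, RegState::Kill);
    }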
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MGATHER node.
This is a base class used to represent MGATHER and MSCATTER nodes.
This class is used to represent an MLOAD node.
This base class is used to represent MLOAD and MSTORE nodes.
const SDValue & getMask() const
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID for this memory operation.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNode * getGluedUser() const
If this node has a glue value with a user, return the user (there is at most one).
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
static bool areOnlyUsersOf(ArrayRef< const SDNode * > Nodes, const SDNode *N)
Return true if all the users of N are contained in Nodes.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the node is an UNDEF value.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
Help to insert SDNodeFlags automatically in transforming.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode)
Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op)
Return the specified value casted to the target's desired shift amount type.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
bool isEqualTo(SDValue A, SDValue B) const
Test whether two SDValues are known to compare equal.
static constexpr unsigned MaxRecursionDepth
SDValue expandVACopy(SDNode *Node)
Expand the specified ISD::VACOPY node as the Legalize pass would.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue expandVAArg(SDNode *Node)
Expand the specified ISD::VAARG node as the Legalize pass would.
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
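A hedged sketch of how the node-construction helpers above are typically combined inside a lowering or combine hook (DAG, DL, and X here are hypothetical parameters, not code from this file):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Sketch: build (bitcast f32 (add (i32 X), 0)) and return it.
    static SDValue buildExampleNode(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue X) {
      SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
      SDValue Sum  = DAG.getNode(ISD::ADD, DL, MVT::i32, X, Zero);
      return DAG.getBitcast(MVT::f32, Sum);
    }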
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
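A small sketch of the known-bits queries above, assuming a SelectionDAG &DAG and an SDValue Op of type MVT::i32 are in scope (both are stand-ins, not names from this listing):

  // Full known-bits query.
  KnownBits Known = DAG.computeKnownBits(Op);
  bool LowBitKnownZero = Known.Zero[0];
  // The same question phrased through the mask-based helper.
  bool AlsoKnownZero = DAG.MaskedValueIsZero(Op, APInt(32, 1));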
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVMContext * getContext() const
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, ArrayRef< ISD::NodeType > CandidateBinOps, bool AllowPartials=false)
Match a binop + shuffle pyramid that represents a horizontal reduction over the elements of a vector ...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
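A self-contained sketch of the bitvector interface listed above (set() is part of the same class, though not listed here; countDemo is an invented name):

  #include "llvm/ADT/SmallBitVector.h"

  static unsigned countDemo() {
    llvm::SmallBitVector Bits;
    Bits.resize(64);      // grow to 64 bits; new bits default to false
    Bits.set(3);          // mark a single bit
    return Bits.count();  // number of set bits, here 1
  }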
void reserve(size_type NumEntries)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
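A minimal sketch of the insert contract described above (dedupDemo is an invented name):

  #include "llvm/ADT/SmallSet.h"

  static bool dedupDemo() {
    llvm::SmallSet<int, 8> Seen;
    auto First  = Seen.insert(42);  // {iterator, true}: value newly inserted
    auto Second = Seen.insert(42);  // {iterator, false}: value already present
    return First.second && !Second.second;  // true
  }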
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
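A short, self-contained sketch of the SmallVector operations listed above (push_back and append; sumDemo is an invented name):

  #include "llvm/ADT/SmallVector.h"
  #include <iterator>

  static int sumDemo() {
    llvm::SmallVector<int, 4> V;  // inline storage for 4 elements
    V.push_back(1);
    V.push_back(2);
    int More[] = {3, 4};
    V.append(std::begin(More), std::end(More));  // add a range to the end
    int Sum = 0;
    for (int X : V)
      Sum += X;
    return Sum;  // 10
  }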
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
static constexpr size_t npos
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C, or npos if not found.
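A small sketch using the StringRef queries listed above (starts_with and substr); stripLeadingUnderscores is an invented helper name:

  #include "llvm/ADT/StringRef.h"

  static llvm::StringRef stripLeadingUnderscores(llvm::StringRef Name) {
    if (Name.starts_with("__"))
      return Name.substr(2);  // drop the "__" prefix, keep the rest
    return Name;
  }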
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
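A minimal usage sketch for StringSwitch (names and values are purely illustrative; Default() is also part of the class, though not listed here):

  #include "llvm/ADT/StringSwitch.h"

  static unsigned parseCond(llvm::StringRef Name) {
    return llvm::StringSwitch<unsigned>(Name)
        .Case("eq", 0)
        .Case("ne", 1)
        .Default(~0u);  // fallback when no Case matches
  }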
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
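A hedged sketch of how setOperationAction is typically used inside a TargetLowering-derived constructor; the opcode/type/action choices are illustrative, not a description of any particular target:

  // Inside the constructor of a TargetLowering subclass, where the
  // LegalizeAction enumerators (Expand, Custom, ...) are in scope:
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Expand);  // lower to other nodes
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);  // route to LowerOperation
  setOperationAction(ISD::FSINCOS,   MVT::f64,   Expand);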
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)
Tells the code generator which bitwidths to bypass.
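For example, a target whose wide divider is slow might declare the following inside its TargetLowering constructor (the bit widths here are illustrative):

  // Let the optimizer emit a runtime check and use a 32-bit divide when both
  // operands of a 64-bit div/rem fit into 32 bits.
  addBypassSlowDiv(/*SlowBitWidth=*/64, /*FastBitWidth=*/32);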
void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum fp convert the backend supports.
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
BooleanContent
Enum that describes how the target represents true/false values.
@ ZeroOrOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
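A hedged sketch combining the two legality hooks above (setTruncStoreAction and setLoadExtAction), as they might appear in a TargetLowering constructor; the types and actions are illustrative:

  // Sign-extending i1 loads to i32 are promoted; f64->f32 truncating stores
  // are expanded into separate round-and-store sequences.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);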
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fp-contract=xxx option.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
Type * getArrayElementType() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
uint64_t getArrayNumElements() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
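A self-contained sketch of the Type queries listed above (typeDemo is an invented name):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"

  static bool typeDemo() {
    llvm::LLVMContext Ctx;
    llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
    llvm::Type *I1  = llvm::Type::getInt1Ty(Ctx);
    return I32->isIntegerTy() && !I1->isFloatingPointTy() &&
           I32->getScalarSizeInBits() == 32;
  }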
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
unsigned getOperandNo() const
Return the operand # of this use in its User.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the global base register value.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setFAIndex(int Index)
void setAMXProgModel(AMXProgModelEnum Model)
unsigned getVarArgsGPOffset() const
int getRegSaveFrameIndex() const
ArrayRef< size_t > getPreallocatedArgOffsets(const size_t Id)
void setIsSplitCSR(bool s)
unsigned getVarArgsFPOffset() const
int getRestoreBasePointerOffset() const
int getVarArgsFrameIndex() const
void setRestoreBasePointer(const MachineFunction *MF)
void setHasPreallocatedCall(bool v)
void incNumLocalDynamicTLSAccesses()
size_t getPreallocatedStackSize(const size_t Id)
void setRAIndex(int Index)
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getDarwinTLSCallPreservedMask() const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Register getStackRegister() const
unsigned getSlotSize() const
Register getBaseRegister() const
const uint32_t * getNoPreservedMask() const override
bool canExtendTo512BW() const
bool isTargetMachO() const
bool useIndirectThunkBranches() const
bool hasBitScanPassThrough() const
bool isPICStyleGOT() const
const X86TargetLowering * getTargetLowering() const override
bool hasMFence() const
Use mfence if we have SSE2 or we're on x86-64 (even if we asked for no-sse2).
bool isPICStyleStubPIC() const
bool isTargetWindowsMSVC() const
bool canUseCMPXCHG8B() const
bool isTargetDarwin() const
bool isTargetWin64() const
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
const Triple & getTargetTriple() const
const X86InstrInfo * getInstrInfo() const override
bool useAVX512Regs() const
bool isCallingConvWin64(CallingConv::ID CC) const
bool canExtendTo512DQ() const
bool hasSSEPrefetch() const
bool canUseCMPXCHG16B() const
unsigned char classifyGlobalReference(const GlobalValue *GV, const Module &M) const
bool isPICStyleRIPRel() const
bool isTargetCygMing() const
unsigned char classifyLocalReference(const GlobalValue *GV) const
Classify a global variable reference for the current subtarget according to how we should reference i...
unsigned char classifyBlockAddressReference() const
Classify a blockaddress reference for the current subtarget according to how we should reference it i...
const X86RegisterInfo * getRegisterInfo() const override
bool isTargetWindowsGNU() const
unsigned getPreferVectorWidth() const
bool isTargetWindowsItanium() const
bool isTargetNaCl64() const
const X86FrameLowering * getFrameLowering() const override
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, const Module &M) const
Classify a global function reference for the current subtarget.
bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const override
Overflow nodes should get combined/lowered to optimal instructions (they should allow eliminating exp...
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const override
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool preferABDSToABSWithNSW(EVT VT) const override
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer, MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const
bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
Handle Lowering flag assembly outputs.
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const override
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth) const override
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
bool convertSelectOfConstantsToMath(EVT VT) const override
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint letter, return the type of constraint for this target.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool isLegalStoreImmediate(int64_t Imm) const override
Return true if the specified immediate is legal for the value input of a store instruction.
SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, MachineMemOperand *MMO, SDValue Ptr, SDValue Val, SDValue Mask) const override
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, NegatibleCost &Cost, unsigned Depth) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
bool isCtlzFast() const override
Return true if ctlz instruction is fast.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool supportSwiftError() const override
Return true if the target supports swifterror attribute.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool shouldSplatInsEltVarIndex(EVT VT) const override
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
bool isInlineAsmTargetBranch(const SmallVectorImpl< StringRef > &AsmStrs, unsigned OpNo) const override
On x86, return true if the operand with index OpNo is a CALL or JUMP instruction, which can use eithe...
MVT hasFastEqualityCompare(unsigned NumBits) const override
Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, const APInt &DemandedElts, unsigned MaskIndex, TargetLoweringOpt &TLO, unsigned Depth) const
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const override
bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, EVT VT) const override
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if we believe it is correct and profitable to reduce the load node to a smaller type.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool preferScalarizeSplat(SDNode *N) const override
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const override
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x). On arch...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0; (X & Y) !...
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
bool isShuffleMaskLegal(ArrayRef< int > Mask, EVT VT) const override
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
bool useStackGuardXorFP() const override
If this function returns true, stack protection checks should XOR the frame pointer (or whichever poi...
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
bool shouldScalarizeBinop(SDValue) const override
Scalar ops always have equal or better analysis/performance/power than the vector equivalent,...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type Ty1 to type Ty2.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool areJTsAllowed(const Function *Fn) const override
Returns true if lowering to a jump table is allowed.
bool isCommutativeBinOp(unsigned Opcode) const override
Returns true if the opcode is a commutative binary operation.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const override
Returns preferred type for switch condition.
SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, MachineMemOperand *MMO, SDValue &NewLoad, SDValue Ptr, SDValue PassThru, SDValue Mask) const override
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool isVectorClearMaskLegal(ArrayRef< int > Mask, EVT VT) const override
Similar to isShuffleMaskLegal.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info, const char *Constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Customize the preferred legalization strategy for certain types.
bool shouldConvertPhiType(Type *From, Type *To) const override
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type Ty1 implicit zero-extends the valu...
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const override
TargetLowering::AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const override
Return the preferred fold type: Abs if this is a vector, AddAnd if it is an integer, None otherwise.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool addressingModeSupportsTLS(const GlobalValue &GV) const override
Returns true if the targets addressing mode can target thread local storage (TLS).
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isBinOp(unsigned Opcode) const override
Add x86-specific opcodes to the default list.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue unwrapAddress(SDValue N) const override
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI)
bool isVectorLoadExtDesirable(SDValue) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
unsigned getStackProbeSize(const MachineFunction &MF) const
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
Replace the results of node with an illegal result type with new values built out of custom code.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool needsFixedCatchObjects() const override
constexpr ScalarTy getFixedValue() const
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ X86_ThisCall
Similar to X86_StdCall.
@ X86_StdCall
stdcall is mostly used by the Win32 API.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ C
The default llvm calling convention, compatible with C.
@ X86_FastCall
'fast' analog of X86_StdCall.
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ FRAME_TO_ARGS_OFFSET
FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to first (possible) on-stack ar...
@ RESET_FPENV
Set floating-point environment to default state.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
@ EH_RETURN
OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents 'eh_return' gcc dwarf builtin,...
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
@ BR
Control flow instructions. These all have token chains.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
@ BR_JT
BR_JT - Jumptable branch.
@ GC_TRANSITION_START
GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the beginning and end of GC transition s...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ LOCAL_RECOVER
LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
@ SMULO
Same for multiplication.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ GET_FPENV_MEM
Gets the current floating-point environment.
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ SET_FPENV_MEM
Sets the current floating point environment.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that are the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtVecInRegOpcode(unsigned Opcode)
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isExtOpcode(unsigned Opcode)
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is a bitwise logic opcode.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
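The ISD-level node predicates and condition-code helpers above are typically queried from DAG combines. A minimal sketch, assuming the usual header locations (isZeroOrAllOnesVector and invertAndSwap are illustrative names, not functions from this file):

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// True for a BUILD_VECTOR whose defined lanes are all 0 or all ~0.
static bool isZeroOrAllOnesVector(const SDNode *N) {
  return ISD::isBuildVectorAllZeros(N) || ISD::isBuildVectorAllOnes(N);
}

// !(X op Y) rewritten as (Y op' X): invert the predicate, then commute it.
static ISD::CondCode invertAndSwap(ISD::CondCode CC, EVT VT) {
  return ISD::getSetCCSwappedOperands(ISD::getSetCCInverse(CC, VT));
}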
ID ArrayRef< Type * > Tys
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
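The m_* combinators above come from llvm::PatternMatch and compose into IR pattern queries. A minimal sketch, assuming standard PatternMatch usage (isAndNotPattern is an illustrative name):

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Matches (X & ~Y) with the operands of the 'and' in either order;
// m_Not accepts 'xor Y, -1' or 'xor -1, Y'.
static bool isAndNotPattern(Value *V, Value *&X, Value *&Y) {
  return match(V, m_c_And(m_Value(X), m_Not(m_Value(Y))));
}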
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
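The RTLIB::get* helpers above map a source/destination type pair to a runtime library call. A minimal sketch, assuming the conventional header location (which varies across LLVM versions):

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

using namespace llvm;

static RTLIB::Libcall pickFPToSIntCall(EVT SrcVT, EVT DstVT) {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(SrcVT, DstVT);
  // UNKNOWN_LIBCALL means there is no runtime helper for this combination.
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported fp-to-int conversion");
  return LC;
}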
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
@ X86
Windows x64, Windows Itanium (IA-64)
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_TLSLD
MO_TLSLD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
@ MO_NTPOFF
MO_NTPOFF - On a symbol operand this indicates that the immediate is the negative thread-pointer offs...
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
@ MO_TPOFF
MO_TPOFF - On a symbol operand this indicates that the immediate is the thread-pointer offset for the...
@ MO_TLVP_PIC_BASE
MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate is some TLS offset from the ...
@ MO_TLSGD
MO_TLSGD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
@ MO_TLVP
MO_TLVP - On a symbol operand this indicates that the immediate is some TLS offset.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
@ MO_SECREL
MO_SECREL - On a symbol operand this indicates that the immediate is the offset from beginning of sec...
@ MO_DTPOFF
MO_DTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
@ MO_TLSLDM
MO_TLSLDM - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
@ FST
This instruction implements a truncating store from FP stack slots.
@ CMPM
Vector comparison generating mask bits for fp and integer signed and unsigned data types.
@ FMAX
Floating point max and min.
@ BT
X86 bit-test instructions.
@ HADD
Integer horizontal add/sub.
@ MOVQ2DQ
Copies a 64-bit value from an MMX vector to the low word of an XMM vector, with the high word zero fi...
@ BLENDI
Blend where the selector is an immediate.
@ CMP
X86 compare and logical compare instructions.
@ BLENDV
Dynamic (non-constant condition) vector blend where only the sign bits of the condition elements are ...
@ ADDSUB
Combined add and sub on an FP vector.
@ STRICT_FMAX
Floating point max and min.
@ STRICT_CMPM
Vector comparison generating mask bits for fp and integer signed and unsigned data types.
@ FHADD
Floating point horizontal add/sub.
@ NT_BRIND
BRIND node with NoTrack prefix.
@ FSETCCM
X86 FP SETCC, similar to above, but with output as an i1 mask and a version with SAE.
@ PEXTRB
Extract an 8-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRB.
@ FXOR
Bitwise logical XOR of floating point values.
@ BRCOND
X86 conditional branches.
@ FSETCC
X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
@ PINSRB
Insert the lower 8-bits of a 32-bit value to a vector, corresponds to X86::PINSRB.
@ INSERTPS
Insert any element of a 4 x float vector into any element of a destination 4 x float vector.
@ PSHUFB
Shuffle 16 8-bit values within a vector.
@ PEXTRW
Extract a 16-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRW.
@ AADD
RAO arithmetic instructions.
@ FANDN
Bitwise logical ANDNOT of floating point values.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ FMAXC
Commutative FMIN and FMAX.
@ EXTRQI
SSE4A Extraction and Insertion.
@ FLD
This instruction implements an extending load to FP stack slots.
@ PSADBW
Compute Sum of Absolute Differences.
@ FOR
Bitwise logical OR of floating point values.
@ FIST
This instruction implements a fp->int store from FP stack slots.
@ FP_TO_INT_IN_MEM
This instruction implements FP_TO_SINT with the integer destination in memory and a FP reg source.
@ LADD
LOCK-prefixed arithmetic read-modify-write instructions.
@ MMX_MOVW2D
Copies a GPR into the low 32-bit word of a MMX vector and zero out the high word.
@ Wrapper
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress,...
@ PINSRW
Insert the lower 16-bits of a 32-bit value to a vector, corresponds to X86::PINSRW.
@ CMPCCXADD
Compare and Add if Condition is Met.
@ MMX_MOVD2W
Copies a 32-bit value from the low word of a MMX vector to a GPR.
@ FILD
This instruction implements SINT_TO_FP with the integer source in memory and FP reg result.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
@ ANDNP
Bitwise Logical AND NOT of Packed FP values.
@ VAARG_64
These instructions grab the address of the next argument from a va_list.
@ FAND
Bitwise logical AND of floating point values.
@ CMOV
X86 conditional moves.
@ WrapperRIP
Special wrapper used under X86-64 PIC mode for RIP relative displacements.
@ FSHL
X86 funnel/double shift i16 instructions.
@ FRSQRT
Floating point reciprocal-sqrt and reciprocal approximation.
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, const X86Subtarget &Subtarget, bool AssumeSingleUse=false)
Check if Op is a load operation that could be folded into a vector splat instruction as a memory oper...
bool isZeroNode(SDValue Elt)
Returns true if Elt is a constant zero or floating point constant +0.0.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified condition, e.g. turning COND_E to COND_NE.
bool mayFoldIntoZeroExtend(SDValue Op)
Check if Op is an operation that could be folded into a zero extend x86 instruction.
bool mayFoldIntoStore(SDValue Op)
Check if Op is a value that could be used to fold a store into some other x86 instruction as a memory...
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
int getCCMPCondFlagsFromCondCode(CondCode CC)
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, bool AssumeSingleUse=false)
Check if Op is a load operation that could be folded into some other x86 instruction as a memory oper...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement)
Returns true if the given offset can fit into the displacement field of the instruction.
bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs)
If Op is a constant whose elements are all the same constant or undefined, return true and return the...
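A minimal sketch of how the X86-namespace helpers above are typically used inside the backend; isSplatOfZero and invertBranch are illustrative names, and the includes assume code living in lib/Target/X86:

#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/APInt.h"

using namespace llvm;

// Splat-of-zero test; undef lanes are tolerated.
static bool isSplatOfZero(SDValue Op) {
  APInt SplatVal;
  return X86::isConstantSplat(Op, SplatVal, /*AllowPartialUndefs=*/true) &&
         SplatVal.isZero();
}

// e.g. COND_E -> COND_NE.
static X86::CondCode invertBranch(X86::CondCode CC) {
  return X86::GetOppositeBranchCondition(CC);
}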
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, unsigned NumDstElts, bool IsAnyExtend, SmallVectorImpl< int > &ShuffleMask)
Decode a zero extension instruction as a shuffle mask.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl< int > &ShuffleMask)
Decode a MOVHLPS instruction as a v2f64/v4f32 shuffle mask.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void DecodeZeroMoveLowMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
Decode a move lower and zero upper instruction as a shuffle mask.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for pshuflw.
static const IntrinsicData * getIntrinsicWithChain(unsigned IntNo)
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ SjLj
setjmp/longjmp based exceptions
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
static void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand, unsigned Reg)
Replace the address used in the instruction with the direct memory reference.
void DecodeVPERMV3Mask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void DecodeBLENDMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decode a BLEND immediate mask into a shuffle mask.
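A minimal sketch of the shuffle-decode helpers above, assuming their X86 MCTargetDesc header; the expected mask follows the blend semantics (a set immediate bit selects the lane from the second source):

#include "MCTargetDesc/X86ShuffleDecode.h"
#include "llvm/ADT/SmallVector.h"

void decodeBlendExample() {
  llvm::SmallVector<int, 4> Mask;
  llvm::DecodeBLENDMask(/*NumElts=*/4, /*Imm=*/0b0101, Mask);
  // Bits 0 and 2 are set, so lanes 0 and 2 come from the second source:
  // Mask is expected to be {4, 1, 6, 3}.
}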
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decode a shuffle packed values at 128-bit granularity (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) immed...
void DecodeVPERMMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for VPERMQ/VPERMPD.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
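A minimal sketch combining BuildMI and addFrameReference as listed above; the opcode choice and placeholder arguments are assumptions, and the X86InstrBuilder.h include assumes code living inside the X86 backend:

#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Reload a 32-bit GPR from a stack slot: addFrameReference appends the
// five X86 memory operands that reference frame index FrameIdx.
static void emitStackReload(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            const TargetInstrInfo *TII, Register DestReg,
                            int FrameIdx) {
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV32rm), DestReg),
                    FrameIdx);
}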
void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl< int > &ShuffleMask)
Decode a SSE4A EXTRQ instruction as a shuffle mask.
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
static const IntrinsicData * getIntrinsicWithoutChain(unsigned IntNo)
auto unique(Range &&R, Predicate P)
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl< int > &ShuffleMask, bool SrcIsMem)
Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask.
void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
unsigned M1(unsigned Val)
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl< int > &ShuffleMask)
Decode a MOVLHPS instruction as a v2f64/v4f32 shuffle mask.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least, stopping at the first 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
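A minimal sketch exercising the MathExtras/bit helpers referenced above; the values are chosen so the comments state the exact results:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

void bitMathExamples() {
  assert(llvm::isPowerOf2_32(64u));
  assert(llvm::Log2_32(64u) == 6);           // floor log2
  assert(llvm::Log2_64_Ceil(65u) == 7);      // ceil log2
  assert(llvm::countr_zero(0b101000u) == 3); // trailing zero count
  assert(llvm::popcount(0xF0u) == 4);        // set-bit count
  assert(llvm::bit_ceil(5u) == 8u);          // smallest power of two >= 5
  assert(llvm::Hi_32(0x123456789ABCDEF0ull) == 0x12345678u);
}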
void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
void createUnpackShuffleMask(EVT VT, SmallVectorImpl< int > &Mask, bool Lo, bool Unary)
Generate unpacklo/unpackhi shuffle mask.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
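A minimal sketch of the STLExtras range wrappers referenced above (all_of, none_of, count_if, is_contained, enumerate), using a small local container:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>

void rangeHelperExamples() {
  llvm::SmallVector<int, 4> V = {1, 2, 3, 4};
  assert(llvm::all_of(V, [](int X) { return X > 0; }));
  assert(llvm::none_of(V, [](int X) { return X > 10; }));
  assert(llvm::count_if(V, [](int X) { return X % 2 == 0; }) == 2);
  assert(llvm::is_contained(V, 3));
  for (auto En : llvm::enumerate(V)) {
    // En.index() and En.value() give each element and its position
    // without manual bookkeeping.
    (void)En.index();
    (void)En.value();
  }
}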
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, SmallVectorImpl< int > &ShuffleMask)
Decode a SSE4A INSERTQ instruction as a shuffle mask.
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
void DecodeVPERMVMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
static void verifyIntrinsicTables()
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl< int > &Mask, bool Lo)
Similar to unpacklo/unpackhi, but without the 128-bit lane limitation imposed by AVX and specific to ...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, SmallVectorImpl< int > &ShuffleMask)
Decode a scalar float move instruction as a shuffle mask.
bool isNullConstantOrUndef(SDValue V)
Returns true if V is a constant integer zero or an UNDEF node.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
static X86AddressMode getAddressFromInstr(const MachineInstr *MI, unsigned Operand)
Compute the addressing mode from an machine instruction starting with the given operand.
void DecodeVPPERMMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPPERM mask from a raw array of constants such as from BUILD_VECTOR.
DWARFExpression::Operation Op
void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ TowardNegative
roundTowardNegative.
unsigned M0(unsigned Val)
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*.
void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
static uint32_t extractBits(uint64_t Val, uint32_t Hi, uint32_t Lo)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
const char * toString(DWARFSectionKind Kind)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ INTR_TYPE_SCALAR_MASK_SAE
@ INTR_TYPE_3OP_SCALAR_MASK_SAE
@ INTR_TYPE_SCALAR_MASK_RND
void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps.
void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
void DecodeVectorBroadcast(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
Decodes a broadcast of the first element of a vector.
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for shufp*.
void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
Decodes the shuffle masks for pshufhw.
void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl< int > &ShuffleMask)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void DecodePSHUFBMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a PSHUFB mask from a raw array of constants such as from BUILD_VECTOR.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
static const fltSemantics & x87DoubleExtended() LLVM_READNONE
static const fltSemantics & IEEEquad() LLVM_READNONE
static unsigned int semanticsPrecision(const fltSemantics &)
static const fltSemantics & IEEEdouble() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
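A minimal sketch using the APFloat semantics accessors and rounding modes listed above:

#include "llvm/ADT/APFloat.h"

using namespace llvm;

APFloat roundedSum() {
  APFloat A(APFloat::IEEEsingle(), "1.5");
  APFloat B(APFloat::IEEEsingle(), "2.25");
  // Arithmetic requires an explicit rounding mode.
  A.add(B, APFloat::rmNearestTiesToEven);
  return A; // 3.75, exactly representable in IEEE single precision
}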
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
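A minimal sketch querying the EVT interface listed above; the comments note the values each call is expected to produce for a v4f32 type:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

void evtExamples(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::f32, 4);        // v4f32
  (void)VT.isVector();                                 // true
  (void)VT.is128BitVector();                           // true: 4 x 32 bits
  (void)VT.getVectorElementType();                     // f32
  EVT IntVT = VT.changeVectorElementTypeToInteger();   // v4i32
  (void)IntVT.getScalarSizeInBits();                   // 32
}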
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits sadd_sat(const KnownBits &LHS, const KnownBits &RHS)
Compute knownbits resulting from llvm.sadd.sat(LHS, RHS)
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
bool isZero() const
Returns true if value is all zero.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
bool isUnknown() const
Returns true if we don't know any bits.
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
void setAllZero()
Make all bits known to be zero and discard any previous information.
unsigned getBitWidth() const
Get the bit width of this value.
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
bool isConstant() const
Returns true if we know the value of all bits.
void resetAll()
Resets the known state of all bits.
bool isNonZero() const
Returns true if this value is known to be non-zero.
static KnownBits abdu(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for abdu(LHS, RHS).
KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const
Return a subset of the known bits from [bitPosition,bitPosition+numBits).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
KnownBits zextOrTrunc(unsigned BitWidth) const
Return known bits for a zero extension or truncation of the value we're tracking.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
bool isNegative() const
Returns true if this value is known to be negative.
void setAllOnes()
Make all bits known to be one and discard any previous information.
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
bool isAllOnes() const
Returns true if value is all one bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
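A minimal sketch of the KnownBits interface above; the first case is fully determined, the second leaves the low nibble unknown:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

void knownBitsExample() {
  KnownBits A = KnownBits::makeConstant(APInt(8, 3));
  KnownBits B = KnownBits::makeConstant(APInt(8, 5));
  KnownBits Sum = KnownBits::add(A, B);
  assert(Sum.isConstant() && Sum.getConstant() == 8); // all bits resolved

  KnownBits C(8);            // 8-bit value, nothing known yet
  C.Zero.setHighBits(4);     // upper nibble known to be zero
  assert(C.countMinLeadingZeros() == 4);
  assert(!C.isConstant());   // lower nibble still unknown
}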
This class contains a discriminated union of information about pointers in memory operands,...
bool isDereferenceable(unsigned Size, LLVMContext &C, const DataLayout &DL) const
Return true if memory region [V, V+Offset+Size) is known to be dereferenceable.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
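A minimal sketch building MachinePointerInfo records with the static helpers above; MF and FI are placeholder arguments:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

MachinePointerInfo spillSlotInfo(MachineFunction &MF, int FI) {
  // Refer to a fixed stack object, then to the word 4 bytes into it.
  return MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(4);
}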
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
std::string ConstraintCode
This contains the actual string for the code, like "m".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)
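A minimal sketch of the usual setter chain on TargetLowering::CallLoweringInfo; emitLibCall is an illustrative wrapper and all arguments are placeholders:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include <utility>

using namespace llvm;

static std::pair<SDValue, SDValue>
emitLibCall(SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL,
            SDValue Chain, SDValue Callee, Type *RetTy,
            TargetLowering::ArgListTy &&Args) {
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
  return TLI.LowerCallTo(CLI); // {call result, output chain}
}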
bool isBeforeLegalizeOps() const
bool isAfterLegalizeDAG() const
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool recursivelyDeleteUnusedNodes(SDNode *N)
bool isBeforeLegalize() const
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
X86AddressMode - This struct holds a generalized full x86 address mode.