[llvm] 74a98fd - Revert "[x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat."
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 7 02:49:31 PDT 2024
Author: Hans Wennborg
Date: 2024-10-07T11:48:24+02:00
New Revision: 74a98fdbc4435a9f8cdf9ea6d1d9a0396921d3d6
URL: https://github.com/llvm/llvm-project/commit/74a98fdbc4435a9f8cdf9ea6d1d9a0396921d3d6
DIFF: https://github.com/llvm/llvm-project/commit/74a98fdbc4435a9f8cdf9ea6d1d9a0396921d3d6.diff
LOG: Revert "[x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat."
> As we're after a constant splat value we can avoid all the complexities of trying to recreate the correct constant via getTargetConstantFromNode.
This caused builds to fail with an assertion:
X86ISelLowering.cpp:48569:
Assertion `C.getZExtValue() != 0 && C.getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) && "Both cases that could cause potential overflows should have " "already been handled."' failed.
See https://github.com/llvm/llvm-project/issues/111325
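The restored code (see the diff below) only treats the multiplier as a constant when the operand is an explicit (splatted) constant node, whereas the reverted change accepted any operand whose bits computeKnownBits could fully determine. A value can have fully known bits without being an explicit constant, which is presumably how a multiplier of 0 or all-ones could reach the assertion that earlier explicit-constant folds were expected to make unreachable. A toy, self-contained sketch of that distinction (illustrative names only, not LLVM's actual KnownBits API):

#include <cstdint>
#include <iostream>

// Toy model of a known-bits query (names are illustrative, not LLVM's).
struct ToyKnownBits {
  uint64_t KnownZero = 0; // bits proven to be 0
  uint64_t KnownOne = 0;  // bits proven to be 1
  bool isConstant(unsigned NumBits) const {
    uint64_t Mask = NumBits >= 64 ? ~0ULL : ((1ULL << NumBits) - 1);
    // "Constant" per known bits: every bit of the type is known either way.
    return ((KnownZero | KnownOne) & Mask) == Mask;
  }
  uint64_t getConstant() const { return KnownOne; }
};

int main() {
  // An expression like "x & 0" is not an explicit constant node, yet every
  // bit of it is provably zero, so a known-bits query reports the constant 0
  // -- one of the two multipliers the assert expects to have been folded
  // away before combineMul's shift/add decomposition runs.
  ToyKnownBits MaskedToZero;
  MaskedToZero.KnownZero = ~0ULL;
  std::cout << MaskedToZero.isConstant(32) << " -> "
            << MaskedToZero.getConstant() << '\n'; // prints "1 -> 0"
  return 0;
}

With the revert, isConstOrConstSplat / getTargetConstantFromNode only succeed for genuine constant splats, so such operands bail out of combineMul before the assertion.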
This reverts commit 1bc87c9f3cb20a51191f522bf4d69338ad6bb4e6.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-add.ll
llvm/test/CodeGen/X86/vector-mul.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 393915cf8795ed..d95894a93c5ea8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48492,15 +48492,26 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
(!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
return SDValue();
- KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
- if (!Known1.isConstant())
- return SDValue();
+ ConstantSDNode *CNode = isConstOrConstSplat(
+ N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);
+ const APInt *C = nullptr;
+ if (!CNode) {
+ if (VT.isVector())
+ if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
+ if (auto *SplatC = RawC->getSplatValue())
+ if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
+ C = &(SplatCI->getValue());
+
+ if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
+ return SDValue();
+ } else {
+ C = &(CNode->getAPIntValue());
+ }
- const APInt &C = Known1.getConstant();
- if (isPowerOf2_64(C.getZExtValue()))
+ if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
- int64_t SignMulAmt = C.getSExtValue();
+ int64_t SignMulAmt = C->getSExtValue();
assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
@@ -48559,12 +48570,12 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (SignMulAmt < 0)
NewMul = DAG.getNegative(NewMul, DL, VT);
} else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(C.getZExtValue(), N, DAG, VT, DL);
+ NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
}
if (!NewMul) {
EVT ShiftVT = VT.isVector() ? VT : MVT::i8;
- assert(C.getZExtValue() != 0 &&
- C.getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
+ assert(C->getZExtValue() != 0 &&
+ C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
if (isPowerOf2_64(AbsMulAmt - 1)) {
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 01a0320bc6b2f1..55d72832e7de6f 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -265,8 +265,8 @@ define void @PR52039(ptr %pa, ptr %pb) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
; AVX2-NEXT: vpsubd (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm1
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
+; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 19bbf7dc0a0e1a..a3eeee59c4438c 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -335,11 +335,22 @@ define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-AVX-LABEL: mul_v4i32_17:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $4, %xmm0, %xmm1
-; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_17:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $4, %xmm0, %xmm1
+; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_17:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_17:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
ret <4 x i32> %1
}
@@ -460,14 +471,13 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
;
; X64-AVX2-LABEL: mul_v8i32_17:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpslld $4, %ymm0, %ymm1
-; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
+; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512DQ-LABEL: mul_v8i32_17:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpslld $4, %ymm0, %ymm1
-; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
ret <8 x i32> %1
@@ -586,13 +596,24 @@ define <4 x i32> @mul_v4i32_neg33(<4 x i32> %a0) nounwind {
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-AVX-LABEL: mul_v4i32_neg33:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $5, %xmm0, %xmm1
-; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_neg33:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $5, %xmm0, %xmm1
+; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; X64-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_neg33:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967263,4294967263,4294967263,4294967263]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_neg33:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33>
ret <4 x i32> %1
}
@@ -747,18 +768,13 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
;
; X64-AVX2-LABEL: mul_v8i32_neg33:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpslld $5, %ymm0, %ymm1
-; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263]
+; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512DQ-LABEL: mul_v8i32_neg33:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpslld $5, %ymm0, %ymm1
-; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512DQ-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <8 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
ret <8 x i32> %1
@@ -1097,11 +1113,22 @@ define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-AVX-LABEL: mul_v4i32_7:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $3, %xmm0, %xmm1
-; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_7:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $3, %xmm0, %xmm1
+; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_7:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_7:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %1
}
@@ -1195,11 +1222,22 @@ define <4 x i32> @mul_v4i32_neg63(<4 x i32> %a0) nounwind {
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-AVX-LABEL: mul_v4i32_neg63:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpslld $6, %xmm0, %xmm1
-; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-XOP-LABEL: mul_v4i32_neg63:
+; X64-XOP: # %bb.0:
+; X64-XOP-NEXT: vpslld $6, %xmm0, %xmm1
+; X64-XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; X64-XOP-NEXT: retq
+;
+; X64-AVX2-LABEL: mul_v4i32_neg63:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967233,4294967233,4294967233,4294967233]
+; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512DQ-LABEL: mul_v4i32_neg63:
+; X64-AVX512DQ: # %bb.0:
+; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 -63, i32 -63, i32 -63, i32 -63>
ret <4 x i32> %1
}