[llvm] 3d862c7 - [x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat. (REAPPLIED)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 7 03:01:48 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-07T11:01:22+01:00
New Revision: 3d862c78bbb5ecbdfe93996bdf2dcfc64325ae87
URL: https://github.com/llvm/llvm-project/commit/3d862c78bbb5ecbdfe93996bdf2dcfc64325ae87
DIFF: https://github.com/llvm/llvm-project/commit/3d862c78bbb5ecbdfe93996bdf2dcfc64325ae87.diff
LOG: [x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat. (REAPPLIED)
As we're after a constant splat value, we can avoid all the complexities of trying to recreate the correct constant via getTargetConstantFromNode.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-add.ll
llvm/test/CodeGen/X86/mul-constant-i64.ll
llvm/test/CodeGen/X86/vector-mul.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d95894a93c5ea8..73f6b51907eb35 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48492,26 +48492,15 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
(!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
return SDValue();
- ConstantSDNode *CNode = isConstOrConstSplat(
- N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);
- const APInt *C = nullptr;
- if (!CNode) {
- if (VT.isVector())
- if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
- if (auto *SplatC = RawC->getSplatValue())
- if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
- C = &(SplatCI->getValue());
-
- if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
- return SDValue();
- } else {
- C = &(CNode->getAPIntValue());
- }
+ KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
+ if (!Known1.isConstant())
+ return SDValue();
- if (isPowerOf2_64(C->getZExtValue()))
+ const APInt &C = Known1.getConstant();
+ if (isPowerOf2_64(C.getZExtValue()) || C.isZero() || C.isAllOnes())
return SDValue();
- int64_t SignMulAmt = C->getSExtValue();
+ int64_t SignMulAmt = C.getSExtValue();
assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
@@ -48570,14 +48559,10 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (SignMulAmt < 0)
NewMul = DAG.getNegative(NewMul, DL, VT);
} else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
+ NewMul = combineMulSpecial(C.getZExtValue(), N, DAG, VT, DL);
}
if (!NewMul) {
EVT ShiftVT = VT.isVector() ? VT : MVT::i8;
- assert(C->getZExtValue() != 0 &&
- C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
- "Both cases that could cause potential overflows should have "
- "already been handled.");
if (isPowerOf2_64(AbsMulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 55d72832e7de6f..01a0320bc6b2f1 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -265,8 +265,8 @@ define void @PR52039(ptr %pa, ptr %pb) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
; AVX2-NEXT: vpsubd (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
-; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 27e32e5613bcf2..a2a1c511302ce7 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -1614,3 +1614,90 @@ define i64 @test_mul_spec(i64 %x) nounwind {
%mul3 = mul nsw i64 %add, %add2
ret i64 %mul3
}
+
+define i64 @PR111325(i64 %a0, i1 %a1) {
+; X86-LABEL: PR111325:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: retl
+;
+; X86-NOOPT-LABEL: PR111325:
+; X86-NOOPT: # %bb.0: # %entry
+; X86-NOOPT-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: andb $1, %cl
+; X86-NOOPT-NEXT: xorl %eax, %eax
+; X86-NOOPT-NEXT: xorl %edx, %edx
+; X86-NOOPT-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: cmpb $1, %cl
+; X86-NOOPT-NEXT: sbbl %eax, %eax
+; X86-NOOPT-NEXT: orl %edx, %eax
+; X86-NOOPT-NEXT: xorl %edx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; X64-HSW-LABEL: PR111325:
+; X64-HSW: # %bb.0: # %entry
+; X64-HSW-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-HSW-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-HSW-NEXT: imull %edi, %eax
+; X64-HSW-NEXT: testb $1, %sil
+; X64-HSW-NEXT: cmoveq %rcx, %rax
+; X64-HSW-NEXT: retq
+;
+; X64-JAG-LABEL: PR111325:
+; X64-JAG: # %bb.0: # %entry
+; X64-JAG-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-JAG-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-JAG-NEXT: imull %edi, %eax
+; X64-JAG-NEXT: testb $1, %sil
+; X64-JAG-NEXT: cmoveq %rcx, %rax
+; X64-JAG-NEXT: retq
+;
+; X64-SLM-LABEL: PR111325:
+; X64-SLM: # %bb.0: # %entry
+; X64-SLM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-SLM-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-SLM-NEXT: imull %edi, %eax
+; X64-SLM-NEXT: testb $1, %sil
+; X64-SLM-NEXT: cmoveq %rcx, %rax
+; X64-SLM-NEXT: retq
+;
+; X64-HSW-NOOPT-LABEL: PR111325:
+; X64-HSW-NOOPT: # %bb.0: # %entry
+; X64-HSW-NOOPT-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-HSW-NOOPT-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-HSW-NOOPT-NEXT: imull %edi, %eax
+; X64-HSW-NOOPT-NEXT: testb $1, %sil
+; X64-HSW-NOOPT-NEXT: cmoveq %rcx, %rax
+; X64-HSW-NOOPT-NEXT: retq
+;
+; X64-JAG-NOOPT-LABEL: PR111325:
+; X64-JAG-NOOPT: # %bb.0: # %entry
+; X64-JAG-NOOPT-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-JAG-NOOPT-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-JAG-NOOPT-NEXT: imull %edi, %eax
+; X64-JAG-NOOPT-NEXT: testb $1, %sil
+; X64-JAG-NOOPT-NEXT: cmoveq %rcx, %rax
+; X64-JAG-NOOPT-NEXT: retq
+;
+; X64-SLM-NOOPT-LABEL: PR111325:
+; X64-SLM-NOOPT: # %bb.0: # %entry
+; X64-SLM-NOOPT-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-SLM-NOOPT-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-SLM-NOOPT-NEXT: imull %edi, %eax
+; X64-SLM-NOOPT-NEXT: testb $1, %sil
+; X64-SLM-NOOPT-NEXT: cmoveq %rcx, %rax
+; X64-SLM-NOOPT-NEXT: retq
+entry:
+ %mul = mul i64 %a0, 4294967295
+ %mask = and i64 %mul, 4294967295
+ %sel = select i1 %a1, i64 %mask, i64 4294967295
+ ret i64 %sel
+}
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index a3eeee59c4438c..19bbf7dc0a0e1a 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -335,22 +335,11 @@ define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-XOP-LABEL: mul_v4i32_17:
-; X64-XOP: # %bb.0:
-; X64-XOP-NEXT: vpslld $4, %xmm0, %xmm1
-; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; X64-XOP-NEXT: retq
-;
-; X64-AVX2-LABEL: mul_v4i32_17:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
-; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: mul_v4i32_17:
-; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX-LABEL: mul_v4i32_17:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vpslld $4, %xmm0, %xmm1
+; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
ret <4 x i32> %1
}
@@ -471,13 +460,14 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
;
; X64-AVX2-LABEL: mul_v8i32_17:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17]
-; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpslld $4, %ymm0, %ymm1
+; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512DQ-LABEL: mul_v8i32_17:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vpslld $4, %ymm0, %ymm1
+; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
ret <8 x i32> %1
@@ -596,24 +586,13 @@ define <4 x i32> @mul_v4i32_neg33(<4 x i32> %a0) nounwind {
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-XOP-LABEL: mul_v4i32_neg33:
-; X64-XOP: # %bb.0:
-; X64-XOP-NEXT: vpslld $5, %xmm0, %xmm1
-; X64-XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; X64-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; X64-XOP-NEXT: retq
-;
-; X64-AVX2-LABEL: mul_v4i32_neg33:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967263,4294967263,4294967263,4294967263]
-; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: mul_v4i32_neg33:
-; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX-LABEL: mul_v4i32_neg33:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vpslld $5, %xmm0, %xmm1
+; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33>
ret <4 x i32> %1
}
@@ -768,13 +747,18 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
;
; X64-AVX2-LABEL: mul_v8i32_neg33:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263,4294967263]
-; X64-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpslld $5, %ymm0, %ymm1
+; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512DQ-LABEL: mul_v8i32_neg33:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vpslld $5, %ymm0, %ymm1
+; X64-AVX512DQ-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <8 x i32> %a0, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
ret <8 x i32> %1
@@ -1113,22 +1097,11 @@ define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-XOP-LABEL: mul_v4i32_7:
-; X64-XOP: # %bb.0:
-; X64-XOP-NEXT: vpslld $3, %xmm0, %xmm1
-; X64-XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; X64-XOP-NEXT: retq
-;
-; X64-AVX2-LABEL: mul_v4i32_7:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
-; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: mul_v4i32_7:
-; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX-LABEL: mul_v4i32_7:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vpslld $3, %xmm0, %xmm1
+; X64-AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %1
}
@@ -1222,22 +1195,11 @@ define <4 x i32> @mul_v4i32_neg63(<4 x i32> %a0) nounwind {
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
-; X64-XOP-LABEL: mul_v4i32_neg63:
-; X64-XOP: # %bb.0:
-; X64-XOP-NEXT: vpslld $6, %xmm0, %xmm1
-; X64-XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X64-XOP-NEXT: retq
-;
-; X64-AVX2-LABEL: mul_v4i32_neg63:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967233,4294967233,4294967233,4294967233]
-; X64-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: mul_v4i32_neg63:
-; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX-LABEL: mul_v4i32_neg63:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vpslld $6, %xmm0, %xmm1
+; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 -63, i32 -63, i32 -63, i32 -63>
ret <4 x i32> %1
}
More information about the llvm-commits
mailing list