[llvm] 6865cff - [X86] combineMOVMSK - fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2)) iff pow2splat(c1)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 3 07:11:27 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-03T15:11:13+01:00
New Revision: 6865cff8ea8b07d9f2385fd92cecb422404f0f35
URL: https://github.com/llvm/llvm-project/commit/6865cff8ea8b07d9f2385fd92cecb422404f0f35
DIFF: https://github.com/llvm/llvm-project/commit/6865cff8ea8b07d9f2385fd92cecb422404f0f35.diff
LOG: [X86] combineMOVMSK - fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2)) iff pow2splat(c1)
We already have a similar fold for movmsk(icmp_eq(and(x,c1),0)), which we can probably merge with this one, but doing so will involve generalizing a lot of the KnownBits code.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bitcast-vector-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f66a6f9c499b..13a59ab7fb0e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54537,6 +54537,32 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(NotMask, DL, VT));
}
+ // Fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2))
+ // iff pow2splat(c1).
+ // Use KnownBits to determine if only a single bit is non-zero
+ // in each element (pow2 or zero), and shift that bit to the msb.
+ // TODO: Merge with the movmsk(icmp_eq(and(x,c1),0)) fold below?
+ if (Src.getOpcode() == X86ISD::PCMPEQ &&
+ Src.getOperand(0).getOpcode() == ISD::AND &&
+ Src.getOperand(1) == Src.getOperand(0).getOperand(1)) {
+ KnownBits KnownSrc = DAG.computeKnownBits(Src.getOperand(1));
+ if (KnownSrc.countMaxPopulation() == 1) {
+ SDLoc DL(N);
+ MVT ShiftVT = SrcVT;
+ SDValue ShiftSrc = Src.getOperand(0);
+ if (ShiftVT.getScalarType() == MVT::i8) {
+ // vXi8 shifts - we only care about the signbit so can use PSLLW.
+ ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ ShiftSrc = DAG.getBitcast(ShiftVT, ShiftSrc);
+ }
+ unsigned ShiftAmt = KnownSrc.countMinLeadingZeros();
+ ShiftSrc = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT,
+ ShiftSrc, ShiftAmt, DAG);
+ ShiftSrc = DAG.getBitcast(SrcVT, ShiftSrc);
+ return DAG.getNode(X86ISD::MOVMSK, DL, VT, ShiftSrc);
+ }
+ }
+
// Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
// iff pow2splat(c1).
// Use KnownBits to determine if only a single bit is non-zero
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index d41e783780a8..7477044c86a7 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -109,9 +109,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
define i1 @trunc_v4i32_cmp(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v4i32_cmp:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: sete %al
@@ -263,9 +261,7 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
define i1 @trunc_v16i8_cmp(<16 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v16i8_cmp:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: psllw $7, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT: setne %al
@@ -402,9 +398,7 @@ define i1 @trunc_v8i132_cmp(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v8i132_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $15, %eax
; SSE2-SSSE3-NEXT: setne %al
@@ -588,9 +582,7 @@ define i1 @trunc_v32i8_cmp(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: trunc_v32i8_cmp:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: psllw $7, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-SSSE3-NEXT: sete %al
More information about the llvm-commits mailing list