[llvm] cc8a34b - [X86] Refactor movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) fold to use KnownBits
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 23 10:50:04 PDT 2023
Author: Simon Pilgrim
Date: 2023-03-23T17:49:46Z
New Revision: cc8a34b11b0cff9c28780401a61d1cfb9a0f8b36
URL: https://github.com/llvm/llvm-project/commit/cc8a34b11b0cff9c28780401a61d1cfb9a0f8b36
DIFF: https://github.com/llvm/llvm-project/commit/cc8a34b11b0cff9c28780401a61d1cfb9a0f8b36.diff
LOG: [X86] Refactor movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) fold to use KnownBits
We don't need an explicit AND mask; instead, we can use KnownBits to determine whether each element has (the same) single non-zero bit, and then shift that bit into the msb/signbit for MOVMSK to access directly.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3a4173e44379..a87dc476a184 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54442,25 +54442,25 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
// Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
// iff pow2splat(c1).
+ // Use KnownBits to determine if only a single bit is non-zero
+ // in each element (pow2 or zero), and shift that bit to the msb.
if (Src.getOpcode() == X86ISD::PCMPEQ &&
- Src.getOperand(0).getOpcode() == ISD::AND &&
ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
- SDValue LHS = Src.getOperand(0).getOperand(0);
- SDValue RHS = Src.getOperand(0).getOperand(1);
- KnownBits KnownRHS = DAG.computeKnownBits(RHS);
- if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) {
+ KnownBits KnownSrc = DAG.computeKnownBits(Src.getOperand(0));
+ if (KnownSrc.countMaxPopulation() == 1) {
SDLoc DL(N);
MVT ShiftVT = SrcVT;
+ SDValue ShiftSrc = Src.getOperand(0);
if (ShiftVT.getScalarType() == MVT::i8) {
// vXi8 shifts - we only care about the signbit so can use PSLLW.
ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- LHS = DAG.getBitcast(ShiftVT, LHS);
+ ShiftSrc = DAG.getBitcast(ShiftVT, ShiftSrc);
}
- unsigned ShiftAmt = KnownRHS.getConstant().countl_zero();
- LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS,
- ShiftAmt, DAG);
- LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT);
- return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS);
+ unsigned ShiftAmt = KnownSrc.countMinLeadingZeros();
+ ShiftSrc = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT,
+ ShiftSrc, ShiftAmt, DAG);
+ ShiftSrc = DAG.getNOT(DL, DAG.getBitcast(SrcVT, ShiftSrc), SrcVT);
+ return DAG.getNode(X86ISD::MOVMSK, DL, VT, ShiftSrc);
}
}
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 761ad105f75d..f22d70506815 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -863,10 +863,8 @@ define i1 @mask_v8i32(<8 x i32> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT: pmovmskb %xmm1, %eax
-; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
More information about the llvm-commits mailing list