[llvm] 93c9b39 - [X86] Fix MOVMSK(CONCAT(X, Y)) -> MOVMSK(AND/OR(X, Y)) fold for float types and demanded elements
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 03:05:17 PST 2022
Author: Simon Pilgrim
Date: 2022-01-28T11:01:47Z
New Revision: 93c9b39d25ce842a911e800f68432668403b8aca
URL: https://github.com/llvm/llvm-project/commit/93c9b39d25ce842a911e800f68432668403b8aca
DIFF: https://github.com/llvm/llvm-project/commit/93c9b39d25ce842a911e800f68432668403b8aca.diff
LOG: [X86] Fix MOVMSK(CONCAT(X,Y)) -> MOVMSK(AND/OR(X,Y)) fold for float types and demanded elements
rG9103b73fe052 assumed that we could OR/AND the source vectors directly, but that fails on float/double vectors unless they are first bitcast to integer types. It also missed the case where any_of checks might be testing fewer than all of the source elements.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-movmsk-avx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aff72452af6c7..961c39832b627 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44500,14 +44500,16 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
// MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
// MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
- if (VecVT.is256BitVector()) {
+ if (VecVT.is256BitVector() && NumElts <= CmpBits) {
SmallVector<SDValue> Ops;
if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
Ops.size() == 2) {
SDLoc DL(EFLAGS);
- EVT SubVT = Ops[0].getValueType();
+ EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
- SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops);
+ SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT,
+ DAG.getBitcast(SubVT, Ops[0]),
+ DAG.getBitcast(SubVT, Ops[1]));
V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 17d01e1d3362c..9e393a76a5b38 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -134,3 +134,36 @@ define i32 @movmskps_sext_v8i32(<8 x i16> %a0) {
%3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
ret i32 %3
}
+
+define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: movmskps_concat_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmovmskps %xmm0, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: retq
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)
+ %3 = icmp ne i32 %2, 0
+ %4 = sext i1 %3 to i32
+ ret i32 %4
+}
+
+define i32 @movmskps_demanded_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: movmskps_demanded_concat_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT: vmovmskps %ymm0, %eax
+; CHECK-NEXT: andl $3, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)
+ %3 = and i32 %2, 3
+ %4 = icmp ne i32 %3, 0
+ %5 = sext i1 %4 to i32
+ ret i32 %5
+}
More information about the llvm-commits
mailing list