[llvm] 7144021 - [X86][SSE1] Add support for logic+movmsk patterns (PR42870)
Author: Simon Pilgrim
Date: 2020-03-24T14:28:40Z
New Revision: 714402147daabad11c0bad2e54be997b7fb06b1d
URL: https://github.com/llvm/llvm-project/commit/714402147daabad11c0bad2e54be997b7fb06b1d
DIFF: https://github.com/llvm/llvm-project/commit/714402147daabad11c0bad2e54be997b7fb06b1d.diff
LOG: [X86][SSE1] Add support for logic+movmsk patterns (PR42870)
rL368506 handled the basic case, but we need to account for boolean logic patterns as well.
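For reference, the kind of IR this now catches (sketched here from the test_and fragments in pr42870.ll below; the exact value numbering is illustrative) tests the sign bit of every lane, combines the v4i1 results with a bitwise op, and then bitcasts the mask to a scalar:

  %0 = bitcast <4 x float> %a to <4 x i32>
  %1 = bitcast <4 x float> %b to <4 x i32>
  %2 = icmp slt <4 x i32> %0, zeroinitializer
  %3 = icmp slt <4 x i32> %1, zeroinitializer
  %4 = and <4 x i1> %2, %3
  %5 = bitcast <4 x i1> %4 to i4
  %6 = zext i4 %5 to i32

rL368506 only recognized a lone compare feeding the bitcast, so the intermediate and/or/xor forced SSE1-only targets to scalarize the whole sequence; with this patch the mask stays in XMM registers as andps + movmskps, as the updated CHECK lines show.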
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr42870.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 017993399c14..3828de5edd68 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37027,6 +37027,51 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
return false;
}
+// Helper to flip between AND/OR/XOR opcodes and their X86ISD FP equivalents.
+static unsigned getAltBitOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  case ISD::AND: return X86ISD::FAND;
+  case ISD::OR: return X86ISD::FOR;
+  case ISD::XOR: return X86ISD::FXOR;
+  case X86ISD::ANDNP: return X86ISD::FANDN;
+  }
+  llvm_unreachable("Unknown bitwise opcode");
+}
+
+// Helper to adjust v4i32 MOVMSK expansion to work with SSE1-only targets.
+static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src,
+                                          const SDLoc &DL) {
+  EVT SrcVT = Src.getValueType();
+  if (SrcVT != MVT::v4i1)
+    return SDValue();
+
+  switch (Src.getOpcode()) {
+  case ISD::SETCC:
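+    // A v4i32 sign-bit test (setlt vs. zero) reads exactly the bits that
+    // MOVMSKPS inspects, so the compare input can be treated as v4f32.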
+    if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
+        ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
+        cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) {
+      SDValue Op0 = Src.getOperand(0);
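+      // Only do this if we can avoid scalarizing the input.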
+      if (ISD::isNormalLoad(Op0.getNode()))
+        return DAG.getBitcast(MVT::v4f32, Op0);
+      if (Op0.getOpcode() == ISD::BITCAST &&
+          Op0.getOperand(0).getValueType() == MVT::v4f32)
+        return Op0.getOperand(0);
+    }
+    break;
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR: {
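+    // Recurse on both operands; fold only if both sides convert, switching to
+    // the equivalent FP logic opcode so the value stays in an XMM register.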
+    SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL);
+    SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL);
+    if (Op0 && Op1)
+      return DAG.getNode(getAltBitOpcode(Src.getOpcode()), DL, MVT::v4f32, Op0,
+                         Op1);
+    break;
+  }
+  }
+  return SDValue();
+}
+
// Helper to push sign extension of vXi1 SETCC result through bitops.
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
                                          SDValue Src, const SDLoc &DL) {
@@ -37057,6 +37102,16 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
  if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
    return SDValue();

+  // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
+  // legalization destroys the v4i32 type.
+  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2()) {
+    if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) {
+      V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32,
+                      DAG.getBitcast(MVT::v4f32, V));
+      return DAG.getZExtOrTrunc(V, DL, VT);
+    }
+  }
+
  // If the input is a truncate from v16i8 or v32i8 go ahead and use a
  // movmskb even with avx512. This will be better than truncating to vXi1 and
  // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
@@ -37319,24 +37374,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
  if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
    return V;

-  // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
-  // legalization destroys the v4i32 type.
-  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 &&
-      VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC &&
-      N0.getOperand(0).getValueType() == MVT::v4i32 &&
-      ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) &&
-      cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETLT) {
-    SDValue N00 = N0.getOperand(0);
-    // Only do this if we can avoid scalarizing the input.
-    if (ISD::isNormalLoad(N00.getNode()) ||
-        (N00.getOpcode() == ISD::BITCAST &&
-         N00.getOperand(0).getValueType() == MVT::v4f32)) {
-      SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32,
-                              DAG.getBitcast(MVT::v4f32, N00));
-      return DAG.getZExtOrTrunc(V, dl, VT);
-    }
-  }
-
  // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
  // type, widen both sides to avoid a trip through memory.
  if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
diff --git a/llvm/test/CodeGen/X86/pr42870.ll b/llvm/test/CodeGen/X86/pr42870.ll
index e4ffcb4787e8..c42cb7cb8b28 100644
--- a/llvm/test/CodeGen/X86/pr42870.ll
+++ b/llvm/test/CodeGen/X86/pr42870.ll
@@ -33,26 +33,8 @@ start:
define i32 @test_and(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_and:
; CHECK: ## %bb.0: ## %start
-; CHECK-NEXT: subl $28, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: andps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm0, (%esp)
-; CHECK-NEXT: cmpl $0, (%esp)
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %cl
-; CHECK-NEXT: addb %cl, %cl
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %dl
-; CHECK-NEXT: addb %dl, %dl
-; CHECK-NEXT: orb %al, %dl
-; CHECK-NEXT: shlb $2, %dl
-; CHECK-NEXT: orb %cl, %dl
-; CHECK-NEXT: movzbl %dl, %eax
-; CHECK-NEXT: addl $28, %esp
+; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
%0 = bitcast <4 x float> %a to <4 x i32>
@@ -68,26 +50,8 @@ start:
define i32 @test_or(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_or:
; CHECK: ## %bb.0: ## %start
-; CHECK-NEXT: subl $28, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: orps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm0, (%esp)
-; CHECK-NEXT: cmpl $0, (%esp)
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %cl
-; CHECK-NEXT: addb %cl, %cl
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %dl
-; CHECK-NEXT: addb %dl, %dl
-; CHECK-NEXT: orb %al, %dl
-; CHECK-NEXT: shlb $2, %dl
-; CHECK-NEXT: orb %cl, %dl
-; CHECK-NEXT: movzbl %dl, %eax
-; CHECK-NEXT: addl $28, %esp
+; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
%0 = bitcast <4 x float> %a to <4 x i32>
@@ -103,42 +67,8 @@ start:
define i32 @test_xor(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_xor:
; CHECK: ## %bb.0: ## %start
-; CHECK-NEXT: pushl %ebx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: subl $40, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset %ebx, -8
-; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movaps %xmm1, (%esp)
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %al
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %cl
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %dl
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %ah
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %ch
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %dh
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sets %bl
-; CHECK-NEXT: cmpl $0, (%esp)
-; CHECK-NEXT: sets %bh
-; CHECK-NEXT: xorb %ah, %bh
-; CHECK-NEXT: xorb %dl, %bl
-; CHECK-NEXT: addb %bl, %bl
-; CHECK-NEXT: orb %bh, %bl
-; CHECK-NEXT: xorb %cl, %dh
-; CHECK-NEXT: xorb %al, %ch
-; CHECK-NEXT: addb %ch, %ch
-; CHECK-NEXT: orb %dh, %ch
-; CHECK-NEXT: shlb $2, %ch
-; CHECK-NEXT: orb %bl, %ch
-; CHECK-NEXT: movzbl %ch, %eax
-; CHECK-NEXT: addl $40, %esp
-; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: xorps %xmm1, %xmm0
+; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: retl
start:
%0 = bitcast <4 x float> %a to <4 x i32>