[PATCH] D65689: [X86] WIP Match the IR pattern for movmsk on SSE1 only targets where v4i32 isn't legal
Phabricator via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 10 00:52:10 PDT 2019
This revision was not accepted when it landed; it landed in state "Needs Review".
This revision was automatically updated to reflect the committed changes.
Closed by commit rL368506: [X86] Match the IR pattern for movmsk on SSE1 only targets where v4i32 isn't… (authored by ctopper, committed by ).
Changed prior to commit:
https://reviews.llvm.org/D65689?vs=213155&id=214510#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D65689/new/
https://reviews.llvm.org/D65689
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/pr42870.ll
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -34820,6 +34820,24 @@
if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
return V;
+ // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
+ // legalization destroys the v4i32 type.
+ if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 &&
+ VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC &&
+ N0.getOperand(0).getValueType() == MVT::v4i32 &&
+ ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) &&
+ cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETLT) {
+ SDValue N00 = N0.getOperand(0);
+ // Only do this if we can avoid scalarizing the input.
+ if (ISD::isNormalLoad(N00.getNode()) ||
+ (N00.getOpcode() == ISD::BITCAST &&
+ N00.getOperand(0).getValueType() == MVT::v4f32)) {
+ SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4f32, N00));
+ return DAG.getZExtOrTrunc(V, dl, VT);
+ }
+ }
+
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
@@ -41775,7 +41793,8 @@
}
static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = N->getSimpleValueType(0);
@@ -41796,7 +41815,7 @@
// Look through int->fp bitcasts that don't change the element width.
unsigned EltWidth = SrcVT.getScalarSizeInBits();
- if (Src.getOpcode() == ISD::BITCAST &&
+ if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST &&
Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));
@@ -43759,7 +43778,7 @@
case X86ISD::FMSUBADD_RND:
case X86ISD::FMADDSUB:
case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget);
- case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI);
+ case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
case X86ISD::MSCATTER:
case ISD::MGATHER:
Index: llvm/trunk/test/CodeGen/X86/pr42870.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr42870.ll
+++ llvm/trunk/test/CodeGen/X86/pr42870.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=sse | FileCheck %s
+
+define i32 @foo(<4 x float>* %a) {
+; CHECK-LABEL: foo:
+; CHECK: ## %bb.0: ## %start
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movaps (%eax), %xmm0
+; CHECK-NEXT: movmskps %xmm0, %eax
+; CHECK-NEXT: retl
+start:
+ %0 = bitcast <4 x float>* %a to <4 x i32>*
+ %1 = load <4 x i32>, <4 x i32>* %0, align 16
+ %2 = icmp slt <4 x i32> %1, zeroinitializer
+ %3 = bitcast <4 x i1> %2 to i4
+ %4 = zext i4 %3 to i32
+ ret i32 %4
+}
+
+define i32 @bar(<4 x float> %a) {
+; CHECK-LABEL: bar:
+; CHECK: ## %bb.0: ## %start
+; CHECK-NEXT: movmskps %xmm0, %eax
+; CHECK-NEXT: retl
+start:
+ %0 = bitcast <4 x float> %a to <4 x i32>
+ %1 = icmp slt <4 x i32> %0, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = zext i4 %2 to i32
+ ret i32 %3
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65689.214510.patch
Type: text/x-patch
Size: 3803 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190810/9a2f9fdd/attachment.bin>
More information about the llvm-commits
mailing list