[llvm] r258622 - [X86][SSE] Remove INSERTPS dependencies from unreferenced operands.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 23 05:37:09 PST 2016


Author: rksimon
Date: Sat Jan 23 07:37:07 2016
New Revision: 258622

URL: http://llvm.org/viewvc/llvm-project?rev=258622&view=rev
Log:
[X86][SSE] Remove INSERTPS dependencies from unreferenced operands.

If the INSERTPS zero mask clears every element that would be referenced from one of the two input vectors (and that input is not already UNDEF), then replace that input with UNDEF to remove the unnecessary dependency.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/insertps-combine.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258622&r1=258621&r2=258622&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 23 07:37:07 2016
@@ -23937,9 +23937,19 @@ static SDValue PerformTargetShuffleCombi
     SDValue Op1 = N.getOperand(1);
     SDValue Op2 = N.getOperand(2);
     unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
-    unsigned DstIdx = (InsertPSMask >> 4) & 3;
+    unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
+    unsigned ZeroMask = InsertPSMask & 0xF;
+
+    // If we zero out all elements from Op0 then we don't need to reference it.
+    if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
+      return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
+                         DAG.getConstant(InsertPSMask, DL, MVT::i8));
+
+    // If we zero out the element from Op1 then we don't need to reference it.
+    if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
+      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
+                         DAG.getConstant(InsertPSMask, DL, MVT::i8));
 
-    // Attempt to merge insertps with an inner target shuffle node.
     SmallVector<int, 8> TargetMask;
     if (!setTargetShuffleZeroElements(Op0, TargetMask))
       return SDValue();
@@ -23979,7 +23989,7 @@ static SDValue PerformTargetShuffleCombi
     }
 
     if (Updated)
-      return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1,
+      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
                          DAG.getConstant(InsertPSMask, DL, MVT::i8));
 
     return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/insertps-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertps-combine.ll?rev=258622&r1=258621&r2=258622&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertps-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertps-combine.ll Sat Jan 23 07:37:07 2016
@@ -98,6 +98,38 @@ define <4 x float> @shuffle_v4f32_0z6z(<
   ret <4 x float> %vecinit4
 }
 
+define <4 x float> @insertps_undef_input0(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: insertps_undef_input0:
+; SSE:       # BB#0:
+; SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insertps_undef_input0:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
+; AVX-NEXT:    retq
+  %res0 = fadd <4 x float> %a0, <float 1.0, float 1.0, float 1.0, float 1.0>
+  %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21)
+  %res2 = shufflevector <4 x float> %res1, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %res2
+}
+
+define <4 x float> @insertps_undef_input1(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: insertps_undef_input1:
+; SSE:       # BB#0:
+; SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insertps_undef_input1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
+; AVX-NEXT:    retq
+  %res0 = fadd <4 x float> %a1, <float 1.0, float 1.0, float 1.0, float 1.0>
+  %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21)
+  %res2 = shufflevector <4 x float> %res1, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x float> %res2
+}
+
 define float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: extract_zero_insertps_z0z7:
 ; SSE:       # BB#0:




More information about the llvm-commits mailing list