[llvm] r312248 - [InstCombine] improve demanded vector elements analysis of insertelement

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 31 08:57:17 PDT 2017


Author: spatel
Date: Thu Aug 31 08:57:17 2017
New Revision: 312248

URL: http://llvm.org/viewvc/llvm-project?rev=312248&view=rev
Log:
[InstCombine] improve demanded vector elements analysis of insertelement

Recurse into the insertelement's vector operand first instead of returning as soon as the first
optimization is found. Also, return early in the caller instead of continuing, because that allows
another round of simplification before we potentially lose undef information from a shuffle mask
by eliminating the shuffle.

As noted in the review, we could probably do better and be more efficient by moving all of the
demanded elements analysis into a separate pass, but this is yet another quick fix to instcombine.

Differential Revision: https://reviews.llvm.org/D37236

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll
    llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=312248&r1=312247&r2=312248&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Thu Aug 31 08:57:17 2017
@@ -993,22 +993,23 @@ Value *InstCombiner::SimplifyDemandedVec
       break;
     }
 
+    // The element inserted overwrites whatever was there, so the input demanded
+    // set is simpler than the output set.
+    unsigned IdxNo = Idx->getZExtValue();
+    APInt PreInsertDemandedElts = DemandedElts;
+    if (IdxNo < VWidth)
+      PreInsertDemandedElts.clearBit(IdxNo);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
+                                      UndefElts, Depth + 1);
+    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
     // If this is inserting an element that isn't demanded, remove this
     // insertelement.
-    unsigned IdxNo = Idx->getZExtValue();
     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
       Worklist.Add(I);
       return I->getOperand(0);
     }
 
-    // Otherwise, the element inserted overwrites whatever was there, so the
-    // input demanded set is simpler than the output set.
-    APInt DemandedElts2 = DemandedElts;
-    DemandedElts2.clearBit(IdxNo);
-    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
-                                      UndefElts, Depth + 1);
-    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
-
     // The inserted element is defined.
     UndefElts.clearBit(IdxNo);
     break;

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=312248&r1=312247&r2=312248&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Thu Aug 31 08:57:17 2017
@@ -1165,9 +1165,7 @@ Instruction *InstCombiner::visitShuffleV
   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
     if (V != &SVI)
       return replaceInstUsesWith(SVI, V);
-    LHS = SVI.getOperand(0);
-    RHS = SVI.getOperand(1);
-    MadeChange = true;
+    return &SVI;
   }
 
   unsigned LHSWidth = LHS->getType()->getVectorNumElements();

Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll?rev=312248&r1=312247&r2=312248&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll Thu Aug 31 08:57:17 2017
@@ -485,9 +485,8 @@ define <16 x i8> @demanded_elts_insertio
 
 define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
 ; CHECK-LABEL: @demanded_elts_insertion_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
-; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %BaseMask)
+; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
 ;
   %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <32 x i8> %1, i8 %M22, i32 22

Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=312248&r1=312247&r2=312248&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Thu Aug 31 08:57:17 2017
@@ -142,13 +142,11 @@ define <2 x i64> @PR24922(<2 x i64> %v)
   ret <2 x i64> %result
 }
 
-; FIXME: The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
+; The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
 
 define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
 ; CHECK-LABEL: @inselt_shuf_no_demand(
-; CHECK-NEXT:    [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT:    [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT:    ret <4 x float> [[OUT12]]
+; CHECK-NEXT:    ret <4 x float> undef
 ;
   %out1 = insertelement <4 x float> undef, float %a1, i32 1
   %out12 = insertelement <4 x float> %out1, float %a2, i32 2
@@ -157,13 +155,11 @@ define <4 x float> @inselt_shuf_no_deman
   ret <4 x float> %shuffle
 }
 
-; FIXME: The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
+; The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
 
 define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a3) {
 ; CHECK-LABEL: @inselt_shuf_no_demand_commute(
-; CHECK-NEXT:    [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT:    [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT:    ret <4 x float> [[OUT12]]
+; CHECK-NEXT:    ret <4 x float> undef
 ;
   %out1 = insertelement <4 x float> undef, float %a1, i32 1
   %out12 = insertelement <4 x float> %out1, float %a2, i32 2
@@ -172,15 +168,14 @@ define <4 x float> @inselt_shuf_no_deman
   ret <4 x float> %shuffle
 }
 
-; FIXME: The add uses 'out012' giving it multiple uses after the shuffle is transformed to also
+; The add uses 'out012' giving it multiple uses after the shuffle is transformed to also
 ; use 'out012'. The analysis should be able to see past that.
 
 define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) {
 ; CHECK-LABEL: @inselt_shuf_no_demand_multiuse(
 ; CHECK-NEXT:    [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 %a0, i32 0
 ; CHECK-NEXT:    [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 %a1, i32 1
-; CHECK-NEXT:    [[OUT012:%.*]] = insertelement <4 x i32> [[OUT01]], i32 %a0, i32 2
-; CHECK-NEXT:    [[FOO:%.*]] = add <4 x i32> [[OUT012]], %b
+; CHECK-NEXT:    [[FOO:%.*]] = add <4 x i32> [[OUT01]], %b
 ; CHECK-NEXT:    ret <4 x i32> [[FOO]]
 ;
   %out0 = insertelement <4 x i32> undef, i32 %a0, i32 0




More information about the llvm-commits mailing list