[llvm] r292101 - [InstCombine][SSE] Add DemandedElts support for PSHUFB instructions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 16 03:30:42 PST 2017


Author: rksimon
Date: Mon Jan 16 05:30:41 2017
New Revision: 292101

URL: http://llvm.org/viewvc/llvm-project?rev=292101&view=rev
Log:
[InstCombine][SSE] Add DemandedElts support for PSHUFB instructions

Simplify a pshufb shuffle mask based on the elements of the mask that are actually demanded.

Differential Revision: https://reviews.llvm.org/D28745

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=292101&r1=292100&r2=292101&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Jan 16 05:30:41 2017
@@ -2315,10 +2315,20 @@ Instruction *InstCombiner::visitCallInst
 
   case Intrinsic::x86_ssse3_pshuf_b_128:
   case Intrinsic::x86_avx2_pshuf_b:
-  case Intrinsic::x86_avx512_pshuf_b_512:
+  case Intrinsic::x86_avx512_pshuf_b_512: {
     if (Value *V = simplifyX86pshufb(*II, *Builder))
       return replaceInstUsesWith(*II, V);
+
+    unsigned VWidth = II->getType()->getVectorNumElements();
+    APInt UndefElts(VWidth, 0);
+    APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+    if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
+      if (V != II)
+        return replaceInstUsesWith(*II, V);
+      return II;
+    }
     break;
+  }
 
   case Intrinsic::x86_avx_vpermilvar_ps:
   case Intrinsic::x86_avx_vpermilvar_ps_256:

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=292101&r1=292100&r2=292101&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Mon Jan 16 05:30:41 2017
@@ -1472,6 +1472,16 @@ Value *InstCombiner::SimplifyDemandedVec
       break;
     }
 
+    case Intrinsic::x86_ssse3_pshuf_b_128:
+    case Intrinsic::x86_avx2_pshuf_b:
+    case Intrinsic::x86_avx512_pshuf_b_512: {
+      Value *Op1 = II->getArgOperand(1);
+      TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
+                                        Depth + 1);
+      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+      break;
+    }
+
     // SSE4A instructions leave the upper 64-bits of the 128-bit result
     // in an undefined state.
     case Intrinsic::x86_sse4a_extrq:

Modified: llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll?rev=292101&r1=292100&r2=292101&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll Mon Jan 16 05:30:41 2017
@@ -469,15 +469,12 @@ define <64 x i8> @fold_with_allundef_elt
 }
 
 ; Demanded elts tests.
-; FIXME: Missed opportunities to pass demanded elts through the pshufb shuffle mask
 
 define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
 ; CHECK-LABEL: @demanded_elts_insertion(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 %M15, i32 15
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
-; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %BaseMask)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 ;
   %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <16 x i8> %1, i8 %M15, i32 15
@@ -489,9 +486,8 @@ define <16 x i8> @demanded_elts_insertio
 define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
 ; CHECK-LABEL: @demanded_elts_insertion_avx2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <32 x i8> [[TMP1]], i8 %M22, i32 22
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP2]])
-; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
+; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
 ;
   %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <32 x i8> %1, i8 %M22, i32 22
@@ -502,11 +498,10 @@ define <32 x i8> @demanded_elts_insertio
 
 define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) {
 ; CHECK-LABEL: @demanded_elts_insertion_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <64 x i8> [[TMP1]], i8 %M30, i32 30
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <64 x i8> [[TMP3]], <64 x i8> undef, <64 x i32> zeroinitializer
-; CHECK-NEXT:    ret <64 x i8> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> undef, i8 %M0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
   %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <64 x i8> %1, i8 %M30, i32 30




More information about the llvm-commits mailing list