[PATCH] D28745: [InstCombine][SSE] Add DemandedElts support for PSHUFB instructions

Simon Pilgrim via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 16 03:41:51 PST 2017


This revision was automatically updated to reflect the committed changes.
Closed by commit rL292101: [InstCombine][SSE] Add DemandedElts support for PSHUFB instructions (authored by RKSimon).

Changed prior to commit:
  https://reviews.llvm.org/D28745?vs=84496&id=84542#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D28745

Files:
  llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
  llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll


Index: llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
+++ llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
@@ -469,15 +469,12 @@
 }
 
 ; Demanded elts tests.
-; FIXME: Missed opportunities to pass demanded elts through the pshufb shuffle mask
 
 define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
 ; CHECK-LABEL: @demanded_elts_insertion(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 %M15, i32 15
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
-; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %BaseMask)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 ;
   %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <16 x i8> %1, i8 %M15, i32 15
@@ -489,9 +486,8 @@
 define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
 ; CHECK-LABEL: @demanded_elts_insertion_avx2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <32 x i8> [[TMP1]], i8 %M22, i32 22
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP2]])
-; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
+; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
 ;
   %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <32 x i8> %1, i8 %M22, i32 22
@@ -502,11 +498,10 @@
 
 define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) {
 ; CHECK-LABEL: @demanded_elts_insertion_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <64 x i8> [[TMP1]], i8 %M30, i32 30
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <64 x i8> [[TMP3]], <64 x i8> undef, <64 x i32> zeroinitializer
-; CHECK-NEXT:    ret <64 x i8> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> undef, i8 %M0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
   %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
   %2 = insertelement <64 x i8> %1, i8 %M30, i32 30
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2315,10 +2315,20 @@
 
   case Intrinsic::x86_ssse3_pshuf_b_128:
   case Intrinsic::x86_avx2_pshuf_b:
-  case Intrinsic::x86_avx512_pshuf_b_512:
+  case Intrinsic::x86_avx512_pshuf_b_512: {
     if (Value *V = simplifyX86pshufb(*II, *Builder))
       return replaceInstUsesWith(*II, V);
+
+    unsigned VWidth = II->getType()->getVectorNumElements();
+    APInt UndefElts(VWidth, 0);
+    APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+    if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
+      if (V != II)
+        return replaceInstUsesWith(*II, V);
+      return II;
+    }
     break;
+  }
 
   case Intrinsic::x86_avx_vpermilvar_ps:
   case Intrinsic::x86_avx_vpermilvar_ps_256:
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1472,6 +1472,16 @@
       break;
     }
 
+    case Intrinsic::x86_ssse3_pshuf_b_128:
+    case Intrinsic::x86_avx2_pshuf_b:
+    case Intrinsic::x86_avx512_pshuf_b_512: {
+      Value *Op1 = II->getArgOperand(1);
+      TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
+                                        Depth + 1);
+      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+      break;
+    }
+
     // SSE4A instructions leave the upper 64-bits of the 128-bit result
     // in an undefined state.
     case Intrinsic::x86_sse4a_extrq:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D28745.84542.patch
Type: text/x-patch
Size: 5252 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170116/a8417ce8/attachment.bin>


More information about the llvm-commits mailing list