[PATCH] D49092: [X86][AVX] Use Zeroable mask to improve shuffle mask widening

Simon Pilgrim via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 12 06:09:08 PDT 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rL336900: [X86][AVX] Use Zeroable mask to improve shuffle mask widening (authored by RKSimon, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D49092?vs=154652&id=155159#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D49092

Files:
  llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
  llvm/trunk/test/CodeGen/X86/avx-cast.ll


Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -4973,6 +4973,19 @@
   return true;
 }
 
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+                                    const APInt &Zeroable,
+                                    SmallVectorImpl<int> &WidenedMask) {
+  SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
+  for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
+    if (TargetMask[i] == SM_SentinelUndef)
+      continue;
+    if (Zeroable[i])
+      TargetMask[i] = SM_SentinelZero;
+  }
+  return canWidenShuffleElements(TargetMask, WidenedMask);
+}
+
 static bool canWidenShuffleElements(ArrayRef<int> Mask) {
   SmallVector<int, 32> WidenedMask;
   return canWidenShuffleElements(Mask, WidenedMask);
@@ -13144,7 +13157,7 @@
     return SDValue();
 
   SmallVector<int, 4> WidenedMask;
-  if (!canWidenShuffleElements(Mask, WidenedMask))
+  if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask))
     return SDValue();
 
   bool IsLowZero = (Zeroable & 0x3) == 0x3;
@@ -13213,7 +13226,8 @@
   //    [6]   - ignore
   //    [7]   - zero high half of destination
 
-  assert(WidenedMask[0] >= 0 && WidenedMask[1] >= 0 && "Undef half?");
+  assert((WidenedMask[0] >= 0 || IsLowZero) &&
+         (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
 
   unsigned PermMask = 0;
   PermMask |= IsLowZero  ? 0x08 : (WidenedMask[0] << 0);
@@ -14326,6 +14340,7 @@
   // function lowerV2X128VectorShuffle() is better solution.
   assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
 
+  // TODO - use Zeroable like we do for lowerV2X128VectorShuffle?
   SmallVector<int, 4> WidenedMask;
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();
Index: llvm/trunk/test/CodeGen/X86/avx-cast.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-cast.ll
+++ llvm/trunk/test/CodeGen/X86/avx-cast.ll
@@ -20,9 +20,7 @@
 define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
 ; AVX-LABEL: castB:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT:    vmovaps %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
   ret <4 x double> %shuffle.i
@@ -33,9 +31,7 @@
 define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
 ; AVX-LABEL: castC:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT:    vmovaps %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
   ret <4 x i64> %shuffle.i


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D49092.155159.patch
Type: text/x-patch
Size: 3124 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180712/4df1ed6e/attachment.bin>


More information about the llvm-commits mailing list