[llvm-commits] [llvm] r163506 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx2-shuffle.ll

Elena Demikhovsky elena.demikhovsky at intel.com
Mon Sep 10 05:13:12 PDT 2012


Author: delena
Date: Mon Sep 10 07:13:11 2012
New Revision: 163506

URL: http://llvm.org/viewvc/llvm-project?rev=163506&view=rev
Log:
The VPSHUFB 256-bit instruction may be generated when one of input vector is undefined or zeroinitializer.
I've added the "zeroinitializer" case in this patch.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx2-shuffle.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=163506&r1=163505&r2=163506&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Sep 10 07:13:11 2012
@@ -6030,14 +6030,25 @@
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   DebugLoc dl = SVOp->getDebugLoc();
-  ArrayRef<int> MaskVals = SVOp->getMask();
+  SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
 
   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+  bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
+  bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
 
-  if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || !V2IsUndef)
-    return SDValue();
-
-  SmallVector<SDValue,32> pshufbMask;
+  // VPSHUFB may be generated if 
+  // (1) one of input vector is undefined or zeroinitializer.
+  // The mask value 0x80 puts 0 in the corresponding slot of the vector.
+  // And (2) the mask indexes don't cross the 128-bit lane.
+  if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() ||
+      (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
+    return SDValue();
+
+  if (V1IsAllZero && !V2IsAllZero) {
+    CommuteVectorShuffleMask(MaskVals, 32);
+    V1 = V2;
+  }
+  SmallVector<SDValue, 32> pshufbMask;
   for (unsigned i = 0; i != 32; i++) {
     int EltIdx = MaskVals[i];
     if (EltIdx < 0 || EltIdx >= 32)

Modified: llvm/trunk/test/CodeGen/X86/avx2-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-shuffle.ll?rev=163506&r1=163505&r2=163506&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-shuffle.ll Mon Sep 10 07:13:11 2012
@@ -36,4 +36,27 @@
                                                                 i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, 
                                                                 i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
   ret <32 x i8>%S
-}
\ No newline at end of file
+}
+
+; CHECK: vpshufb1_test
+; CHECK; vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: ret
+define <32 x i8> @vpshufb1_test(<32 x i8> %a) nounwind {
+  %S = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
+                                                                i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15,  
+                                                                i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, 
+                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
+  ret <32 x i8>%S
+}
+
+
+; CHECK: vpshufb2_test
+; CHECK; vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: ret
+define <32 x i8> @vpshufb2_test(<32 x i8> %a) nounwind {
+  %S = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
+                                                                i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15,  
+                                                                i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, 
+                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
+  ret <32 x i8>%S
+}





More information about the llvm-commits mailing list