[llvm-commits] [llvm] r145005 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-shift.ll

Sat Nov 19 17:44:41 PST 2011

Craig, 

The splat-shift handling in the legalizer is there for a reason. In some cases a vector SIGN_EXTEND_INREG is legalized using shifts, and during that process the shift amount is itself legalized to a constant-pool load. By then it is too late to handle the case in the DAG combiner, because the combiner cannot handle constant-pool loads.

Nadav

-----Original Message-----
From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Craig Topper
Sent: Saturday, November 19, 2011 19:12
To: llvm-commits at cs.uiuc.edu
Subject: [llvm-commits] [llvm] r145005 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-shift.ll

Author: ctopper
Date: Sat Nov 19 18:12:05 2011
New Revision: 145005

URL: http://llvm.org/viewvc/llvm-project?rev=145005&view=rev
Log:
Add code for lowering v32i8 shifts by a splat to AVX2 immediate shift instructions. Remove 256-bit splat handling from LowerShift as it was already handled by PerformShiftCombine.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx2-shift.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145005&r1=145004&r2=145005&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 19 18:12:05 2011
@@ -10338,47 +10338,48 @@
         return Res;
       }
 
-      if (Subtarget->hasAVX2()) {
-        if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
-
-        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA)
-         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
-                       R, DAG.getConstant(ShiftAmt, MVT::i32));
+      if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+        if (Op.getOpcode() == ISD::SHL) {
+          // Make a large shift.
+          SDValue SHL =
+            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                        DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
+                        R, DAG.getConstant(ShiftAmt, MVT::i32));
+          // Zero out the rightmost bits.
+          SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
+                                                         MVT::i8));
+          return DAG.getNode(ISD::AND, dl, VT, SHL,
+                             DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
         }
+        if (Op.getOpcode() == ISD::SRL) {
+          // Make a large shift.
+          SDValue SRL =
+            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                        DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
+                        R, DAG.getConstant(ShiftAmt, MVT::i32));
+          // Zero out the leftmost bits.
+          SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+                                                         MVT::i8));
+          return DAG.getNode(ISD::AND, dl, VT, SRL,
+                             DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+        }
+        if (Op.getOpcode() == ISD::SRA) {
+          if (ShiftAmt == 7) {
+            // R s>> 7  ===  R s< 0
+            SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+            return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+          }
+
+          // R s>> a === ((R u>> a) ^ m) - m
+          SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+          SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+                                                         MVT::i8));
+          SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+          Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+          Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+          return Res;
+        }
+      }
     }
   }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145005&r1=145004&r2=145005&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Nov 19 18:12:05 2011
@@ -311,17 +311,16 @@
 // JIT implementation, it does not expand the instructions below like
 // X86MCInstLower does.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
   def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                          [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+  let Predicates = [HasAVX] in
   def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                          [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX2] in
+  let Predicates = [HasAVX2] in
   def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
                           [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+}
 
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/X86/avx2-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-shift.ll?rev=145005&r1=145004&r2=145005&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-shift.ll Sat Nov 19 18:12:05 2011
@@ -58,14 +58,14 @@
 }
 
 ; CHECK: variable_sra0
-; CHECK: psravd
+; CHECK: vpsravd
 ; CHECK: ret
 define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
   %k = ashr <4 x i32> %x, %y
   ret <4 x i32> %k
 }
 ; CHECK: variable_sra1
-; CHECK: psravd
+; CHECK: vpsravd
 ; CHECK: ret
 define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
   %k = ashr <8 x i32> %x, %y
@@ -127,7 +127,7 @@
 }
 
 ; CHECK: variable_sra0_load
-; CHECK: psravd (%
+; CHECK: vpsravd (%
 ; CHECK: ret
 define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
   %y1 = load <4 x i32>* %y
@@ -136,7 +136,7 @@
 }
 
 ; CHECK: variable_sra1_load
-; CHECK: psravd (%
+; CHECK: vpsravd (%
 ; CHECK: ret
 define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
   %y1 = load <8 x i32>* %y
@@ -145,7 +145,7 @@
 }
 
 ; CHECK: variable_shl0_load
-; CHECK: psllvd (%
+; CHECK: vpsllvd (%
 ; CHECK: ret
 define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
   %y1 = load <4 x i32>* %y
@@ -153,7 +153,7 @@
   ret <4 x i32> %k
 }
 ; CHECK: variable_shl1_load
-; CHECK: psllvd (%
+; CHECK: vpsllvd (%
 ; CHECK: ret
 define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
   %y1 = load <8 x i32>* %y
@@ -161,7 +161,7 @@
   ret <8 x i32> %k
 }
 ; CHECK: variable_shl2_load
-; CHECK: psllvq (%
+; CHECK: vpsllvq (%
 ; CHECK: ret
 define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
   %y1 = load <2 x i64>* %y
@@ -169,7 +169,7 @@
   ret <2 x i64> %k
 }
 ; CHECK: variable_shl3_load
-; CHECK: psllvq (%
+; CHECK: vpsllvq (%
 ; CHECK: ret
 define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
   %y1 = load <4 x i64>* %y
@@ -177,7 +177,7 @@
   ret <4 x i64> %k
 }
 ; CHECK: variable_srl0_load
-; CHECK: psrlvd (%
+; CHECK: vpsrlvd (%
 ; CHECK: ret
 define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
   %y1 = load <4 x i32>* %y
@@ -185,7 +185,7 @@
   ret <4 x i32> %k
 }
 ; CHECK: variable_srl1_load
-; CHECK: psrlvd (%
+; CHECK: vpsrlvd (%
 ; CHECK: ret
 define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
   %y1 = load <8 x i32>* %y
@@ -193,7 +193,7 @@
   ret <8 x i32> %k
 }
 ; CHECK: variable_srl2_load
-; CHECK: psrlvq (%
+; CHECK: vpsrlvq (%
 ; CHECK: ret
 define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
   %y1 = load <2 x i64>* %y
@@ -201,10 +201,48 @@
   ret <2 x i64> %k
 }
 ; CHECK: variable_srl3_load
-; CHECK: psrlvq (%
+; CHECK: vpsrlvq (%
 ; CHECK: ret
 define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
   %y1 = load <4 x i64>* %y
   %k = lshr <4 x i64> %x, %y1
   ret <4 x i64> %k
 }
+
+define <32 x i8> @shl9(<32 x i8> %A) nounwind {
+  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: shl9:
+; CHECK: vpsllw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @shr9(<32 x i8> %A) nounwind {
+  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: shr9:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
+  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <32 x i8> %B
+; CHECK: sra_v32i8_7:
+; CHECK: vxorps
+; CHECK: vpcmpgtb
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
+  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: sra_v32i8:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: vpxor
+; CHECK: vpsubb
+; CHECK: ret
+}


_______________________________________________
llvm-commits mailing list
llvm-commits at cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.