[llvm-commits] [llvm] r145005 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-shift.ll

Craig Topper craig.topper at gmail.com
Sat Nov 19 22:04:05 PST 2011


OK, thanks. I'll restore the code. Can you tell me how to write a test case for it?

On Nov 19, 2011, at 5:44 PM, "Rotem, Nadav" <nadav.rotem at intel.com> wrote:

> Craig,
> 
> The code for handling splat-shift in the legalizer is there for a reason. In some cases a vector SIGN_EXTEND_INREG is legalized using shifts, and during that process the shift amount itself is legalized into a constant-pool load. It is too late to handle this case in the DAG combiner, because the combiner cannot see through constant-pool loads.
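> 
> For example (an untested sketch; the function name and RUN line are illustrative, and CHECK patterns would need to be filled in from the actual output), a sext of a truncated vector is typically combined into a SIGN_EXTEND_INREG node, which the legalizer can expand into a shift-left / arithmetic-shift-right pair whose splat shift amount may become a constant-pool load before the shift lowering runs:
> 
>   ; RUN: llc < %s -march=x86-64
>   define <4 x i32> @sext_inreg(<4 x i32> %x) {
>     ; trunc+sext of each lane is combined into sign_extend_inreg,
>     ; which may be legalized as shl-by-24 followed by ashr-by-24
>     %t = trunc <4 x i32> %x to <4 x i8>
>     %s = sext <4 x i8> %t to <4 x i32>
>     ret <4 x i32> %s
>   }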
> 
> Nadav
> 
> -----Original Message-----
> From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Craig Topper
> Sent: Saturday, November 19, 2011 19:12
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm-commits] [llvm] r145005 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-shift.ll
> 
> Author: ctopper
> Date: Sat Nov 19 18:12:05 2011
> New Revision: 145005
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=145005&view=rev
> Log:
> Add code for lowering v32i8 shifts by a splat to AVX2 immediate shift instructions. Remove 256-bit splat handling from LowerShift as it was already handled by PerformShiftCombine.
> 
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>    llvm/trunk/lib/Target/X86/X86InstrSSE.td
>    llvm/trunk/test/CodeGen/X86/avx2-shift.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145005&r1=145004&r2=145005&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 19 18:12:05 2011
> @@ -10338,47 +10338,48 @@
>         return Res;
>       }
> 
> -      if (Subtarget->hasAVX2()) {
> -        if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> -
> -        if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA)
> -         return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> -                       DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
> -                       R, DAG.getConstant(ShiftAmt, MVT::i32));
> +      if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
> +        if (Op.getOpcode() == ISD::SHL) {
> +          // Make a large shift.
> +          SDValue SHL =
> +            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> +                        DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
> +                        R, DAG.getConstant(ShiftAmt, MVT::i32));
> +          // Zero out the rightmost bits.
> +          SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
> +                                                         MVT::i8));
> +          return DAG.getNode(ISD::AND, dl, VT, SHL,
> +                             DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
>         }
> +        if (Op.getOpcode() == ISD::SRL) {
> +          // Make a large shift.
> +          SDValue SRL =
> +            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
> +                        DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
> +                        R, DAG.getConstant(ShiftAmt, MVT::i32));
> +          // Zero out the leftmost bits.
> +          SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
> +                                                         MVT::i8));
> +          return DAG.getNode(ISD::AND, dl, VT, SRL,
> +                             DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
> +        }
> +        if (Op.getOpcode() == ISD::SRA) {
> +          if (ShiftAmt == 7) {
> +            // R s>> 7  ===  R s< 0
> +            SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
> +            return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
> +          }
> +
> +          // R s>> a === ((R u>> a) ^ m) - m
> +          SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
> +          SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
> +                                                         MVT::i8));
> +          SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
> +          Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
> +          Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
> +          return Res;
> +        }
> +      }
>     }
>   }
> 
> 
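> As an aside on the new code above: AVX2 has no byte-granularity immediate shifts, so the v32i8 SHL/SRL cases shift 16-bit lanes with vpsllw/vpsrlw and then mask away the bits that crossed a byte boundary. The SRA case relies on the identity R s>> a == ((R u>> a) ^ m) - m with m = 0x80 >> a. A rough IR-level sketch of that expansion for a = 3, shown on <4 x i8> for brevity (the function name is illustrative; the lowered code operates on v32i8):
> 
>   define <4 x i8> @sra_by_3(<4 x i8> %R) {
>     ; m = 0x80 >> 3 = 16; xor flips the shifted-in sign-bit position,
>     ; sub then borrows through the high bits, sign-extending the result
>     %srl = lshr <4 x i8> %R, <i8 3, i8 3, i8 3, i8 3>
>     %x   = xor <4 x i8> %srl, <i8 16, i8 16, i8 16, i8 16>
>     %res = sub <4 x i8> %x, <i8 16, i8 16, i8 16, i8 16>
>     ret <4 x i8> %res
>   }
> 
> For ShiftAmt == 7 each result element is all-ones or all-zeros depending only on the sign bit, so the code emits a single pcmpgtb against a zero vector instead: R s>> 7 == (0 s> R).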
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145005&r1=145004&r2=145005&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Nov 19 18:12:05 2011
> @@ -311,17 +311,16 @@
> // JIT implementation, it does not expand the instructions below like
> // X86MCInstLower does.
> let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
> -    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
> +    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
>   def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
>                          [(set VR128:$dst, (v4i32 immAllOnesV))]>;
> -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
> -    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
> +  let Predicates = [HasAVX] in
>   def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
>                          [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
> -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
> -    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX2] in
> +  let Predicates = [HasAVX2] in
>   def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
>                           [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
> +}
> 
> 
> //===----------------------------------------------------------------------===//
> 
> Modified: llvm/trunk/test/CodeGen/X86/avx2-shift.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-shift.ll?rev=145005&r1=145004&r2=145005&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx2-shift.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx2-shift.ll Sat Nov 19 18:12:05 2011
> @@ -58,14 +58,14 @@
> }
> 
> ; CHECK: variable_sra0
> -; CHECK: psravd
> +; CHECK: vpsravd
> ; CHECK: ret
> define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
>   %k = ashr <4 x i32> %x, %y
>   ret <4 x i32> %k
> }
> ; CHECK: variable_sra1
> -; CHECK: psravd
> +; CHECK: vpsravd
> ; CHECK: ret
> define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
>   %k = ashr <8 x i32> %x, %y
> @@ -127,7 +127,7 @@
> }
> 
> ; CHECK: variable_sra0_load
> -; CHECK: psravd (%
> +; CHECK: vpsravd (%
> ; CHECK: ret
> define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
>   %y1 = load <4 x i32>* %y
> @@ -136,7 +136,7 @@
> }
> 
> ; CHECK: variable_sra1_load
> -; CHECK: psravd (%
> +; CHECK: vpsravd (%
> ; CHECK: ret
> define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
>   %y1 = load <8 x i32>* %y
> @@ -145,7 +145,7 @@
> }
> 
> ; CHECK: variable_shl0_load
> -; CHECK: psllvd (%
> +; CHECK: vpsllvd (%
> ; CHECK: ret
> define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
>   %y1 = load <4 x i32>* %y
> @@ -153,7 +153,7 @@
>   ret <4 x i32> %k
> }
> ; CHECK: variable_shl1_load
> -; CHECK: psllvd (%
> +; CHECK: vpsllvd (%
> ; CHECK: ret
> define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
>   %y1 = load <8 x i32>* %y
> @@ -161,7 +161,7 @@
>   ret <8 x i32> %k
> }
> ; CHECK: variable_shl2_load
> -; CHECK: psllvq (%
> +; CHECK: vpsllvq (%
> ; CHECK: ret
> define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
>   %y1 = load <2 x i64>* %y
> @@ -169,7 +169,7 @@
>   ret <2 x i64> %k
> }
> ; CHECK: variable_shl3_load
> -; CHECK: psllvq (%
> +; CHECK: vpsllvq (%
> ; CHECK: ret
> define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
>   %y1 = load <4 x i64>* %y
> @@ -177,7 +177,7 @@
>   ret <4 x i64> %k
> }
> ; CHECK: variable_srl0_load
> -; CHECK: psrlvd (%
> +; CHECK: vpsrlvd (%
> ; CHECK: ret
> define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
>   %y1 = load <4 x i32>* %y
> @@ -185,7 +185,7 @@
>   ret <4 x i32> %k
> }
> ; CHECK: variable_srl1_load
> -; CHECK: psrlvd (%
> +; CHECK: vpsrlvd (%
> ; CHECK: ret
> define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
>   %y1 = load <8 x i32>* %y
> @@ -193,7 +193,7 @@
>   ret <8 x i32> %k
> }
> ; CHECK: variable_srl2_load
> -; CHECK: psrlvq (%
> +; CHECK: vpsrlvq (%
> ; CHECK: ret
> define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
>   %y1 = load <2 x i64>* %y
> @@ -201,10 +201,48 @@
>   ret <2 x i64> %k
> }
> ; CHECK: variable_srl3_load
> -; CHECK: psrlvq (%
> +; CHECK: vpsrlvq (%
> ; CHECK: ret
> define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
>   %y1 = load <4 x i64>* %y
>   %k = lshr <4 x i64> %x, %y1
>   ret <4 x i64> %k
> }
> +
> +define <32 x i8> @shl9(<32 x i8> %A) nounwind {
> +  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
> +  ret <32 x i8> %B
> +; CHECK: shl9:
> +; CHECK: vpsllw $3
> +; CHECK: vpand
> +; CHECK: ret
> +}
> +
> +define <32 x i8> @shr9(<32 x i8> %A) nounwind {
> +  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
> +  ret <32 x i8> %B
> +; CHECK: shr9:
> +; CHECK: vpsrlw $3
> +; CHECK: vpand
> +; CHECK: ret
> +}
> +
> +define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
> +  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
> +  ret <32 x i8> %B
> +; CHECK: sra_v32i8_7:
> +; CHECK: vxorps
> +; CHECK: vpcmpgtb
> +; CHECK: ret
> +}
> +
> +define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
> +  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
> +  ret <32 x i8> %B
> +; CHECK: sra_v32i8:
> +; CHECK: vpsrlw $3
> +; CHECK: vpand
> +; CHECK: vpxor
> +; CHECK: vpsubb
> +; CHECK: ret
> +}
> 
> 