[llvm] r245613 - [X86] Look for scalar through one bitcast when lowering to VBROADCAST.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 20 14:11:29 PDT 2015


Thanks, Ahmed (and for the broadcast patches too!). -- Simon

> On 20 Aug 2015, at 22:02, Ahmed Bougacha via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> Author: ab
> Date: Thu Aug 20 16:02:39 2015
> New Revision: 245613
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=245613&view=rev
> Log:
> [X86] Look for scalar through one bitcast when lowering to VBROADCAST.
> 
> Fixes PR23464: one way to use the broadcast intrinsics is:
> 
>  _mm256_broadcastw_epi16(_mm_cvtsi32_si128(*(int*)src));
> 
> We don't currently fold this, but now that we use native IR for
> the intrinsics (r245605), we can look through one bitcast to find
> the broadcast scalar.
> 
> Differential Revision: http://reviews.llvm.org/D10557
> 
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>    llvm/trunk/lib/Target/X86/X86InstrSSE.td
>    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
>    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
>    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
>    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=245613&r1=245612&r2=245613&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug 20 16:02:39 2015
> @@ -7553,6 +7553,23 @@ static SDValue lowerVectorShuffleAsBroad
> 
>   // Check if this is a broadcast of a scalar. We special case lowering
>   // for scalars so that we can more effectively fold with loads.
> +  // First, look through bitcast: if the original value has a larger element
> +  // type than the shuffle, the broadcast element is in essence truncated.
> +  // Make that explicit to ease folding.
> +  if (V.getOpcode() == ISD::BITCAST && VT.isInteger()) {
> +    EVT EltVT = VT.getVectorElementType();
> +    SDValue V0 = V.getOperand(0);
> +    EVT V0VT = V0.getValueType();
> +
> +    if (V0VT.isInteger() && V0VT.getVectorElementType().bitsGT(EltVT) &&
> +        ((V0.getOpcode() == ISD::BUILD_VECTOR ||
> +         (V0.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)))) {
> +      V = DAG.getNode(ISD::TRUNCATE, DL, EltVT, V0.getOperand(BroadcastIdx));
> +      BroadcastIdx = 0;
> +    }
> +  }
> +
> +  // Also check the simpler case, where we can directly reuse the scalar.
>   if (V.getOpcode() == ISD::BUILD_VECTOR ||
>       (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
>     V = V.getOperand(BroadcastIdx);
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=245613&r1=245612&r2=245613&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Aug 20 16:02:39 2015
> @@ -8338,6 +8338,13 @@ defm VPBROADCASTD  : avx2_broadcast<0x58
> defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64>;
> 
> let Predicates = [HasAVX2] in {
> +  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
> +  // This means we'll encounter truncated i32 loads; match that here.
> +  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
> +            (VPBROADCASTWrm addr:$src)>;
> +  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
> +            (VPBROADCASTWYrm addr:$src)>;
> +
>   // Provide aliases for broadcast from the same register class that
>   // automatically does the extract.
>   def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=245613&r1=245612&r2=245613&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Thu Aug 20 16:02:39 2015
> @@ -1404,8 +1404,7 @@ define <16 x i8> @insert_dup_mem_v16i8_i
> ;
> ; AVX2-LABEL: insert_dup_mem_v16i8_i32:
> ; AVX2:       # BB#0:
> -; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
> -; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
> +; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
> ; AVX2-NEXT:    retq
>   %tmp = load i32, i32* %ptr, align 4
>   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
> @@ -1451,9 +1450,7 @@ define <16 x i8> @insert_dup_mem_v16i8_s
> ;
> ; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
> ; AVX2:       # BB#0:
> -; AVX2-NEXT:    movsbl (%rdi), %eax
> -; AVX2-NEXT:    vmovd %eax, %xmm0
> -; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
> +; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
> ; AVX2-NEXT:    retq
>   %tmp = load i8, i8* %ptr, align 1
>   %tmp1 = sext i8 %tmp to i32
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=245613&r1=245612&r2=245613&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Thu Aug 20 16:02:39 2015
> @@ -2175,8 +2175,7 @@ define <8 x i16> @insert_dup_mem_v8i16_i
> ;
> ; AVX2-LABEL: insert_dup_mem_v8i16_i32:
> ; AVX2:       # BB#0:
> -; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
> -; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
> +; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
> ; AVX2-NEXT:    retq
>   %tmp = load i32, i32* %ptr, align 4
>   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=245613&r1=245612&r2=245613&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Thu Aug 20 16:02:39 2015
> @@ -3301,8 +3301,7 @@ define <16 x i16> @insert_dup_mem_v16i16
> ;
> ; AVX2-LABEL: insert_dup_mem_v16i16_i32:
> ; AVX2:       # BB#0:
> -; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
> -; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
> +; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
> ; AVX2-NEXT:    retq
>   %tmp = load i32, i32* %ptr, align 4
>   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=245613&r1=245612&r2=245613&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Thu Aug 20 16:02:39 2015
> @@ -1986,8 +1986,7 @@ define <32 x i8> @insert_dup_mem_v32i8_i
> ;
> ; AVX2-LABEL: insert_dup_mem_v32i8_i32:
> ; AVX2:       # BB#0:
> -; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
> -; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
> +; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
> ; AVX2-NEXT:    retq
>   %tmp = load i32, i32* %ptr, align 4
>   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
> @@ -2008,9 +2007,7 @@ define <32 x i8> @insert_dup_mem_v32i8_s
> ;
> ; AVX2-LABEL: insert_dup_mem_v32i8_sext_i8:
> ; AVX2:       # BB#0:
> -; AVX2-NEXT:    movsbl (%rdi), %eax
> -; AVX2-NEXT:    vmovd %eax, %xmm0
> -; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
> +; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
> ; AVX2-NEXT:    retq
>   %tmp = load i8, i8* %ptr, align 1
>   %tmp1 = sext i8 %tmp to i32
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list