[llvm] r262397 - DAGCombiner: Turn truncate of a bitcasted vector to an extract

Thu Mar 3 00:27:29 PST 2016

Hi Matt,

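What about big-endian targets? There, element 0 of the vector ends up in
the most significant bits of the bitcasted integer, so the truncate keeps
the last element. Shouldn't we extract the highest vector element instead
of element 0 then?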

Regards,
Mikael

On 03/01/2016 10:31 PM, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Tue Mar  1 15:31:53 2016
> New Revision: 262397
>
> URL: http://llvm.org/viewvc/llvm-project?rev=262397&view=rev
> Log:
> DAGCombiner: Turn truncate of a bitcasted vector to an extract
>
> On AMDGPU, where i64 operations are often bitcasted to v2i32
> and back, this pattern shows up regularly, and it breaks some
> expected combines on i64, such as load width reduction.
>
> This fixes some test failures in a future commit when i64 loads
> are changed to promote.
>
> Added:
>      llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll
> Modified:
>      llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>      llvm/trunk/test/CodeGen/AMDGPU/half.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=262397&r1=262396&r2=262397&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Mar  1 15:31:53 2016
> @@ -7176,6 +7176,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNod
>       }
>     }
>
> +  // Fold truncate of a bitcast of a vector to an extract of the low vector
> +  // element.
> +  //
> +  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
> +  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
> +    SDValue VecSrc = N0.getOperand(0);
> +    EVT SrcVT = VecSrc.getValueType();
> +    if (SrcVT.isVector() && SrcVT.getScalarType() == VT) {
> +      SDLoc SL(N);
> +
> +      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
> +      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
> +                         VecSrc, DAG.getConstant(0, SL, IdxVT));
> +    }
> +  }
> +
>     // Simplify the operands using demanded-bits information.
>     if (!VT.isVector() &&
>         SimplifyDemandedBits(SDValue(N, 0)))
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/half.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/half.ll?rev=262397&r1=262396&r2=262397&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/half.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/half.ll Tue Mar  1 15:31:53 2016
> @@ -396,12 +396,11 @@ define void @global_extload_v2f16_to_v2f
>   ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
>
>   ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
> -; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
> -
>   ; GCN: v_cvt_f32_f16_e32
>   ; GCN: v_cvt_f32_f16_e32
> +; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
>   ; GCN: v_cvt_f32_f16_e32
> -; GCN-NOT: v_cvt_f32_f16_e32
> +; GCN-NOT: v_cvt_f32_f16
>
>   ; GCN: v_cvt_f64_f32_e32
>   ; GCN: v_cvt_f64_f32_e32
>
> Added: llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll?rev=262397&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll (added)
> +++ llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll Tue Mar  1 15:31:53 2016
> @@ -0,0 +1,93 @@
> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
> +
> +; CHECK-LABEL: {{^}}trunc_i64_bitcast_v2i32:
> +; CHECK: buffer_load_dword v
> +; CHECK: buffer_store_dword v
> +define void @trunc_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
> +  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
> +  %bc = bitcast <2 x i32> %ld to i64
> +  %trunc = trunc i64 %bc to i32
> +  store i32 %trunc, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i96_bitcast_v3i32:
> +; CHECK: buffer_load_dword v
> +; CHECK: buffer_store_dword v
> +define void @trunc_i96_bitcast_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %in) {
> +  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
> +  %bc = bitcast <3 x i32> %ld to i96
> +  %trunc = trunc i96 %bc to i32
> +  store i32 %trunc, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i128_bitcast_v4i32:
> +; CHECK: buffer_load_dword v
> +; CHECK: buffer_store_dword v
> +define void @trunc_i128_bitcast_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
> +  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
> +  %bc = bitcast <4 x i32> %ld to i128
> +  %trunc = trunc i128 %bc to i32
> +  store i32 %trunc, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; Don't want load width reduced in this case.
> +; CHECK-LABEL: {{^}}trunc_i16_bitcast_v2i16:
> +; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_short [[VAL]]
> +define void @trunc_i16_bitcast_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
> +  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
> +  %bc = bitcast <2 x i16> %ld to i32
> +  %trunc = trunc i32 %bc to i16
> +  store i16 %trunc, i16 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME: Don't want load width reduced here.
> +; CHECK-LABEL: {{^}}trunc_i16_bitcast_v4i16:
> +; CHECK: buffer_load_ushort [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_short [[VAL]]
> +define void @trunc_i16_bitcast_v4i16(i16 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
> +  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
> +  %bc = bitcast <4 x i16> %ld to i64
> +  %trunc = trunc i64 %bc to i16
> +  store i16 %trunc, i16 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME: Don't want load width reduced in this case.
> +; CHECK-LABEL: {{^}}trunc_i8_bitcast_v2i8:
> +; CHECK: buffer_load_ubyte [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_byte [[VAL]]
> +define void @trunc_i8_bitcast_v2i8(i8 addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
> +  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
> +  %bc = bitcast <2 x i8> %ld to i16
> +  %trunc = trunc i16 %bc to i8
> +  store i8 %trunc, i8 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i32_bitcast_v4i8:
> +; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_byte [[VAL]]
> +define void @trunc_i32_bitcast_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
> +  %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
> +  %bc = bitcast <4 x i8> %ld to i32
> +  %trunc = trunc i32 %bc to i8
> +  store i8 %trunc, i8 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i24_bitcast_v3i8:
> +; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_byte [[VAL]]
> +define void @trunc_i24_bitcast_v3i8(i8 addrspace(1)* %out, <3 x i8> addrspace(1)* %in) {
> +  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
> +  %bc = bitcast <3 x i8> %ld to i24
> +  %trunc = trunc i24 %bc to i8
> +  store i8 %trunc, i8 addrspace(1)* %out
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>