[llvm] r262397 - DAGCombiner: Turn truncate of a bitcasted vector to an extract
Mikael Holmén via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 3 00:27:29 PST 2016
Hi Matt,
What about Big Endian targets? Shouldn't we extract the highest vector
element instead of element 0 then?
Regards,
Mikael
On 03/01/2016 10:31 PM, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Tue Mar 1 15:31:53 2016
> New Revision: 262397
>
> URL: http://llvm.org/viewvc/llvm-project?rev=262397&view=rev
> Log:
> DAGCombiner: Turn truncate of a bitcasted vector to an extract
>
> On AMDGPU where i64 operations are often bitcasted to v2i32
> and back, this pattern shows up regularly where it breaks some
> expected combines on i64, such as load width reducing.
>
> This fixes some test failures in a future commit when i64 loads
> are changed to promote.
>
> Added:
> llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/test/CodeGen/AMDGPU/half.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=262397&r1=262396&r2=262397&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Mar 1 15:31:53 2016
> @@ -7176,6 +7176,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNod
> }
> }
>
> + // Fold truncate of a bitcast of a vector to an extract of the low vector
> + // element.
> + //
> + // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
> + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
> + SDValue VecSrc = N0.getOperand(0);
> + EVT SrcVT = VecSrc.getValueType();
> + if (SrcVT.isVector() && SrcVT.getScalarType() == VT) {
> + SDLoc SL(N);
> +
> + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
> + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
> + VecSrc, DAG.getConstant(0, SL, IdxVT));
> + }
> + }
> +
> // Simplify the operands using demanded-bits information.
> if (!VT.isVector() &&
> SimplifyDemandedBits(SDValue(N, 0)))
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/half.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/half.ll?rev=262397&r1=262396&r2=262397&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/half.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/half.ll Tue Mar 1 15:31:53 2016
> @@ -396,12 +396,11 @@ define void @global_extload_v2f16_to_v2f
> ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
>
> ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
> -; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
> -
> ; GCN: v_cvt_f32_f16_e32
> ; GCN: v_cvt_f32_f16_e32
> +; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
> ; GCN: v_cvt_f32_f16_e32
> -; GCN-NOT: v_cvt_f32_f16_e32
> +; GCN-NOT: v_cvt_f32_f16
>
> ; GCN: v_cvt_f64_f32_e32
> ; GCN: v_cvt_f64_f32_e32
>
> Added: llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll?rev=262397&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll (added)
> +++ llvm/trunk/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll Tue Mar 1 15:31:53 2016
> @@ -0,0 +1,93 @@
> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
> +
> +; CHECK-LABEL: {{^}}trunc_i64_bitcast_v2i32:
> +; CHECK: buffer_load_dword v
> +; CHECK: buffer_store_dword v
> +define void @trunc_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
> + %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
> + %bc = bitcast <2 x i32> %ld to i64
> + %trunc = trunc i64 %bc to i32
> + store i32 %trunc, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i96_bitcast_v3i32:
> +; CHECK: buffer_load_dword v
> +; CHECK: buffer_store_dword v
> +define void @trunc_i96_bitcast_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %in) {
> + %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
> + %bc = bitcast <3 x i32> %ld to i96
> + %trunc = trunc i96 %bc to i32
> + store i32 %trunc, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i128_bitcast_v4i32:
> +; CHECK: buffer_load_dword v
> +; CHECK: buffer_store_dword v
> +define void @trunc_i128_bitcast_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
> + %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
> + %bc = bitcast <4 x i32> %ld to i128
> + %trunc = trunc i128 %bc to i32
> + store i32 %trunc, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; Don't want load width reduced in this case.
> +; CHECK-LABEL: {{^}}trunc_i16_bitcast_v2i16:
> +; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_short [[VAL]]
> +define void @trunc_i16_bitcast_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
> + %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
> + %bc = bitcast <2 x i16> %ld to i32
> + %trunc = trunc i32 %bc to i16
> + store i16 %trunc, i16 addrspace(1)* %out
> + ret void
> +}
> +
> +; FIXME: Don't want load width reduced here.
> +; CHECK-LABEL: {{^}}trunc_i16_bitcast_v4i16:
> +; CHECK: buffer_load_ushort [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_short [[VAL]]
> +define void @trunc_i16_bitcast_v4i16(i16 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
> + %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
> + %bc = bitcast <4 x i16> %ld to i64
> + %trunc = trunc i64 %bc to i16
> + store i16 %trunc, i16 addrspace(1)* %out
> + ret void
> +}
> +
> +; FIXME: Don't want load width reduced in this case.
> +; CHECK-LABEL: {{^}}trunc_i8_bitcast_v2i8:
> +; CHECK: buffer_load_ubyte [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_byte [[VAL]]
> +define void @trunc_i8_bitcast_v2i8(i8 addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
> + %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
> + %bc = bitcast <2 x i8> %ld to i16
> + %trunc = trunc i16 %bc to i8
> + store i8 %trunc, i8 addrspace(1)* %out
> + ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i32_bitcast_v4i8:
> +; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_byte [[VAL]]
> +define void @trunc_i32_bitcast_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
> + %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
> + %bc = bitcast <4 x i8> %ld to i32
> + %trunc = trunc i32 %bc to i8
> + store i8 %trunc, i8 addrspace(1)* %out
> + ret void
> +}
> +
> +; CHECK-LABEL: {{^}}trunc_i24_bitcast_v3i8:
> +; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
> +; CHECK: buffer_store_byte [[VAL]]
> +define void @trunc_i24_bitcast_v3i8(i8 addrspace(1)* %out, <3 x i8> addrspace(1)* %in) {
> + %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
> + %bc = bitcast <3 x i8> %ld to i24
> + %trunc = trunc i24 %bc to i8
> + store i8 %trunc, i8 addrspace(1)* %out
> + ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list