[llvm] r281822 - AMDGPU: Push bitcasts through build_vector
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 17 08:44:16 PDT 2016
Author: arsenm
Date: Sat Sep 17 10:44:16 2016
New Revision: 281822
URL: http://llvm.org/viewvc/llvm-project?rev=281822&view=rev
Log:
AMDGPU: Push bitcasts through build_vector
This reduces the number of copies and reg_sequences
when using fp constant vectors. This significantly
reduces the code size in local-stack-alloc-bug.ll
Added:
llvm/trunk/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=281822&r1=281821&r2=281822&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Sat Sep 17 10:44:16 2016
@@ -2548,6 +2548,33 @@ SDValue AMDGPUTargetLowering::PerformDAG
break;
case ISD::BITCAST: {
EVT DestVT = N->getValueType(0);
+
+ // Push casts through vector builds. This helps avoid emitting a large
+ // number of copies when materializing floating point vector constants.
+ //
+ // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
+ // vNt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
+ if (DestVT.isVector()) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+ EVT SrcVT = Src.getValueType();
+ unsigned NElts = DestVT.getVectorNumElements();
+
+ if (SrcVT.getVectorNumElements() == NElts) {
+ EVT DestEltVT = DestVT.getVectorElementType();
+
+ SmallVector<SDValue, 8> CastedElts;
+ SDLoc SL(N);
+ for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
+ SDValue Elt = Src.getOperand(I);
+ CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
+ }
+
+ return DAG.getBuildVector(DestVT, SL, CastedElts);
+ }
+ }
+ }
+
if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
break;
Added: llvm/trunk/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/bitcast-vector-extract.ll?rev=281822&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/bitcast-vector-extract.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/bitcast-vector-extract.ll Sat Sep 17 10:44:16 2016
@@ -0,0 +1,69 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; The bitcast should be pushed through the build_vector so the vectors can
+; be broken down and the shared components can be CSEd
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
+ %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
+ store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+ %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
+ store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
+ %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
+ store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+ %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
+ store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
+ %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
+ store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out
+
+ %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
+ store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
+ %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
+ store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+ %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
+ store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+ ret void
+}
More information about the llvm-commits
mailing list