[PATCH] D60457: [CodeGen] Fixed de-optimization of legalize subvector extract
Tim Renouf via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 17 03:12:06 PDT 2019
tpr updated this revision to Diff 195528.
tpr added a comment.
V3: Further reduced the test case.
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D60457/new/
https://reviews.llvm.org/D60457
Files:
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
Index: test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 <%s -stop-after=amdgpu-isel | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: body:
+; GCN-NOT: BUFFER_STORE{{.*}}store{{.*}}into{{.*}}stack
+; GCN: S_ENDPGM
+
+define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
+main_body:
+ %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
+ %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
+ %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
+ call void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32> %tmp29, <4 x i32> undef, i32 undef, i32 0, i32 0) #3
+ ret void
+}
+
+declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg)
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg)
+
Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4063,6 +4063,24 @@
}
}
+ // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
+ // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
+ // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
+ // having to copy via memory.
+ if (VT.isVector()) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (InWidenSize % EltSize == 0) {
+ unsigned NewNumElts = InWidenSize / EltSize;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+ }
+
return CreateStackStoreLoad(InOp, VT);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60457.195528.patch
Type: text/x-patch
Size: 2323 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190417/57d2e3a2/attachment.bin>
More information about the llvm-commits
mailing list