[PATCH] D39040: AMDGPU: Fix creating invalid copy when adjusting dmask
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 17 22:44:10 PDT 2017
arsenm created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
I'm guessing at how dmask works here. I'm not sure whether
the subregister index should always be sub0.
https://reviews.llvm.org/D39040
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
Index: test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}adjust_writemask_crash_0:
+; GCN: image_get_lod v[0:1], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
+; GCN-NOT: v1
+; GCN-NOT: v0
+; GCN: buffer_store_dword v1
+define amdgpu_ps void @adjust_writemask_crash_0() #0 {
+main_body:
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 0
+ store volatile float %tmp4, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_crash_1:
+; GCN: image_get_lod v[0:1], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
+; GCN-NOT: v1
+; GCN-NOT: v0
+; GCN: buffer_store_dword v0
+define amdgpu_ps void @adjust_writemask_crash_1() #0 {
+main_body:
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 1
+ store volatile float %tmp4, float addrspace(1)* undef
+ ret void
+}
+
+declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6428,13 +6428,14 @@
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)
- if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
- SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
- MVT::i32);
- SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(), Users[Lane]->getValueType(0),
- SDValue(Node, 0), RC);
- DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+ if (NewDmask && (NewDmask & (NewDmask - 1)) == 0) {
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+ unsigned SubReg = TRI->getSubRegFromChannel(countTrailingZeros(NewDmask));
+ //SDValue Copy = DAG.getTargetExtractSubreg(AMDGPU::sub0, SDLoc(Node),
+ SDValue Copy = DAG.getTargetExtractSubreg(SubReg, SDLoc(Node),
+ Users[Lane]->getValueType(0),
+ SDValue(Node, 0));
+ DAG.ReplaceAllUsesWith(Users[Lane], Copy.getNode());
return;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D39040.119431.patch
Type: text/x-patch
Size: 3422 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171018/2c37e964/attachment.bin>
More information about the llvm-commits
mailing list