[PATCH] D104241: AMDGPU: Fix assert on m0_lo16/m0_hi16
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 14 10:16:10 PDT 2021
arsenm created this revision.
arsenm added reviewers: rampitec, scott.linder, kerbowa.
Herald added subscribers: foad, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
The m0_lo16/m0_hi16 registers get added (redundantly) to the bundle
expanded for indirect register accesses, and the asm printer asserted
on them. We hit this path only when there is a call in the function.
https://reviews.llvm.org/D104241
Files:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
Index: llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
+++ llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
@@ -66,6 +66,23 @@
ret void
}
+declare hidden void @foo()
+
+; For functions with calls, we were not accounting for m0_lo16/m0_hi16
+; uses on the BUNDLE created when expanding the insert register pseudo.
+; GCN-LABEL: {{^}}insertelement_with_call:
+; GCN: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST)
+; GCN-NEXT: v_mov_b32_e32 {{v[0-9]+}}, 8
+; GCN-NEXT: s_set_gpr_idx_off
+; GCN: s_swappc_b64
+define amdgpu_kernel void @insertelement_with_call(<16 x i32> addrspace(1)* %ptr, i32 %idx) #0 {
+ %vec = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
+ %i6 = insertelement <16 x i32> %vec, i32 8, i32 %idx
+ call void @foo()
+ store <16 x i32> %i6, <16 x i32> addrspace(1)* null
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -833,6 +833,8 @@
case AMDGPU::EXEC_HI:
case AMDGPU::SCC:
case AMDGPU::M0:
+ case AMDGPU::M0_LO16:
+ case AMDGPU::M0_HI16:
case AMDGPU::SRC_SHARED_BASE:
case AMDGPU::SRC_SHARED_LIMIT:
case AMDGPU::SRC_PRIVATE_BASE:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D104241.351911.patch
Type: text/x-patch
Size: 1543 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210614/5f8a2b6f/attachment.bin>
More information about the llvm-commits
mailing list