[llvm] ad4a182 - AMDGPU: Fix assert on m0_lo16/m0_hi16

Fri Jun 18 15:49:00 PDT 2021

Author: Matt Arsenault
Date: 2021-06-18T18:48:53-04:00
New Revision: ad4a18251a37a244e3d4375538abfc0894a1e6ac

URL: https://github.com/llvm/llvm-project/commit/ad4a18251a37a244e3d4375538abfc0894a1e6ac
DIFF: https://github.com/llvm/llvm-project/commit/ad4a18251a37a244e3d4375538abfc0894a1e6ac.diff

LOG: AMDGPU: Fix assert on m0_lo16/m0_hi16

The m0_lo16/m0_hi16 registers get added (redundantly) to the bundle
expanded for indirect register accesses. We hit this path only when
there is a call in the function.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d3ac254d7e83..8f69e20cc051 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -833,6 +833,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         case AMDGPU::EXEC_HI:
         case AMDGPU::SCC:
         case AMDGPU::M0:
+        case AMDGPU::M0_LO16:
+        case AMDGPU::M0_HI16:
         case AMDGPU::SRC_SHARED_BASE:
         case AMDGPU::SRC_SHARED_LIMIT:
         case AMDGPU::SRC_PRIVATE_BASE:

diff  --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
index dbe904ab3982..730bcb44d1a8 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
@@ -66,6 +66,23 @@ entry:
   ret void
 }
 
+declare hidden void @foo()
+
+; For functions with calls, we were not accounting for m0_lo16/m0_hi16
+; uses on the BUNDLE created when expanding the insert register pseudo.
+; GCN-LABEL: {{^}}insertelement_with_call:
+; GCN: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST)
+; GCN-NEXT: v_mov_b32_e32 {{v[0-9]+}}, 8
+; GCN-NEXT: s_set_gpr_idx_off
+; GCN: s_swappc_b64
+define amdgpu_kernel void @insertelement_with_call(<16 x i32> addrspace(1)* %ptr, i32 %idx) #0 {
+  %vec = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
+  %i6 = insertelement <16 x i32> %vec, i32 8, i32 %idx
+  call void @foo()
+  store <16 x i32> %i6, <16 x i32> addrspace(1)* null
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare void @llvm.amdgcn.s.barrier() #2