[llvm] [AMDGPU] Fix indirect dst bug for non-sgpr index (PR #98907)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 15 07:04:15 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: David Stuttard (dstutt)

<details>
<summary>Changes</summary>

When emitting an indirect dst, if the index is not in an SGPR, the code always
used sub0 and did not take into account that the subregister computed by
computeIndirectRegAndOffset might be different.


---
Full diff: https://github.com/llvm/llvm-project/pull/98907.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (+18) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a733295d2a511..bb8e21772e566 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4814,14 +4814,14 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
         .addReg(PhiReg)
         .add(*Val)
         .addReg(SGPRIdxReg)
-        .addImm(AMDGPU::sub0);
+        .addImm(SubReg);
   } else {
     const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
         TRI.getRegSizeInBits(*VecRC), 32, false);
     BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
         .addReg(PhiReg)
         .add(*Val)
-        .addImm(AMDGPU::sub0);
+        .addImm(SubReg);
   }
 
   MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 1f92427fe8a23..f095aef7a0cc8 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -543,6 +543,24 @@ bb8:                                              ; preds = %bb2
   ret void
 }
 
+; GCN-LABEL: {{^}}insert_or_disj_index:
+; GCN: v_mov_b32_e32 v[[#VIDX:]], 0
+
+; MOVREL: s_mov_b32 m0, s{{[0-9]+}}
+; MOVREL: v_movreld_b32_e32 v[[#VIDX + 1]], v{{[0-9]+}}
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST)
+; IDXMODE: v_mov_b32_e32 v[[#VIDX + 1]], v{{[0-9]+}}
+; IDXMODE: s_set_gpr_idx_off
+define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %val, <4 x i32> inreg %desc, i32 inreg %A) {
+entry:
+  %idx = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %desc, i32 %A, i32 0, i32 0)
+  %off = or disjoint i32 %idx, 1
+  %v = insertelement <16 x i32> zeroinitializer, i32 %val, i32 %off
+  store <16 x i32> %v, ptr addrspace(1) %out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare void @llvm.amdgcn.s.barrier() #2
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/98907


More information about the llvm-commits mailing list