[llvm] [AMDGPU] Fix indirect dst bug for non-sgpr index (PR #98907)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 07:04:15 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: David Stuttard (dstutt)
<details>
<summary>Changes</summary>
When emitting an indirect dst with an index that is not in an SGPR, the code
always used sub0 and did not take into account that the subregister computed
by computeIndirectRegAndOffset might be different.
---
Full diff: https://github.com/llvm/llvm-project/pull/98907.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (+18)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a733295d2a511..bb8e21772e566 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4814,14 +4814,14 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
.addReg(PhiReg)
.add(*Val)
.addReg(SGPRIdxReg)
- .addImm(AMDGPU::sub0);
+ .addImm(SubReg);
} else {
const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
TRI.getRegSizeInBits(*VecRC), 32, false);
BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
.addReg(PhiReg)
.add(*Val)
- .addImm(AMDGPU::sub0);
+ .addImm(SubReg);
}
MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 1f92427fe8a23..f095aef7a0cc8 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -543,6 +543,24 @@ bb8: ; preds = %bb2
ret void
}
+; GCN-LABEL: {{^}}insert_or_disj_index:
+; GCN: v_mov_b32_e32 v[[#VIDX:]], 0
+
+; MOVREL: s_mov_b32 m0, s{{[0-9]+}}
+; MOVREL: v_movreld_b32_e32 v[[#VIDX + 1]], v{{[0-9]+}}
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST)
+; IDXMODE: v_mov_b32_e32 v[[#VIDX + 1]], v{{[0-9]+}}
+; IDXMODE: s_set_gpr_idx_off
+define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %val, <4 x i32> inreg %desc, i32 inreg %A) {
+entry:
+ %idx = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %desc, i32 %A, i32 0, i32 0)
+ %off = or disjoint i32 %idx, 1
+ %v = insertelement <16 x i32> zeroinitializer, i32 %val, i32 %off
+ store <16 x i32> %v, ptr addrspace(1) %out
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2
``````````
</details>
https://github.com/llvm/llvm-project/pull/98907
More information about the llvm-commits
mailing list