[llvm] 5d12fa7 - [AMDGPU] Fix indirect dst bug for non-sgpr index (#98907)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 01:12:43 PDT 2024
Author: David Stuttard
Date: 2024-07-16T09:12:39+01:00
New Revision: 5d12fa7d72a43eb54a3d8f953766323b97da5ca8
URL: https://github.com/llvm/llvm-project/commit/5d12fa7d72a43eb54a3d8f953766323b97da5ca8
DIFF: https://github.com/llvm/llvm-project/commit/5d12fa7d72a43eb54a3d8f953766323b97da5ca8.diff
LOG: [AMDGPU] Fix indirect dst bug for non-sgpr index (#98907)
When emitting an indirect dst whose index is not in an SGPR, the
subregister operand was hard-coded to sub0. This did not take into
account that the subregister returned by computeIndirectRegAndOffset
might be different from sub0.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a733295d2a511..bb8e21772e566 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4814,14 +4814,14 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
.addReg(PhiReg)
.add(*Val)
.addReg(SGPRIdxReg)
- .addImm(AMDGPU::sub0);
+ .addImm(SubReg);
} else {
const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
TRI.getRegSizeInBits(*VecRC), 32, false);
BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
.addReg(PhiReg)
.add(*Val)
- .addImm(AMDGPU::sub0);
+ .addImm(SubReg);
}
MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 1f92427fe8a23..f095aef7a0cc8 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -543,6 +543,24 @@ bb8: ; preds = %bb2
ret void
}
+; GCN-LABEL: {{^}}insert_or_disj_index:
+; GCN: v_mov_b32_e32 v[[#VIDX:]], 0
+
+; MOVREL: s_mov_b32 m0, s{{[0-9]+}}
+; MOVREL: v_movreld_b32_e32 v[[#VIDX + 1]], v{{[0-9]+}}
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST)
+; IDXMODE: v_mov_b32_e32 v[[#VIDX + 1]], v{{[0-9]+}}
+; IDXMODE: s_set_gpr_idx_off
+define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %val, <4 x i32> inreg %desc, i32 inreg %A) {
+entry:
+ %idx = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %desc, i32 %A, i32 0, i32 0)
+ %off = or disjoint i32 %idx, 1
+ %v = insertelement <16 x i32> zeroinitializer, i32 %val, i32 %off
+ store <16 x i32> %v, ptr addrspace(1) %out
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2
More information about the llvm-commits mailing list