[llvm] AMDGPU: Fix trying to query end iterator for DebugLoc (PR #129886)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 05:55:00 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
When inserting instructions by iterator, the DebugLoc must be passed as a
separate argument: the insertion point may be an end iterator, which cannot
be dereferenced to query a DebugLoc. This fixes a reported regression after
39bf765bb671fa7df3fe6c164cc9532fcb8653bd.
---
Full diff: https://github.com/llvm/llvm-project/pull/129886.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (+8-8)
- (added) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll (+49)
- (modified) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir (+46)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 52d00485385c2..4342e7a369c13 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -155,7 +155,8 @@ class SIFixSGPRCopies {
// have any other uses.
bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
MachineBasicBlock *BlockToInsertTo,
- MachineBasicBlock::iterator PointToInsertTo);
+ MachineBasicBlock::iterator PointToInsertTo,
+ const DebugLoc &DL);
};
class SIFixSGPRCopiesLegacy : public MachineFunctionPass {
@@ -682,11 +683,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
MachineBasicBlock::iterator PointToInsertCopy =
MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
+ const DebugLoc &DL = MI.getDebugLoc();
if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
- PointToInsertCopy)) {
+ PointToInsertCopy, DL)) {
MachineInstr *NewCopy =
- BuildMI(*BlockToInsertCopy, PointToInsertCopy,
- PointToInsertCopy->getDebugLoc(),
+ BuildMI(*BlockToInsertCopy, PointToInsertCopy, DL,
TII->get(AMDGPU::COPY), NewDst)
.addReg(MO.getReg());
MO.setReg(NewDst);
@@ -855,7 +856,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
MachineOperand &MaybeVGPRConstMO, Register DstReg,
MachineBasicBlock *BlockToInsertTo,
- MachineBasicBlock::iterator PointToInsertTo) {
+ MachineBasicBlock::iterator PointToInsertTo, const DebugLoc &DL) {
MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
if (!DefMI || !DefMI->isMoveImmediate())
@@ -869,8 +870,7 @@ bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
MRI->getRegClass(MaybeVGPRConstMO.getReg());
unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
- BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
- TII->get(MoveOp), DstReg)
+ BuildMI(*BlockToInsertTo, PointToInsertTo, DL, TII->get(MoveOp), DstReg)
.add(*SrcConst);
if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
DefMI->eraseFromParent();
@@ -896,7 +896,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
.add(MI.getOperand(1));
MI.getOperand(1).setReg(TmpReg);
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
- MI)) {
+ MI, MI.getDebugLoc())) {
I = std::next(I);
MI.eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll
new file mode 100644
index 0000000000000..13184cf17a2e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+define i32 @rocrand_regression(ptr addrspace(1) %arg, i32 %arg0, i1 %cmp7) {
+; CHECK-LABEL: rocrand_regression:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
+; CHECK-NEXT: s_mov_b32 s8, 0
+; CHECK-NEXT: .LBB0_1: ; %do.body
+; CHECK-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NEXT: ; Child Loop BB0_2 Depth 2
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: .LBB0_2: ; %while.cond
+; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT: s_and_b64 s[10:11], exec, s[4:5]
+; CHECK-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_2
+; CHECK-NEXT: ; %bb.3: ; %do.cond
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_or_b32 s8, s8, 1
+; CHECK-NEXT: s_cbranch_execnz .LBB0_1
+; CHECK-NEXT: ; %bb.4: ; %DummyReturnBlock
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.cond, %entry
+ %phi.0 = phi i32 [ %arg0, %do.cond ], [ 0, %entry ]
+ %phi.1 = phi i32 [ %add6, %do.cond ], [ 0, %entry ]
+ %add6 = or i32 %phi.1, 1
+ store i32 %phi.1, ptr addrspace(1) %arg, align 4
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %do.body
+ %phi.2 = phi i32 [ %phi.0, %do.body ], [ 0, %while.cond ]
+ br i1 %cmp7, label %while.cond, label %do.cond
+
+do.cond: ; preds = %while.cond
+ br i1 true, label %do.body, label %do.end
+
+do.end: ; preds = %do.cond
+ ret i32 %phi.2
+}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index f1b88c7616298..767942b7dca7f 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -89,3 +89,49 @@ body: |
S_ENDPGM 0, implicit %1
...
+
+# GCN-LABEL: name: find_debug_loc_end_iterator_regression
+# GCN: %6:vreg_1 = COPY %4
+# GCN: %14:sgpr_32 = S_MOV_B32 0
+
+# GCN: %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
+# GCN: %8:sreg_32 = PHI %14, %bb.0, %9, %bb.3
+
+# GCN: %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
+# GCN: %13:sreg_64 = COPY %6
+---
+name: find_debug_loc_end_iterator_regression
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr2, $vgpr3
+
+ %0:vgpr_32 = COPY $vgpr3
+ %1:vgpr_32 = COPY $vgpr2
+ %2:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
+ %3:sreg_64 = S_MOV_B64 -1
+ %4:sreg_64 = S_XOR_B64 killed %2, killed %3, implicit-def dead $scc
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %6:vreg_1 = COPY %4
+
+ bb.1:
+ %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
+ %8:sreg_32 = PHI %5, %bb.0, %9, %bb.3
+ %10:sreg_64 = S_MOV_B64 0
+
+ bb.2:
+ %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
+ %13:sreg_64 = COPY %6
+ %12:sreg_64 = SI_IF_BREAK %13, %11, implicit-def dead $scc
+ SI_LOOP %12, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ %9:sreg_32 = S_OR_B32 %8, 1, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.4:
+ SI_RETURN
+
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/129886
More information about the llvm-commits
mailing list