[llvm] AMDGPU: Fix assert when multi operands to update after folding imm (PR #148205)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 03:48:58 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (macurtis-amd)
<details>
<summary>Changes</summary>
In the original motivating test case, [FoldList](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1764) had entries:
```
#0: UseMI: %224:sreg_32 = S_OR_B32 %219.sub0:sreg_64, %219.sub1:sreg_64, implicit-def dead $scc
UseOpNo: 1
#1: UseMI: %224:sreg_32 = S_OR_B32 %219.sub0:sreg_64, %219.sub1:sreg_64, implicit-def dead $scc
UseOpNo: 2
```
After calling [updateOperand(#0)](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1773), [tryConstantFoldOp(#0.UseMI)](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1786) removed operand 1, so entry #1.UseOpNo was no longer valid, resulting in an [assertion failure](https://github.com/llvm/llvm-project/blob/4a35214bddbb67f9597a500d48ab8c4fb25af150/llvm/include/llvm/ADT/ArrayRef.h#L452).
This change defers constant folding until all operands have been updated so that UseOpNo values remain stable.
---
Full diff: https://github.com/llvm/llvm-project/pull/148205.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+10-4)
- (added) llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.ll (+58)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0ed06c37507af..0f2a932f984b1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1761,6 +1761,7 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
for (MachineInstr *Copy : CopiesToReplace)
Copy->addImplicitDefUseOperands(*MF);
+ SmallVector<MachineInstr *, 4> ConstantFoldCandidates;
for (FoldCandidate &Fold : FoldList) {
assert(!Fold.isReg() || Fold.Def.OpToFold);
if (Fold.isReg() && Fold.getReg().isVirtual()) {
@@ -1783,16 +1784,21 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
<< static_cast<int>(Fold.UseOpNo) << " of "
<< *Fold.UseMI);
- if (Fold.isImm() && tryConstantFoldOp(Fold.UseMI)) {
- LLVM_DEBUG(dbgs() << "Constant folded " << *Fold.UseMI);
- Changed = true;
- }
+ if (Fold.isImm() && !is_contained(ConstantFoldCandidates, Fold.UseMI))
+ ConstantFoldCandidates.push_back(Fold.UseMI);
} else if (Fold.Commuted) {
// Restoring instruction's original operand order if fold has failed.
TII->commuteInstruction(*Fold.UseMI, false);
}
}
+
+ for (MachineInstr *MI : ConstantFoldCandidates) {
+ if (tryConstantFoldOp(MI)) {
+ LLVM_DEBUG(dbgs() << "Constant folded " << *MI);
+ Changed = true;
+ }
+ }
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.ll b/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.ll
new file mode 100644
index 0000000000000..a81fc6a25e43e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O3 -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -o - < %s | FileCheck %s
+
+%struct.bar = type { %struct.bar.0, %struct.bar.0, %struct.bar.0 }
+%struct.bar.0 = type { %struct.blam }
+%struct.blam = type { i32, i32, i32, i32 }
+
+@global = external addrspace(3) global %struct.bar
+
+define void @snork() {
+; CHECK-LABEL: snork:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s4, 0
+; CHECK-NEXT: v_mov_b32_e32 v4, global@abs32@lo
+; CHECK-NEXT: s_mov_b32 s5, s4
+; CHECK-NEXT: s_mov_b32 s6, s4
+; CHECK-NEXT: s_mov_b32 s7, s4
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: v_mov_b32_e32 v2, s6
+; CHECK-NEXT: v_mov_b32_e32 v3, s7
+; CHECK-NEXT: s_cmp_lg_u32 0, 0
+; CHECK-NEXT: ds_write_b128 v4, v[0:3] offset:32
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %call = call float @llvm.amdgcn.rcp.f32(float 0.000000e+00)
+ %fmul = fmul ninf float %call, 0.000000e+00
+ %fptoui = fptoui float %fmul to i32
+ %zext = zext i32 %fptoui to i64
+ %mul = mul i64 2, %zext
+ %trunc = trunc i64 %mul to i32
+ store i32 %trunc, ptr addrspace(3) getelementptr inbounds (%struct.bar, ptr addrspace(3) @global, i32 0, i32 2), align 16
+ store i32 0, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 36), align 4
+ store i32 0, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 40), align 8
+ store i32 %trunc, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 44), align 4
+ %load = load <4 x i32>, ptr addrspace(3) getelementptr inbounds (%struct.bar, ptr addrspace(3) @global, i32 0, i32 2), align 16
+ %extractelement = extractelement <4 x i32> %load, i64 0
+ %icmp = icmp ne i32 %extractelement, 0
+ %extractelement1 = extractelement <4 x i32> %load, i64 3
+ %icmp2 = icmp ne i32 %extractelement1, 0
+ %select = select i1 %icmp, i1 true, i1 %icmp2
+ br i1 %select, label %bb5, label %bb3
+
+bb3: ; preds = %bb
+ %and = and <4 x i32> %load, splat (i32 1)
+ %extractelement4 = extractelement <4 x i32> %and, i64 0
+ br label %bb5
+
+bb5: ; preds = %bb3, %bb
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.amdgcn.rcp.f32(float) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
``````````
</details>
https://github.com/llvm/llvm-project/pull/148205
More information about the llvm-commits
mailing list