[llvm] [AMDGPU] Prevent hang in SIFoldOperands (PR #82099)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 21 12:44:18 PST 2024
https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/82099
From 5164ef45fae0259764ca49752ed4a40b3402dc80 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Sat, 17 Feb 2024 01:34:47 -0600
Subject: [PATCH] [AMDGPU] Prevent hang in SIFoldOperands by caching Uses
foldOperand() recurses when it visits the users of a REG_SEQUENCE, and the
recursive call can reorder the use operands of the REG_SEQUENCE's destination
register. That invalidates the range-based for loop over those uses and can
make it loop forever. Cache the uses in a vector before processing so the
iteration is unaffected by the reordering.
Added a repro MIR testcase.
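For reviewers unfamiliar with the failure mode, the fix is the usual
snapshot-before-mutate idiom: collect pointers to the use operands up front,
then walk the snapshot instead of the live use list. Below is a minimal
standalone C++ sketch of the idea; the Use/processAndMaybeReorder/foldAll
names are illustrative, not LLVM API, and std::list stands in for MRI's
intrusive use list, whose elements likewise keep stable addresses when only
the list order changes.

#include <cstdio>
#include <list>
#include <vector>

struct Use { int Idx; };

// Stand-in for the recursive foldOperand() call: "folding" a use splices
// it to the front of the use list, changing iteration order mid-walk.
static void processAndMaybeReorder(Use &U, std::list<Use> &Uses) {
  for (auto It = Uses.begin(); It != Uses.end(); ++It) {
    if (&*It == &U) {
      Uses.splice(Uses.begin(), Uses, It);
      break;
    }
  }
}

static void foldAll(std::list<Use> &Uses) {
  // Snapshot pointers to every use first. std::list elements have stable
  // addresses, so the pointers stay valid even if the list is reordered
  // while the snapshot is processed.
  std::vector<Use *> Snapshot;
  for (Use &U : Uses)
    Snapshot.push_back(&U);
  for (Use *U : Snapshot)
    processAndMaybeReorder(*U, Uses);
}

int main() {
  std::list<Use> Uses{{0}, {1}, {2}};
  foldAll(Uses); // Each use is visited exactly once, despite reordering.
  for (const Use &U : Uses)
    std::printf("%d\n", U.Idx); // Prints 2, 1, 0.
  return 0;
}

In the actual patch the snapshot is a SmallVector<MachineOperand *, 4>
filled from MRI->use_nodbg_operands(RegSeqDstReg), which has the same
stable-address property.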
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 19 ++++++++++---------
.../CodeGen/AMDGPU/si-fold-reg-sequence.mir | 16 ++++++++++++++++
2 files changed, 26 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d16d8ebd41a54f..5b59caa4ed6164 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -219,10 +219,8 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
default:
return false;
case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_IMM_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
break;
}
@@ -774,21 +772,24 @@ void SIFoldOperands::foldOperand(
if (UseMI->isRegSequence()) {
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
-
- for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
- MachineInstr *RSUseMI = RSUse.getParent();
+
+ // Grab the use operands first
+ SmallVector<MachineOperand *, 4> UsesToProcess;
+ for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
+ UsesToProcess.push_back(&Use);
+ for (auto *RSUse : UsesToProcess) {
+ MachineInstr *RSUseMI = RSUse->getParent();
if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
- RSUseMI->getOperandNo(&RSUse), FoldList))
+ RSUseMI->getOperandNo(RSUse), FoldList))
continue;
- if (RSUse.getSubReg() != RegSeqDstSubReg)
+ if (RSUse->getSubReg() != RegSeqDstSubReg)
continue;
- foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
+ foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
CopiesToReplace);
}
-
return;
}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
new file mode 100644
index 00000000000000..ef63262c20db3c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
@@ -0,0 +1,16 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
+
+---
+name: fold_reg_sequence
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ %33:sreg_32 = S_MOV_B32 0
+ %34:sreg_32 = S_MOV_B32 429
+ %35:sreg_64 = REG_SEQUENCE killed %34, %subreg.sub0, %33, %subreg.sub1
+ %49:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %35.sub0, implicit $exec
+ %75:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ %77:vgpr_32 = V_MUL_HI_U32_e64 %75, %35.sub0, implicit $exec
+ S_ENDPGM 0
+...
+