[llvm] [AMDGPU] Prevent cyclic behaviour in SIFoldOperands (PR #82099)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 19 22:13:08 PST 2024
https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/82099
>From 20ded735a3e7054759bfb36a2a7f0451237be5f6 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Sat, 17 Feb 2024 01:34:47 -0600
Subject: [PATCH] [AMDGPU] Prevent hang in SIFoldOperands
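SIFoldOperands can hang when folding through a REG_SEQUENCE whose result is used by multiple MIs.
Skip REG_SEQUENCE use instructions that have already been visited, and add a MIR test case that reproduces the hang.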
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 7 ++++++-
.../test/CodeGen/AMDGPU/si-fold-reg-sequence.mir | 16 ++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 8bf05682cbe7ea..53a098ffde9d7f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -74,6 +75,7 @@ class SIFoldOperands : public MachineFunctionPass {
const SIRegisterInfo *TRI;
const GCNSubtarget *ST;
const SIMachineFunctionInfo *MFI;
+ mutable DenseSet<MachineInstr *> SeenMI;
bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
const MachineOperand &OpToFold) const;
@@ -772,7 +774,6 @@ void SIFoldOperands::foldOperand(
if (UseMI->isRegSequence()) {
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
-
for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
MachineInstr *RSUseMI = RSUse.getParent();
@@ -783,6 +784,10 @@ void SIFoldOperands::foldOperand(
if (RSUse.getSubReg() != RegSeqDstSubReg)
continue;
+ if (SeenMI.contains(RSUseMI))
+ continue;
+ SeenMI.insert(RSUseMI);
+
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
CopiesToReplace);
}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
new file mode 100644
index 00000000000000..ef63262c20db3c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
@@ -0,0 +1,16 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
+
+---
+name: fold_reg_sequence
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ %33:sreg_32 = S_MOV_B32 0
+ %34:sreg_32 = S_MOV_B32 429
+ %35:sreg_64 = REG_SEQUENCE killed %34, %subreg.sub0, %33, %subreg.sub1
+ %49:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %35.sub0, implicit $exec
+ %75:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ %77:vgpr_32 = V_MUL_HI_U32_e64 %75, %35.sub0, implicit $exec
+ S_ENDPGM 0
+...
+
More information about the llvm-commits
mailing list