[llvm] [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (PR #113544)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 05:00:25 PDT 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/113544
>From 7eb47efc4a7a096f69bfb93ff25f4a360d287d12 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 24 Oct 2024 11:35:45 +0100
Subject: [PATCH 1/2] [AMDGPU] Fix machine verification failure after
SIFoldOperandsImpl::tryFoldOMod
Fixes #54201
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 6 +++
llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir | 50 ++++++++++++++++++++
2 files changed, 56 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c912a580854c1c..3731fbea76f9be 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1793,6 +1793,12 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ if (Def->getParent() != MI.getParent()) {
+ // Kill flags can be wrong if we replaced a def inside a loop with a def
+ // outside the loop.
+ for (auto &Use : MRI->use_nodbg_operands(Def->getOperand(0).getReg()))
+ Use.setIsKill(false);
+ }
MI.eraseFromParent();
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
diff --git a/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
new file mode 100644
index 00000000000000..8065e2cfc00432
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9
+
+# When V_ADD_F32 is replaced with an output modifier on V_RSQ_F32, check that
+# the kill flag is cleared on the use of %4 in V_MUL_F32.
+---
+name: main
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+body: |
+ ; GFX9-LABEL: name: main
+ ; GFX9: bb.0:
+ ; GFX9-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: bb.1:
+ ; GFX9-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-NEXT: S_BRANCH %bb.2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: bb.2:
+ ; GFX9-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec
+
+ bb.1:
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec
+ %5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec
+ SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
>From b25d1e26fdaee8e2ad6931c13a8646a03998b339 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 29 Oct 2024 11:47:58 +0000
Subject: [PATCH 2/2] Use clearKillFlags
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3731fbea76f9be..f0c7837e0bb75a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1793,12 +1793,9 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
- if (Def->getParent() != MI.getParent()) {
- // Kill flags can be wrong if we replaced a def inside a loop with a def
- // outside the loop.
- for (auto &Use : MRI->use_nodbg_operands(Def->getOperand(0).getReg()))
- Use.setIsKill(false);
- }
+ // Kill flags can be wrong if we replaced a def inside a loop with a def
+ // outside the loop.
+ MRI->clearKillFlags(Def->getOperand(0).getReg());
MI.eraseFromParent();
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
More information about the llvm-commits mailing list