[llvm] [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (PR #113544)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 03:50:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
Fixes #54201
---
Full diff: https://github.com/llvm/llvm-project/pull/113544.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+6)
- (added) llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir (+50)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c912a580854c1c..3731fbea76f9be 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1793,6 +1793,12 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ if (Def->getParent() != MI.getParent()) {
+ // Kill flags can be wrong if we replaced a def inside a loop with a def
+ // outside the loop.
+ for (auto &Use : MRI->use_nodbg_operands(Def->getOperand(0).getReg()))
+ Use.setIsKill(false);
+ }
MI.eraseFromParent();
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
diff --git a/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
new file mode 100644
index 00000000000000..8065e2cfc00432
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9
+
+# When V_ADD_F32 is replaced with an output modifier on V_RSQ_F32, check that
+# the kill flag is cleared on the use of %4 in V_MUL_F32.
+---
+name: main
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+body: |
+ ; GFX9-LABEL: name: main
+ ; GFX9: bb.0:
+ ; GFX9-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: bb.1:
+ ; GFX9-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-NEXT: S_BRANCH %bb.2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: bb.2:
+ ; GFX9-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec
+
+ bb.1:
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec
+ %5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec
+ SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/113544
More information about the llvm-commits mailing list