[llvm] a156362 - [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (#113544)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 07:59:41 PDT 2024
Author: Jay Foad
Date: 2024-10-29T14:59:37Z
New Revision: a156362e93eba9513611dc0989d516e9946cae48
URL: https://github.com/llvm/llvm-project/commit/a156362e93eba9513611dc0989d516e9946cae48
DIFF: https://github.com/llvm/llvm-project/commit/a156362e93eba9513611dc0989d516e9946cae48.diff
LOG: [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (#113544)
Fixes #54201
Added:
llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c912a580854c1c..f0c7837e0bb75a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1793,6 +1793,9 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ // Kill flags can be wrong if we replaced a def inside a loop with a def
+ // outside the loop.
+ MRI->clearKillFlags(Def->getOperand(0).getReg());
MI.eraseFromParent();
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
diff --git a/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
new file mode 100644
index 00000000000000..8065e2cfc00432
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9
+
+# When V_ADD_F32 is replaced with an output modifier on V_RSQ_F32, check that
+# the kill flag is cleared on the use of %4 in V_MUL_F32.
+---
+name: main
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+body: |
+ ; GFX9-LABEL: name: main
+ ; GFX9: bb.0:
+ ; GFX9-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: bb.1:
+ ; GFX9-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX9-NEXT: S_BRANCH %bb.2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: bb.2:
+ ; GFX9-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_64 = IMPLICIT_DEF
+ %2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec
+
+ bb.1:
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec
+ %5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec
+ SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list