[llvm] a156362 - [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (#113544)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 29 07:59:41 PDT 2024


Author: Jay Foad
Date: 2024-10-29T14:59:37Z
New Revision: a156362e93eba9513611dc0989d516e9946cae48

URL: https://github.com/llvm/llvm-project/commit/a156362e93eba9513611dc0989d516e9946cae48
DIFF: https://github.com/llvm/llvm-project/commit/a156362e93eba9513611dc0989d516e9946cae48.diff

LOG: [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (#113544)

Fixes #54201

Added: 
    llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c912a580854c1c..f0c7837e0bb75a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1793,6 +1793,9 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
 
   DefOMod->setImm(OMod);
   MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+  // Kill flags can be wrong if we replaced a def inside a loop with a def
+  // outside the loop.
+  MRI->clearKillFlags(Def->getOperand(0).getReg());
   MI.eraseFromParent();
 
   // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac

diff  --git a/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
new file mode 100644
index 00000000000000..8065e2cfc00432
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9
+
+# When V_ADD_F32 is replaced with an output modifier on V_RSQ_F32, check that
+# the kill flag is cleared on the use of %4 in V_MUL_F32.
+---
+name: main
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    fp32-input-denormals: false
+    fp32-output-denormals: false
+body: |
+  ; GFX9-LABEL: name: main
+  ; GFX9: bb.0:
+  ; GFX9-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-NEXT:   [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GFX9-NEXT:   [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT: bb.1:
+  ; GFX9-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-NEXT:   [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+  ; GFX9-NEXT:   SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-NEXT:   S_BRANCH %bb.2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT: bb.2:
+  ; GFX9-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec
+
+  bb.1:
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec
+    %5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec
+    SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list