[llvm] [AMDGPU][MachineVerifier] test failures in SIFoldOperands (PR #166600)

Abhay Kanhere via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 7 08:39:08 PST 2025


https://github.com/AbhayKanhere updated https://github.com/llvm/llvm-project/pull/166600

>From c37f1d1f49111bc2e818de8592270566355bdd90 Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Wed, 5 Nov 2025 09:51:02 -0800
Subject: [PATCH 1/4] [AMDGPU][MachineVerifier] test failures in SIFoldOperands

After PR https://github.com/llvm/llvm-project/pull/151421 was merged,
the following test failures showed up in SIFoldOperands:

LLVM :: CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
LLVM :: CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
LLVM :: CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
LLVM :: CodeGen/AMDGPU/mfma-loop.ll
LLVM :: CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll

In the folding code, if the folded operand is a register, ensure that
earlyClobber is set.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6616b30410590..cf9a6f021fd9b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -689,6 +689,10 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
     if (!TII->isOperandLegal(*MI, OpNo, &New))
       return false;
 
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) {
+      MI->getOperand(OpNo).setIsEarlyClobber(true);
+    }
     Old.ChangeToImmediate(*ImmVal);
     return true;
   }

>From 53599abd7639aeb57179442673ee1c367c821aa2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <arsenm2 at gmail.com>
Date: Wed, 5 Nov 2025 10:50:50 -0800
Subject: [PATCH 2/4] Apply suggestion from @shiltian

Co-authored-by: Shilei Tian <i at tianshilei.me>
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index cf9a6f021fd9b..aed75f55b0ee2 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -690,9 +690,8 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
       return false;
 
     const MCInstrDesc &MCID = MI->getDesc();
-    if (MCID.getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) {
+    if (MCID.getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
       MI->getOperand(OpNo).setIsEarlyClobber(true);
-    }
     Old.ChangeToImmediate(*ImmVal);
     return true;
   }

>From cc04948527f0c7afdae7170ecdbd60c14872db76 Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Wed, 5 Nov 2025 14:35:07 -0800
Subject: [PATCH 3/4] Set the earlyClobber flag on all defs in
 SIFoldOperands. Fixed tests - added -verify-machineinstrs so failures are
 visible sooner. This resolves all failures so far.

---
 llvm/lib/CodeGen/MachineVerifier.cpp          |  4 +-
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |  5 +-
 .../GlobalISel/llvm.amdgcn.mfma.gfx90a.ll     |  2 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll |  8 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll  | 49 ++++++++++---------
 llvm/test/CodeGen/AMDGPU/mfma-loop.ll         |  6 +--
 .../AMDGPU/rewrite-vgpr-mfma-to-agpr.ll       |  2 +-
 7 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index fdf10480b6e05..e759d85d03cf4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -2587,9 +2587,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
   // Verify earlyClobber def operand
   if (MCID.getOperandConstraint(MONum, MCOI::EARLY_CLOBBER) != -1) {
     if (!MO->isReg())
-      report("Early clobber must be a register", MI);
+      report("Early clobber must be a register", MO, MONum);
     if (!MO->isEarlyClobber())
-      report("Missing earlyClobber flag", MI);
+      report("Missing earlyClobber flag", MO, MONum);
   }
 
   switch (MO->getType()) {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index aed75f55b0ee2..da0d3e64057e3 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -690,8 +690,9 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
       return false;
 
     const MCInstrDesc &MCID = MI->getDesc();
-    if (MCID.getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
-      MI->getOperand(OpNo).setIsEarlyClobber(true);
+    for (unsigned I = 0; I < MI->getNumDefs(); ++I)
+      if (MCID.getOperandConstraint(I, MCOI::EARLY_CLOBBER) != -1)
+        MI->getOperand(I).setIsEarlyClobber(true);
     Old.ChangeToImmediate(*ImmVal);
     return true;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
index 5720b882f4e73..cc21305a5a193 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN %s
+; RUN: llc -verify-machineinstrs  -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN %s
 
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index 22bc62acce15d..679b289e13969 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX90A-VGPR %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX942-VGPR %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX90A-VGPR %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX942-VGPR %s
 
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 7e30af96bb8b9..e7d7f87e4fc4c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefix=GFX942-VGPR %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefix=GFX942-VGPR %s
 
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
@@ -3186,13 +3186,14 @@ define amdgpu_kernel void @test_mfma_i32_16x16x4i8_splatimm_src2_64(ptr addrspac
 ;
 ; GFX942-VGPR-LABEL: test_mfma_i32_16x16x4i8_splatimm_src2_64:
 ; GFX942-VGPR:       ; %bb.0: ; %bb
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v0, 1
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v1, 2
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v16, 1
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v17, 2
 ; GFX942-VGPR-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-VGPR-NEXT:    s_nop 0
+; GFX942-VGPR-NEXT:    v_mfma_i32_16x16x4_4b_i8 v[0:15], v16, v17, 64 cbsz:1 abid:2 blgp:3
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v16, 0
-; GFX942-VGPR-NEXT:    v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, 64 cbsz:1 abid:2 blgp:3
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    s_nop 9
+; GFX942-VGPR-NEXT:    s_nop 8
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4538,13 +4539,14 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) %
 ;
 ; GFX942-VGPR-LABEL: test_mfma_f32_16x16x1f32_imm_splat:
 ; GFX942-VGPR:       ; %bb.0: ; %bb
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v0, 1.0
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v1, 2.0
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v16, 1.0
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v17, 2.0
 ; GFX942-VGPR-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-VGPR-NEXT:    s_nop 0
+; GFX942-VGPR-NEXT:    v_mfma_f32_16x16x1_4b_f32 v[0:15], v16, v17, 1.0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v16, 0
-; GFX942-VGPR-NEXT:    v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, 1.0
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    s_nop 8
+; GFX942-VGPR-NEXT:    s_nop 7
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4689,15 +4691,16 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) %
 ;
 ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8f16_imm_splat:
 ; GFX942-VGPR:       ; %bb.0: ; %bb
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v0, 0x3c003c00
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v1, v0
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v2, 0x40004000
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v16, 0x3c003c00
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v17, v16
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v18, 0x40004000
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v19, v18
 ; GFX942-VGPR-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-VGPR-NEXT:    s_nop 0
+; GFX942-VGPR-NEXT:    v_mfma_f32_32x32x8_f16 v[0:15], v[16:17], v[18:19], 1.0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v16, 0
-; GFX942-VGPR-NEXT:    v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], 1.0
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    s_nop 9
+; GFX942-VGPR-NEXT:    s_nop 8
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4908,14 +4911,14 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) %
 ;
 ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x1f32_imm_splat:
 ; GFX942-VGPR:       ; %bb.0: ; %bb
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v0, 1.0
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v1, 2.0
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v32, 1.0
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v33, 2.0
 ; GFX942-VGPR-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-VGPR-NEXT:    s_nop 0
+; GFX942-VGPR-NEXT:    v_mfma_f32_32x32x1_2b_f32 v[0:31], v32, v33, 0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v32, 0
-; GFX942-VGPR-NEXT:    v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 0
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-VGPR-NEXT:    s_nop 15
-; GFX942-VGPR-NEXT:    s_nop 0
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 4bb653848cbf0..e330c72ba0fc4 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
 
 
 ; Check that we do not copy agprs to vgprs and back inside the loop.
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index 8803f3ae4906f..fc799162e999a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 

>From 5ce9af2426df9037294168938c96cfe416946679 Mon Sep 17 00:00:00 2001
From: Abhay Kanhere <abhay at kanhere.net>
Date: Fri, 7 Nov 2025 08:34:16 -0800
Subject: [PATCH 4/4] Review comment resolved: removed change in MachineVerifier

---
 llvm/lib/CodeGen/MachineVerifier.cpp      | 2 +-
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index e759d85d03cf4..060384843b5f9 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -2589,7 +2589,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     if (!MO->isReg())
       report("Early clobber must be a register", MO, MONum);
     if (!MO->isEarlyClobber())
-      report("Missing earlyClobber flag", MO, MONum);
+      report("Missing earlyClobber flag", MI);
   }
 
   switch (MO->getType()) {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index da0d3e64057e3..81ae1410c6146 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -681,6 +681,10 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
         return false;
       MI->setDesc(TII->get(NewMFMAOpc));
       MI->untieRegOperand(0);
+      const MCInstrDesc &MCID = MI->getDesc();
+      for (unsigned I = 0; I < MI->getNumDefs(); ++I)
+        if (MCID.getOperandConstraint(I, MCOI::EARLY_CLOBBER) != -1)
+          MI->getOperand(I).setIsEarlyClobber(true);
     }
 
     // TODO: Should we try to avoid adding this to the candidate list?
@@ -689,10 +693,6 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
     if (!TII->isOperandLegal(*MI, OpNo, &New))
       return false;
 
-    const MCInstrDesc &MCID = MI->getDesc();
-    for (unsigned I = 0; I < MI->getNumDefs(); ++I)
-      if (MCID.getOperandConstraint(I, MCOI::EARLY_CLOBBER) != -1)
-        MI->getOperand(I).setIsEarlyClobber(true);
     Old.ChangeToImmediate(*ImmVal);
     return true;
   }



More information about the llvm-commits mailing list