[llvm] AMDGPU: Update live intervals in convertToThreeAddress (PR #104610)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 4 07:05:31 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/104610

>From 71c4a29c5cc1f44feecb3d3765c8d4e013c08c0c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 16 Aug 2024 18:13:40 +0400
Subject: [PATCH 1/7] AMDGPU: Update live intervals in convertToThreeAddress

Fixes #98741
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  37 ++++--
 .../test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 117 ++++++++++++++++--
 2 files changed, 134 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a857bdba53c3e8..4b602c1c401f33 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3934,14 +3934,32 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
        !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
     MachineInstr *DefMI;
-    const auto killDef = [&]() -> void {
+    const auto killDef = [&](SlotIndex NewIdx) -> void {
       const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
       // The only user is the instruction which will be killed.
       Register DefReg = DefMI->getOperand(0).getReg();
+
+      if (LIS) {
+        LiveInterval &DefLI = LIS->getInterval(DefReg);
+        LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+
+        if (OldSeg->end == NewIdx.getRegSlot()) {
+          DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true);
+
+          for (auto &SR : DefLI.subranges()) {
+            LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
+            SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true);
+          }
+
+          DefLI.removeEmptySubRanges();
+        }
+      }
+
       if (!MRI.hasOneNonDBGUse(DefReg))
         return;
       // We cannot just remove the DefMI here, calling pass will crash.
       DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+      DefMI->getOperand(0).setIsDead(true);
       for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
         DefMI->removeOperand(I);
       if (LV)
@@ -3963,9 +3981,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
                   .addImm(Imm)
                   .setMIFlags(MI.getFlags());
         updateLiveVariables(LV, MI, *MIB);
+        SlotIndex NewIdx;
         if (LIS)
-          LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-        killDef();
+          NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+        killDef(NewIdx);
         return MIB;
       }
     }
@@ -3983,9 +4002,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
                   .add(*Src2)
                   .setMIFlags(MI.getFlags());
         updateLiveVariables(LV, MI, *MIB);
+
+        SlotIndex NewIdx;
         if (LIS)
-          LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-        killDef();
+          NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+        killDef(NewIdx);
         return MIB;
       }
     }
@@ -4005,10 +4026,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
                   .add(*Src2)
                   .setMIFlags(MI.getFlags());
         updateLiveVariables(LV, MI, *MIB);
+
+        SlotIndex NewIdx;
         if (LIS)
-          LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+          NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
         if (DefMI)
-          killDef();
+          killDef(NewIdx);
         return MIB;
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index 1768e39d1a06c5..afb36041d7f4b4 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,29 +1,120 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
 
 # GFX10-LABEL: name: test_fmamk_reg_imm_f16
-# GFX10: %2:vgpr_32 = IMPLICIT_DEF
+# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
 # GFX10-NOT: V_MOV_B32
 # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16
-registers:
-  - { id: 0, class: vreg_64 }
-  - { id: 1, class: vgpr_32 }
-  - { id: 2, class: vgpr_32 }
-  - { id: 3, class: vgpr_32 }
 body:             |
   bb.0:
 
-    %0 = IMPLICIT_DEF
-    %1 = COPY %0.sub1
-    %2 = V_MOV_B32_e32 1078523331, implicit $exec
-    %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub1
+    %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+    %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name:            test_fmamk_reg_imm_f16__imm_is_subreg
+body:             |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub1
+    undef %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+    %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
+# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+---
+name:            test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+body:             |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub1
+    undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec
+    %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+    %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name:            test_fmamk_reg_imm_f16__use_imm_before_mac
+body:             |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub1
+    %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+    S_NOP 0, implicit %2
+    %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_after_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name:            test_fmamk_reg_imm_f16__use_imm_after_mac
+body:             |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub1
+    %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+    %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+    S_NOP 0, implicit %2
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+
+---
+name:            test_fmamk_reg_imm_f16__use_imm_before_after_mac
+body:             |
+  bb.0:
+
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vgpr_32 = COPY %0.sub1
+    %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+    S_NOP 0, implicit %2
+    %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+    S_NOP 0, implicit %2
 
 ...
 
 # GFX10-LABEL: name: test_fmamk_imm_reg_f16
-# GFX10: %2:vgpr_32 = IMPLICIT_DEF
+# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
 # GFX10-NOT: V_MOV_B32
 # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---

>From 41eaa27d40d111a8ae04c9c3f52cfc593c69d7a7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 19 Aug 2024 17:25:26 +0400
Subject: [PATCH 2/7] Address review comments: assert segment exists, check subrange segment end

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4b602c1c401f33..34be5c9a4d9d5d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3942,13 +3942,15 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       if (LIS) {
         LiveInterval &DefLI = LIS->getInterval(DefReg);
         LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+        assert(OldSeg && "segment not found for instruction in LiveInterval");
 
         if (OldSeg->end == NewIdx.getRegSlot()) {
-          DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true);
+          DefLI.removeSegment(*OldSeg, true);
 
           for (auto &SR : DefLI.subranges()) {
             LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
-            SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true);
+            if (OldSegSR->end == NewIdx.getRegSlot())
+              SR.removeSegment(*OldSegSR, true);
           }
 
           DefLI.removeEmptySubRanges();

>From 1f8b5a86d0514ae5435bb6481a272d99f888cbda Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 19 Aug 2024 17:28:27 +0400
Subject: [PATCH 3/7] Rename variable NewIdx to OldDefIdx

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 34be5c9a4d9d5d..719f47bb06fbe2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3934,22 +3934,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
        !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
     MachineInstr *DefMI;
-    const auto killDef = [&](SlotIndex NewIdx) -> void {
+    const auto killDef = [&](SlotIndex OldDefIdx) -> void {
       const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
       // The only user is the instruction which will be killed.
       Register DefReg = DefMI->getOperand(0).getReg();
 
       if (LIS) {
         LiveInterval &DefLI = LIS->getInterval(DefReg);
-        LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+        LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldDefIdx);
         assert(OldSeg && "segment not found for instruction in LiveInterval");
 
-        if (OldSeg->end == NewIdx.getRegSlot()) {
+        if (OldSeg->end == OldDefIdx.getRegSlot()) {
           DefLI.removeSegment(*OldSeg, true);
 
           for (auto &SR : DefLI.subranges()) {
-            LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
-            if (OldSegSR->end == NewIdx.getRegSlot())
+            LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldDefIdx);
+            if (OldSegSR->end == OldDefIdx.getRegSlot())
               SR.removeSegment(*OldSegSR, true);
           }
 

>From 48b4c2b24746a0234b2a080eaa5db3f2e1ec5d75 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 15:16:32 +0400
Subject: [PATCH 4/7] Rename variable OldDefIdx to OldUseIdx

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 719f47bb06fbe2..a542fbe9d97a5d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3934,22 +3934,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
        !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
     MachineInstr *DefMI;
-    const auto killDef = [&](SlotIndex OldDefIdx) -> void {
+    const auto killDef = [&](SlotIndex OldUseIdx) -> void {
       const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
       // The only user is the instruction which will be killed.
       Register DefReg = DefMI->getOperand(0).getReg();
 
       if (LIS) {
         LiveInterval &DefLI = LIS->getInterval(DefReg);
-        LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldDefIdx);
+        LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx);
         assert(OldSeg && "segment not found for instruction in LiveInterval");
 
-        if (OldSeg->end == OldDefIdx.getRegSlot()) {
+        if (OldSeg->end == OldUseIdx.getRegSlot()) {
           DefLI.removeSegment(*OldSeg, true);
 
           for (auto &SR : DefLI.subranges()) {
-            LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldDefIdx);
-            if (OldSegSR->end == OldDefIdx.getRegSlot())
+            LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
+            if (OldSegSR->end == OldUseIdx.getRegSlot())
               SR.removeSegment(*OldSegSR, true);
           }
 

>From 7ec8ac95ea6f9ce7693474c541b25dc6e15a51d4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 16:41:02 +0400
Subject: [PATCH 5/7] Remove less of the live segment, leaving only the dead def

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp         | 11 ++++++++---
 llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir |  3 ++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a542fbe9d97a5d..c7f8f9defdb767 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3945,12 +3945,17 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
         assert(OldSeg && "segment not found for instruction in LiveInterval");
 
         if (OldSeg->end == OldUseIdx.getRegSlot()) {
-          DefLI.removeSegment(*OldSeg, true);
+          // We only want to leave the dead def.
+          DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(),
+                              true);
 
           for (auto &SR : DefLI.subranges()) {
             LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
-            if (OldSegSR->end == OldUseIdx.getRegSlot())
-              SR.removeSegment(*OldSegSR, true);
+            if (OldSegSR->end == OldUseIdx.getRegSlot()) {
+              // We only want to leave the dead def.
+              SR.removeSegment(OldSegSR->start.getDeadSlot(),
+                               OldUseIdx.getRegSlot(), true);
+            }
           }
 
           DefLI.removeEmptySubRanges();
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index afb36041d7f4b4..ccb2f5f6a1888b 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,6 +1,6 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
 
 # GFX10-LABEL: name: test_fmamk_reg_imm_f16
 # GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
@@ -8,6 +8,7 @@
 # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16
+tracksRegLiveness: true
 body:             |
   bb.0:
 

>From b69a9f8efe76ee99f57ea44588fe5b9c6af55530 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 16:42:35 +0400
Subject: [PATCH 6/7] Add tracksRegLiveness to every function

---
 llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index ccb2f5f6a1888b..bf11ed874afd0d 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -26,6 +26,7 @@ body:             |
 # GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16__imm_is_subreg
+tracksRegLiveness: true
 body:             |
   bb.0:
 
@@ -44,9 +45,9 @@ body:             |
 # GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
 ---
 name:            test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+tracksRegLiveness: true
 body:             |
   bb.0:
-
     %0:vreg_64 = IMPLICIT_DEF
     %1:vgpr_32 = COPY %0.sub1
     undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec
@@ -63,6 +64,7 @@ body:             |
 # GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16__use_imm_before_mac
+tracksRegLiveness: true
 body:             |
   bb.0:
 
@@ -81,6 +83,7 @@ body:             |
 # GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16__use_imm_after_mac
+tracksRegLiveness: true
 body:             |
   bb.0:
 
@@ -102,6 +105,7 @@ body:             |
 
 ---
 name:            test_fmamk_reg_imm_f16__use_imm_before_after_mac
+tracksRegLiveness: true
 body:             |
   bb.0:
 
@@ -120,6 +124,7 @@ body:             |
 # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_imm_reg_f16
+tracksRegLiveness: true
 registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vgpr_32 }
@@ -141,6 +146,7 @@ body:             |
 # GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
 ---
 name:            test_fmaak_f16
+tracksRegLiveness: true
 registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vgpr_32 }

>From 5ae0d57ba370e9055de28c82e95b0c315dfab142 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 18:05:01 +0400
Subject: [PATCH 7/7] Hackily use shrinkToUses

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 51 +++++++++----------
 .../test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 34 ++++++++-----
 2 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c7f8f9defdb767..1df14a269ddd37 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3935,42 +3935,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
        !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
     MachineInstr *DefMI;
     const auto killDef = [&](SlotIndex OldUseIdx) -> void {
-      const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
       // The only user is the instruction which will be killed.
       Register DefReg = DefMI->getOperand(0).getReg();
 
+      if (MRI.hasOneNonDBGUse(DefReg)) {
+        // We cannot just remove the DefMI here, calling pass will crash.
+        DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+        DefMI->getOperand(0).setIsDead(true);
+        for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
+          DefMI->removeOperand(I);
+        if (LV)
+          LV->getVarInfo(DefReg).AliveBlocks.clear();
+      }
+
       if (LIS) {
         LiveInterval &DefLI = LIS->getInterval(DefReg);
-        LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx);
-        assert(OldSeg && "segment not found for instruction in LiveInterval");
-
-        if (OldSeg->end == OldUseIdx.getRegSlot()) {
-          // We only want to leave the dead def.
-          DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(),
-                              true);
-
-          for (auto &SR : DefLI.subranges()) {
-            LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
-            if (OldSegSR->end == OldUseIdx.getRegSlot()) {
-              // We only want to leave the dead def.
-              SR.removeSegment(OldSegSR->start.getDeadSlot(),
-                               OldUseIdx.getRegSlot(), true);
-            }
-          }
 
-          DefLI.removeEmptySubRanges();
+        // We cannot delete the original instruction here, so hack out the use
+        // in the original instruction with a dummy register so we can use
+        // shrinkToUses to deal with any multi-use edge cases. Other targets do
+        // not have the complexity of deleting a use to consider here.
+        Register DummyReg = MRI.cloneVirtualRegister(DefReg);
+        for (MachineOperand &MIOp : MI.uses()) {
+          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
+            MIOp.setIsUndef(true);
+            MIOp.setReg(DummyReg);
+          }
         }
-      }
 
-      if (!MRI.hasOneNonDBGUse(DefReg))
-        return;
-      // We cannot just remove the DefMI here, calling pass will crash.
-      DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
-      DefMI->getOperand(0).setIsDead(true);
-      for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
-        DefMI->removeOperand(I);
-      if (LV)
-        LV->getVarInfo(DefReg).AliveBlocks.clear();
+        LIS->shrinkToUses(&DefLI);
+      }
     };
 
     int64_t Imm;
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index bf11ed874afd0d..f814dd335d20ca 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,11 +1,13 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s
+
 
 # GFX10-LABEL: name: test_fmamk_reg_imm_f16
 # GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
 # GFX10-NOT: V_MOV_B32
-# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16
 tracksRegLiveness: true
@@ -23,7 +25,8 @@ body:             |
 # GFX10: %0:vreg_64 = IMPLICIT_DEF
 # GFX10: %1:vgpr_32 = COPY %0.sub1
 # GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16__imm_is_subreg
 tracksRegLiveness: true
@@ -42,7 +45,8 @@ body:             |
 # GFX10: %1:vgpr_32 = COPY %0.sub1
 # GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
 # GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
-# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $e
 ---
 name:            test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
 tracksRegLiveness: true
@@ -61,7 +65,8 @@ body:             |
 # GFX10: %1:vgpr_32 = COPY %0.sub1
 # GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
 # GFX10: S_NOP 0, implicit %2
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16__use_imm_before_mac
 tracksRegLiveness: true
@@ -80,7 +85,8 @@ body:             |
 # GFX10: %0:vreg_64 = IMPLICIT_DEF
 # GFX10: %1:vgpr_32 = COPY %0.sub1
 # GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16__use_imm_after_mac
 tracksRegLiveness: true
@@ -100,7 +106,8 @@ body:             |
 # GFX10: %1:vgpr_32 = COPY %0.sub1
 # GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
 # GFX10: S_NOP 0, implicit %2
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
 # GFX10: S_NOP 0, implicit %2
 
 ---
@@ -121,7 +128,8 @@ body:             |
 # GFX10-LABEL: name: test_fmamk_imm_reg_f16
 # GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
 # GFX10-NOT: V_MOV_B32
-# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_imm_reg_f16
 tracksRegLiveness: true
@@ -143,7 +151,8 @@ body:             |
 # GFX10-LABEL: name: test_fmaak_f16
 # GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
 # GFX10-NOT: V_MOV_B32
-# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
+# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
+# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
 ---
 name:            test_fmaak_f16
 tracksRegLiveness: true
@@ -163,7 +172,8 @@ body:             |
 # GFX10-LABEL: name: test_fmaak_inline_literal_f16
 # GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
 # GFX10-NOT: V_MOV_B32
-# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
+# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
+# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec
 
 ---
 name:            test_fmaak_inline_literal_f16



More information about the llvm-commits mailing list