[llvm] AMDGPU: Update live intervals in convertToThreeAddress (PR #104610)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 00:17:24 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/104610
>From c71c5dd6030234aa95642447c134a1b69ff439c6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 16 Aug 2024 18:13:40 +0400
Subject: [PATCH 1/8] AMDGPU: Update live intervals in convertToThreeAddress
Fixes #98741
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 37 ++++--
.../test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 117 ++++++++++++++++--
2 files changed, 134 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 90e11df500bc9b..e401d3275905ee 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4058,14 +4058,32 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&]() -> void {
+ const auto killDef = [&](SlotIndex NewIdx) -> void {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
Register DefReg = DefMI->getOperand(0).getReg();
+
+ if (LIS) {
+ LiveInterval &DefLI = LIS->getInterval(DefReg);
+ LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+
+ if (OldSeg->end == NewIdx.getRegSlot()) {
+ DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true);
+
+ for (auto &SR : DefLI.subranges()) {
+ LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
+ SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true);
+ }
+
+ DefLI.removeEmptySubRanges();
+ }
+ }
+
if (!MRI.hasOneNonDBGUse(DefReg))
return;
// We cannot just remove the DefMI here, calling pass will crash.
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ DefMI->getOperand(0).setIsDead(true);
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
DefMI->removeOperand(I);
if (LV)
@@ -4087,9 +4105,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
+ SlotIndex NewIdx;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef(NewIdx);
return MIB;
}
}
@@ -4107,9 +4126,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src2)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
+
+ SlotIndex NewIdx;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef(NewIdx);
return MIB;
}
}
@@ -4129,10 +4150,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src2)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
+
+ SlotIndex NewIdx;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
if (DefMI)
- killDef();
+ killDef(NewIdx);
return MIB;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index 1768e39d1a06c5..afb36041d7f4b4 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,29 +1,120 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
-# GFX10: %2:vgpr_32 = IMPLICIT_DEF
+# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
-registers:
- - { id: 0, class: vreg_64 }
- - { id: 1, class: vgpr_32 }
- - { id: 2, class: vgpr_32 }
- - { id: 3, class: vgpr_32 }
body: |
bb.0:
- %0 = IMPLICIT_DEF
- %1 = COPY %0.sub1
- %2 = V_MOV_B32_e32 1078523331, implicit $exec
- %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f16__imm_is_subreg
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ undef %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
+# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+---
+name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec
+ %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f16__use_imm_before_mac
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ S_NOP 0, implicit %2
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_after_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f16__use_imm_after_mac
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ S_NOP 0, implicit %2
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+
+---
+name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ S_NOP 0, implicit %2
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ S_NOP 0, implicit %2
...
# GFX10-LABEL: name: test_fmamk_imm_reg_f16
-# GFX10: %2:vgpr_32 = IMPLICIT_DEF
+# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
>From 15573e49b115ff7197b6ab80de1c1b819c9bc674 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 19 Aug 2024 17:25:26 +0400
Subject: [PATCH 2/8] Comments
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e401d3275905ee..c5b320699a1d02 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4066,13 +4066,15 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+ assert(OldSeg && "segment not found for instruction in LiveInterval");
if (OldSeg->end == NewIdx.getRegSlot()) {
- DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true);
+ DefLI.removeSegment(*OldSeg, true);
for (auto &SR : DefLI.subranges()) {
LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
- SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true);
+ if (OldSegSR->end == NewIdx.getRegSlot())
+ SR.removeSegment(*OldSegSR, true);
}
DefLI.removeEmptySubRanges();
>From a332148d28f8f6f422df94e18b0449dd91573ed3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 19 Aug 2024 17:28:27 +0400
Subject: [PATCH 3/8] Rename variable
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c5b320699a1d02..ab22a7b311c2cb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4058,22 +4058,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&](SlotIndex NewIdx) -> void {
+ const auto killDef = [&](SlotIndex OldDefIdx) -> void {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
Register DefReg = DefMI->getOperand(0).getReg();
if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
- LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+ LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldDefIdx);
assert(OldSeg && "segment not found for instruction in LiveInterval");
- if (OldSeg->end == NewIdx.getRegSlot()) {
+ if (OldSeg->end == OldDefIdx.getRegSlot()) {
DefLI.removeSegment(*OldSeg, true);
for (auto &SR : DefLI.subranges()) {
- LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
- if (OldSegSR->end == NewIdx.getRegSlot())
+ LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldDefIdx);
+ if (OldSegSR->end == OldDefIdx.getRegSlot())
SR.removeSegment(*OldSegSR, true);
}
>From 270f7806331e3e7cf435f20407f28775da39345c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 15:16:32 +0400
Subject: [PATCH 4/8] Rename variable
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ab22a7b311c2cb..bff43336b017d9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4058,22 +4058,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&](SlotIndex OldDefIdx) -> void {
+ const auto killDef = [&](SlotIndex OldUseIdx) -> void {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
Register DefReg = DefMI->getOperand(0).getReg();
if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
- LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldDefIdx);
+ LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx);
assert(OldSeg && "segment not found for instruction in LiveInterval");
- if (OldSeg->end == OldDefIdx.getRegSlot()) {
+ if (OldSeg->end == OldUseIdx.getRegSlot()) {
DefLI.removeSegment(*OldSeg, true);
for (auto &SR : DefLI.subranges()) {
- LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldDefIdx);
- if (OldSegSR->end == OldDefIdx.getRegSlot())
+ LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
+ if (OldSegSR->end == OldUseIdx.getRegSlot())
SR.removeSegment(*OldSegSR, true);
}
>From 1fc765d473ecad1c93b414233b1b431bfb901d9f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 16:41:02 +0400
Subject: [PATCH 5/8] Remove less
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 11 ++++++++---
llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 3 ++-
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bff43336b017d9..3f11ffca76f3fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4069,12 +4069,17 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
assert(OldSeg && "segment not found for instruction in LiveInterval");
if (OldSeg->end == OldUseIdx.getRegSlot()) {
- DefLI.removeSegment(*OldSeg, true);
+ // We only want to leave the dead def.
+ DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(),
+ true);
for (auto &SR : DefLI.subranges()) {
LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
- if (OldSegSR->end == OldUseIdx.getRegSlot())
- SR.removeSegment(*OldSegSR, true);
+ if (OldSegSR->end == OldUseIdx.getRegSlot()) {
+ // We only want to leave the dead def.
+ SR.removeSegment(OldSegSR->start.getDeadSlot(),
+ OldUseIdx.getRegSlot(), true);
+ }
}
DefLI.removeEmptySubRanges();
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index afb36041d7f4b4..ccb2f5f6a1888b 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,6 +1,6 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
@@ -8,6 +8,7 @@
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
+tracksRegLiveness: true
body: |
bb.0:
>From 7d7a5e68db28c1f524966e5943941d10fff07e42 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 16:42:35 +0400
Subject: [PATCH 6/8] Add tracksRegLiveness to every function
---
llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index ccb2f5f6a1888b..bf11ed874afd0d 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -26,6 +26,7 @@ body: |
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16__imm_is_subreg
+tracksRegLiveness: true
body: |
bb.0:
@@ -44,9 +45,9 @@ body: |
# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
---
name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+tracksRegLiveness: true
body: |
bb.0:
-
%0:vreg_64 = IMPLICIT_DEF
%1:vgpr_32 = COPY %0.sub1
undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec
@@ -63,6 +64,7 @@ body: |
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16__use_imm_before_mac
+tracksRegLiveness: true
body: |
bb.0:
@@ -81,6 +83,7 @@ body: |
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16__use_imm_after_mac
+tracksRegLiveness: true
body: |
bb.0:
@@ -102,6 +105,7 @@ body: |
---
name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
+tracksRegLiveness: true
body: |
bb.0:
@@ -120,6 +124,7 @@ body: |
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f16
+tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
@@ -141,6 +146,7 @@ body: |
# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
---
name: test_fmaak_f16
+tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
>From 61b61f2b4b03c2ba85b2c02a7c916a3845d5935e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 4 Sep 2024 18:05:01 +0400
Subject: [PATCH 7/8] Hackily use shrinkToUses
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 51 +++++++++----------
.../test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 34 ++++++++-----
2 files changed, 45 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3f11ffca76f3fb..de68ff2d02c2aa 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4059,42 +4059,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
const auto killDef = [&](SlotIndex OldUseIdx) -> void {
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
Register DefReg = DefMI->getOperand(0).getReg();
+ if (MRI.hasOneNonDBGUse(DefReg)) {
+ // We cannot just remove the DefMI here, calling pass will crash.
+ DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ DefMI->getOperand(0).setIsDead(true);
+ for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
+ DefMI->removeOperand(I);
+ if (LV)
+ LV->getVarInfo(DefReg).AliveBlocks.clear();
+ }
+
if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
- LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx);
- assert(OldSeg && "segment not found for instruction in LiveInterval");
-
- if (OldSeg->end == OldUseIdx.getRegSlot()) {
- // We only want to leave the dead def.
- DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(),
- true);
-
- for (auto &SR : DefLI.subranges()) {
- LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
- if (OldSegSR->end == OldUseIdx.getRegSlot()) {
- // We only want to leave the dead def.
- SR.removeSegment(OldSegSR->start.getDeadSlot(),
- OldUseIdx.getRegSlot(), true);
- }
- }
- DefLI.removeEmptySubRanges();
+ // We cannot delete the original instruction here, so hack out the use
+ // in the original instruction with a dummy register so we can use
+ // shrinkToUses to deal with any multi-use edge cases. Other targets do
+ // not have the complexity of deleting a use to consider here.
+ Register DummyReg = MRI.cloneVirtualRegister(DefReg);
+ for (MachineOperand &MIOp : MI.uses()) {
+ if (MIOp.isReg() && MIOp.getReg() == DefReg) {
+ MIOp.setIsUndef(true);
+ MIOp.setReg(DummyReg);
+ }
}
- }
- if (!MRI.hasOneNonDBGUse(DefReg))
- return;
- // We cannot just remove the DefMI here, calling pass will crash.
- DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
- DefMI->getOperand(0).setIsDead(true);
- for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
- DefMI->removeOperand(I);
- if (LV)
- LV->getVarInfo(DefReg).AliveBlocks.clear();
+ LIS->shrinkToUses(&DefLI);
+ }
};
int64_t Imm;
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index bf11ed874afd0d..f814dd335d20ca 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,11 +1,13 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s
+
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
-# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
tracksRegLiveness: true
@@ -23,7 +25,8 @@ body: |
# GFX10: %0:vreg_64 = IMPLICIT_DEF
# GFX10: %1:vgpr_32 = COPY %0.sub1
# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16__imm_is_subreg
tracksRegLiveness: true
@@ -42,7 +45,8 @@ body: |
# GFX10: %1:vgpr_32 = COPY %0.sub1
# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
-# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $e
---
name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
tracksRegLiveness: true
@@ -61,7 +65,8 @@ body: |
# GFX10: %1:vgpr_32 = COPY %0.sub1
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
# GFX10: S_NOP 0, implicit %2
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16__use_imm_before_mac
tracksRegLiveness: true
@@ -80,7 +85,8 @@ body: |
# GFX10: %0:vreg_64 = IMPLICIT_DEF
# GFX10: %1:vgpr_32 = COPY %0.sub1
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16__use_imm_after_mac
tracksRegLiveness: true
@@ -100,7 +106,8 @@ body: |
# GFX10: %1:vgpr_32 = COPY %0.sub1
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
# GFX10: S_NOP 0, implicit %2
-# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
# GFX10: S_NOP 0, implicit %2
---
@@ -121,7 +128,8 @@ body: |
# GFX10-LABEL: name: test_fmamk_imm_reg_f16
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
-# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f16
tracksRegLiveness: true
@@ -143,7 +151,8 @@ body: |
# GFX10-LABEL: name: test_fmaak_f16
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
# GFX10-NOT: V_MOV_B32
-# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
+# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
+# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
---
name: test_fmaak_f16
tracksRegLiveness: true
@@ -163,7 +172,8 @@ body: |
# GFX10-LABEL: name: test_fmaak_inline_literal_f16
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
# GFX10-NOT: V_MOV_B32
-# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
+# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
+# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec
---
name: test_fmaak_inline_literal_f16
>From 7a564772ca3f85e3ec3299fbcec8d40f2286be21 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 6 Sep 2024 11:16:58 +0400
Subject: [PATCH 8/8] Remove unused index argument
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index de68ff2d02c2aa..c6f28af1e5e731 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4058,7 +4058,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&](SlotIndex OldUseIdx) -> void {
+ const auto killDef = [&]() -> void {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
Register DefReg = DefMI->getOperand(0).getReg();
@@ -4107,10 +4107,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
- SlotIndex NewIdx;
if (LIS)
- NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef(NewIdx);
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
@@ -4129,10 +4128,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
- SlotIndex NewIdx;
if (LIS)
- NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef(NewIdx);
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
@@ -4153,11 +4151,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
- SlotIndex NewIdx;
if (LIS)
- NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
if (DefMI)
- killDef(NewIdx);
+ killDef();
return MIB;
}
}
More information about the llvm-commits
mailing list