[llvm] 3b17cb1 - [AMDGPU] Kill def when folding immediate in two-addr pass
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 3 09:55:13 PST 2021
Author: Stanislav Mekhanoshin
Date: 2021-12-03T09:37:49-08:00
New Revision: 3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310
URL: https://github.com/llvm/llvm-project/commit/3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310
DIFF: https://github.com/llvm/llvm-project/commit/3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310.diff
LOG: [AMDGPU] Kill def when folding immediate in two-addr pass
The two-address pass runs right before register allocation (RA), so if
an immediate is folded into an instruction there is no later pass left
to remove the now-dead def. We end up with something like:
v_mov_b32_e32 v14, 0xc1700000
v_mov_b32_e32 v14, 0x41200000
v_fmaak_f32 v51, s67, v19, 0xc1700000
v_fmaak_f32 v38, v51, v19, 0x41200000
This patch kills the dead move instruction as part of the folding itself.
Differential Revision: https://reviews.llvm.org/D114999
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 92f5322b8ad24..3d8474a1422a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3106,23 +3106,26 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
}
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
- int64_t &Imm) {
+ int64_t &Imm, MachineInstr **DefMI = nullptr) {
if (Reg.isPhysical())
return false;
auto *Def = MRI.getUniqueVRegDef(Reg);
if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
Imm = Def->getOperand(1).getImm();
+ if (DefMI)
+ *DefMI = Def;
return true;
}
return false;
}
-static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) {
+static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+ MachineInstr **DefMI = nullptr) {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
- return getFoldableImm(MO->getReg(), MRI, Imm);
+ return getFoldableImm(MO->getReg(), MRI, Imm, DefMI);
}
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
@@ -3195,8 +3198,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
+ MachineInstr *DefMI;
+ const auto killDef = [&DefMI, &MBB, this]() -> void {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ // The only user is the instruction which will be killed.
+ if (!MRI.hasOneNonDBGUse(DefMI->getOperand(0).getReg()))
+ return;
+ // We cannot just remove the DefMI here, calling pass will crash.
+ DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
+ DefMI->RemoveOperand(I);
+ };
+
int64_t Imm;
- if (getFoldableImm(Src2, Imm)) {
+ if (getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3209,13 +3224,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
- if (getFoldableImm(Src1, Imm)) {
+ if (getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
@@ -3225,10 +3241,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
- if (getFoldableImm(Src0, Imm)) {
+ if (getFoldableImm(Src0, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
@@ -3241,6 +3258,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index 375a32a7ab257..708b7d926a886 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -68,8 +68,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
; GFX9-NEXT: v_lshlrev_b32_e32 v8, 2, v2
; GFX9-NEXT: v_add_u32_e32 v9, v17, v12
; GFX9-NEXT: s_mov_b64 s[10:11], 0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3727c5ac
; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: ; implicit-def: $vgpr3
; GFX9-NEXT: .LBB1_2: ; %bb23
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v0
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
index 343864c4cd678..49de2263ccc4b 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
@@ -1,6 +1,8 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: test_fmamk_reg_imm_f32
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f32
@@ -20,6 +22,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_imm_reg_f32
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f32
@@ -39,6 +43,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_f32
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: V_FMAAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
---
name: test_fmaak_f32
@@ -56,6 +62,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_reg_imm_f16
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
@@ -75,6 +83,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_imm_reg_f16
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f16
@@ -94,6 +104,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_f16
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
---
name: test_fmaak_f16
@@ -110,6 +122,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_sgpr_src0_f32
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: %2:vgpr_32 = V_FMAMK_F32 killed %0, 1078523331, %3:vgpr_32, implicit $mode, implicit $exec
---
@@ -129,6 +143,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_inlineimm_src0_f32
+# GCN: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: %1:vgpr_32 = V_FMAMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $mode, implicit $exec
---
@@ -183,6 +199,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_inline_literal_f16
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
# GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
---
@@ -201,3 +219,47 @@ body: |
...
+# GCN-LABEL: name: test_fmamk_reg_imm_f32_2_folds
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
+# GCN: V_FMAMK_F32 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
+# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f32_2_folds
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = COPY %0.sub1
+ %2 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3 = V_FMAC_F32_e32 %0.sub0, %2, %1, implicit $mode, implicit $exec
+ %4 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GCN-LABEL: name: test_fmamk_reg_imm_f32_used_imm
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f32_used_imm
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = COPY %0.sub1
+ %2 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ S_NOP 0, implicit %2
+
+...
More information about the llvm-commits
mailing list