[llvm] 3b17cb1 - [AMDGPU] Kill def when folding immediate in two-addr pass

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 3 09:55:13 PST 2021


Author: Stanislav Mekhanoshin
Date: 2021-12-03T09:37:49-08:00
New Revision: 3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310

URL: https://github.com/llvm/llvm-project/commit/3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310
DIFF: https://github.com/llvm/llvm-project/commit/3b17cb1506e5d79cfb2b7c0b903395c3f0a1a310.diff

LOG: [AMDGPU] Kill def when folding immediate in two-addr pass

The two-address pass runs right before register allocation (RA), so
if an immediate is folded into an instruction there is no later pass
left to remove the dead def. We end up with something like:

	v_mov_b32_e32 v14, 0xc1700000
	v_mov_b32_e32 v14, 0x41200000
	v_fmaak_f32 v51, s67, v19, 0xc1700000
	v_fmaak_f32 v38, v51, v19, 0x41200000

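The patch kills the dead move instruction as part of the folding itself:
when the folded instruction was the only non-debug user of the move's
result, the move is rewritten into an IMPLICIT_DEF.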

Differential Revision: https://reviews.llvm.org/D114999

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
    llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 92f5322b8ad24..3d8474a1422a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3106,23 +3106,26 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
 }
 
 static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
-                           int64_t &Imm) {
+                           int64_t &Imm, MachineInstr **DefMI = nullptr) {
   if (Reg.isPhysical())
     return false;
   auto *Def = MRI.getUniqueVRegDef(Reg);
   if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
     Imm = Def->getOperand(1).getImm();
+    if (DefMI)
+      *DefMI = Def;
     return true;
   }
   return false;
 }
 
-static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) {
+static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+                           MachineInstr **DefMI = nullptr) {
   if (!MO->isReg())
     return false;
   const MachineFunction *MF = MO->getParent()->getParent()->getParent();
   const MachineRegisterInfo &MRI = MF->getRegInfo();
-  return getFoldableImm(MO->getReg(), MRI, Imm);
+  return getFoldableImm(MO->getReg(), MRI, Imm, DefMI);
 }
 
 static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
@@ -3195,8 +3198,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       // If we have an SGPR input, we will violate the constant bus restriction.
       (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
        !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
+    MachineInstr *DefMI;
+    const auto killDef = [&DefMI, &MBB, this]() -> void {
+      const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      // The only user is the instruction which will be killed.
+      if (!MRI.hasOneNonDBGUse(DefMI->getOperand(0).getReg()))
+        return;
+      // We cannot just remove the DefMI here, calling pass will crash.
+      DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+      for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
+        DefMI->RemoveOperand(I);
+    };
+
     int64_t Imm;
-    if (getFoldableImm(Src2, Imm)) {
+    if (getFoldableImm(Src2, Imm, &DefMI)) {
       unsigned NewOpc =
           IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
                 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3209,13 +3224,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
         updateLiveVariables(LV, MI, *MIB);
         if (LIS)
           LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+        killDef();
         return MIB;
       }
     }
     unsigned NewOpc = IsFMA
                           ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
                           : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
-    if (getFoldableImm(Src1, Imm)) {
+    if (getFoldableImm(Src1, Imm, &DefMI)) {
       if (pseudoToMCOpcode(NewOpc) != -1) {
         MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
                   .add(*Dst)
@@ -3225,10 +3241,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
         updateLiveVariables(LV, MI, *MIB);
         if (LIS)
           LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+        killDef();
         return MIB;
       }
     }
-    if (getFoldableImm(Src0, Imm)) {
+    if (getFoldableImm(Src0, Imm, &DefMI)) {
       if (pseudoToMCOpcode(NewOpc) != -1 &&
           isOperandLegal(
               MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
@@ -3241,6 +3258,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
         updateLiveVariables(LV, MI, *MIB);
         if (LIS)
           LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+        killDef();
         return MIB;
       }
     }

diff  --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index 375a32a7ab257..708b7d926a886 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -68,8 +68,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v8, 2, v2
 ; GFX9-NEXT:    v_add_u32_e32 v9, v17, v12
 ; GFX9-NEXT:    s_mov_b64 s[10:11], 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3727c5ac
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    ; implicit-def: $vgpr3
 ; GFX9-NEXT:  .LBB1_2: ; %bb23
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v0

diff  --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
index 343864c4cd678..49de2263ccc4b 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
@@ -1,6 +1,8 @@
 # RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
 
 # GCN-LABEL: name: test_fmamk_reg_imm_f32
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f32
@@ -20,6 +22,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmamk_imm_reg_f32
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_imm_reg_f32
@@ -39,6 +43,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_f32
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: V_FMAAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
 ---
 name:            test_fmaak_f32
@@ -56,6 +62,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmamk_reg_imm_f16
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f16
@@ -75,6 +83,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmamk_imm_reg_f16
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_imm_reg_f16
@@ -94,6 +104,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_f16
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
 ---
 name:            test_fmaak_f16
@@ -110,6 +122,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_sgpr_src0_f32
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: %2:vgpr_32 = V_FMAMK_F32 killed %0, 1078523331, %3:vgpr_32, implicit $mode, implicit $exec
 
 ---
@@ -129,6 +143,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_inlineimm_src0_f32
+# GCN: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: %1:vgpr_32 = V_FMAMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $mode, implicit $exec
 
 ---
@@ -183,6 +199,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_inline_literal_f16
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
 # GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
 
 ---
@@ -201,3 +219,47 @@ body:             |
 
 ...
 
+# GCN-LABEL: name: test_fmamk_reg_imm_f32_2_folds
+# GCN: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NOT: V_MOV_B32
+# GCN: V_FMAMK_F32 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
+# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name:            test_fmamk_reg_imm_f32_2_folds
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: vgpr_32 }
+body:             |
+  bb.0:
+
+    %0 = IMPLICIT_DEF
+    %1 = COPY %0.sub1
+    %2 = V_MOV_B32_e32 1078523331, implicit $exec
+    %3 = V_FMAC_F32_e32 %0.sub0, %2, %1, implicit $mode, implicit $exec
+    %4 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GCN-LABEL: name: test_fmamk_reg_imm_f32_used_imm
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name:            test_fmamk_reg_imm_f32_used_imm
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+body:             |
+  bb.0:
+
+    %0 = IMPLICIT_DEF
+    %1 = COPY %0.sub1
+    %2 = V_MOV_B32_e32 1078523331, implicit $exec
+    %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+    S_NOP 0, implicit %2
+
+...


        


More information about the llvm-commits mailing list