[llvm] 6185835 - [AMDGPU] Allow rematerialization of SOP with virtual registers

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 20 11:46:59 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-10-20T11:46:50-07:00
New Revision: 618583565687f5a494066fc902a977f6057fc93e

URL: https://github.com/llvm/llvm-project/commit/618583565687f5a494066fc902a977f6057fc93e
DIFF: https://github.com/llvm/llvm-project/commit/618583565687f5a494066fc902a977f6057fc93e.diff

LOG: [AMDGPU] Allow rematerialization of SOP with virtual registers

D106408 did this for all targets, but it was reverted
due to a couple of performance regressions on some targets.
The difference for AMDGPU is the ability to rematerialize SOP
instructions with virtual register uses like we already do for VOP.

Differential Revision: https://reviews.llvm.org/D110743

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/remat-sop.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d5bd71fd941be..00fd102240d2e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -109,7 +109,7 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
 
 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                     AAResults *AA) const {
-  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) {
+  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
     // Normally VALU use of exec would block the rematerialization, but that
     // is OK in this case to have an implicit exec read as all VALU do.
     // We really want all of the generic logic for this except for this.
@@ -117,6 +117,10 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
     // Another potential implicit use is mode register. The core logic of
     // the RA will not attempt rematerialization if mode is set anywhere
     // in the function, otherwise it is safe since mode is not changed.
+
+    // There is difference to generic method which does not allow
+    // rematerialization if there are virtual register uses. We allow this,
+    // therefore this method includes SOP instructions as well.
     return !MI.hasImplicitDef() &&
            MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
            !MI.mayRaiseFPException();

diff --git a/llvm/test/CodeGen/AMDGPU/remat-sop.mir b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
index ed799bfca0283..39008fefa45b9 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -51,6 +51,66 @@ body:             |
     S_NOP 0, implicit %2
     S_ENDPGM 0
 ...
+# The liverange of %0 covers a point of rematerialization, source value is
+# available.
+---
+name:            test_remat_s_mov_b32_vreg_src_long_lr
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg:  $sgpr32
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr
+    ; GCN: renamable $sgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+    ; GCN: S_ENDPGM 0
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 %0:sreg_32
+    %2:sreg_32 = S_MOV_B32 %0:sreg_32
+    %3:sreg_32 = S_MOV_B32 %0:sreg_32
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_NOP 0, implicit %0
+    S_ENDPGM 0
+...
+# The liverange of %0 does not cover a point of rematerialization, source value is
+# unavailable and we do not want to artificially extend the liverange.
+---
+name:            test_no_remat_s_mov_b32_vreg_src_short_lr
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg:  $sgpr32
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr
+    ; GCN: renamable $sgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
+    ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+    ; GCN: S_ENDPGM 0
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 %0:sreg_32
+    %2:sreg_32 = S_MOV_B32 %0:sreg_32
+    %3:sreg_32 = S_MOV_B32 %0:sreg_32
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+...
 ---
 name:            test_remat_s_mov_b64
 tracksRegLiveness: true


        


More information about the llvm-commits mailing list