[PATCH] D110743: [AMDGPU] Allow rematerialization of SOP with virtual registers

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 29 11:11:56 PDT 2021


rampitec created this revision.
rampitec added a reviewer: foad.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
rampitec requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

D106408 <https://reviews.llvm.org/D106408> was doing this for all targets, although it was
reverted due to a couple of performance regressions on some targets.
The difference for AMDGPU is the ability to rematerialize SOP
instructions with virtual register uses, like we already do for VOP.


https://reviews.llvm.org/D110743

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/test/CodeGen/AMDGPU/remat-sop.mir


Index: llvm/test/CodeGen/AMDGPU/remat-sop.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -51,6 +51,66 @@
     S_NOP 0, implicit %2
     S_ENDPGM 0
 ...
+# The liverange of %0 covers a point of rematerialization, source value is
+# available.
+---
+name:            test_remat_s_mov_b32_vreg_src_long_lr
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg:  $sgpr32
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr
+    ; GCN: renamable $sgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+    ; GCN: S_ENDPGM 0
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 %0:sreg_32
+    %2:sreg_32 = S_MOV_B32 %0:sreg_32
+    %3:sreg_32 = S_MOV_B32 %0:sreg_32
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_NOP 0, implicit %0
+    S_ENDPGM 0
+...
+# The liverange of %0 does not cover a point of rematerialization, source value is
+# unavailable and we do not want to artificially extend the liverange.
+---
+name:            test_no_remat_s_mov_b32_vreg_src_short_lr
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg:  $sgpr32
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr
+    ; GCN: renamable $sgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
+    ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+    ; GCN: S_ENDPGM 0
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 %0:sreg_32
+    %2:sreg_32 = S_MOV_B32 %0:sreg_32
+    %3:sreg_32 = S_MOV_B32 %0:sreg_32
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+...
 ---
 name:            test_remat_s_mov_b64
 tracksRegLiveness: true
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -109,7 +109,7 @@
 
 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                     AAResults *AA) const {
-  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) {
+  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
     // Normally VALU use of exec would block the rematerialization, but that
     // is OK in this case to have an implicit exec read as all VALU do.
     // We really want all of the generic logic for this except for this.
@@ -117,6 +117,10 @@
     // Another potential implicit use is mode register. The core logic of
     // the RA will not attempt rematerialization if mode is set anywhere
     // in the function, otherwise it is safe since mode is not changed.
+
+    // This differs from the generic method, which does not allow
+    // rematerialization if there are virtual register uses. We allow this,
+    // therefore this method includes SOP instructions as well.
     return !MI.hasImplicitDef() &&
            MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
            !MI.mayRaiseFPException();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D110743.375966.patch
Type: text/x-patch
Size: 4344 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210929/4fbb753a/attachment-0001.bin>


More information about the llvm-commits mailing list