[PATCH] D110743: [AMDGPU] Allow rematerialization of SOP with virtual registers

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 20 11:29:25 PDT 2021


rampitec updated this revision to Diff 381041.
rampitec marked 3 inline comments as done.
rampitec added a comment.

Fixed typos in comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110743/new/

https://reviews.llvm.org/D110743

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/test/CodeGen/AMDGPU/remat-sop.mir


Index: llvm/test/CodeGen/AMDGPU/remat-sop.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -51,6 +51,66 @@
     S_NOP 0, implicit %2
     S_ENDPGM 0
 ...
+# The liverange of %0 covers a point of rematerialization, so the source value
+# is available.
+---
+name:            test_remat_s_mov_b32_vreg_src_long_lr
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg:  $sgpr32
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr
+    ; GCN: renamable $sgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+    ; GCN: S_ENDPGM 0
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 %0:sreg_32
+    %2:sreg_32 = S_MOV_B32 %0:sreg_32
+    %3:sreg_32 = S_MOV_B32 %0:sreg_32
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_NOP 0, implicit %0
+    S_ENDPGM 0
+...
+# The liverange of %0 does not cover a point of rematerialization, so the source
+# value is unavailable and we do not want to artificially extend the liverange.
+---
+name:            test_no_remat_s_mov_b32_vreg_src_short_lr
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg:  $sgpr32
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr
+    ; GCN: renamable $sgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
+    ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
+    ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+    ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+    ; GCN: S_ENDPGM 0
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 %0:sreg_32
+    %2:sreg_32 = S_MOV_B32 %0:sreg_32
+    %3:sreg_32 = S_MOV_B32 %0:sreg_32
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+...
 ---
 name:            test_remat_s_mov_b64
 tracksRegLiveness: true
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -109,7 +109,7 @@
 
 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                     AAResults *AA) const {
-  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) {
+  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
     // Normally VALU use of exec would block the rematerialization, but that
     // is OK in this case to have an implicit exec read as all VALU do.
     // We really want all of the generic logic for this except for this.
@@ -117,6 +117,10 @@
     // Another potential implicit use is mode register. The core logic of
     // the RA will not attempt rematerialization if mode is set anywhere
     // in the function, otherwise it is safe since mode is not changed.
+
+    // This differs from the generic method, which does not allow
+    // rematerialization if there are virtual register uses. We allow this,
+    // therefore this method includes SOP instructions as well.
     return !MI.hasImplicitDef() &&
            MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
            !MI.mayRaiseFPException();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D110743.381041.patch
Type: text/x-patch
Size: 4347 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211020/5e6a0503/attachment-0001.bin>


More information about the llvm-commits mailing list