[llvm-branch-commits] [llvm] 2f49cd1 - [AMDGPU] Do not release VGPRs at -O0

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Aug 14 01:36:53 PDT 2023


Author: Jay Foad
Date: 2023-08-14T10:33:27+02:00
New Revision: 2f49cd1a773da7d2227fa7db952f50137a812300

URL: https://github.com/llvm/llvm-project/commit/2f49cd1a773da7d2227fa7db952f50137a812300
DIFF: https://github.com/llvm/llvm-project/commit/2f49cd1a773da7d2227fa7db952f50137a812300.diff

LOG: [AMDGPU] Do not release VGPRs at -O0

This was an oversight when the GFX11 early release VGPRs optimization
was reimplemented in D153279.

Sending the DEALLOC_VGPRS message is a performance optimization, so there
is no need to do it at -O0. In addition, it makes some kinds of
post-mortem debugging hard or impossible, since VGPR values are no longer
available to inspect at the s_endpgm instruction.

Differential Revision: https://reviews.llvm.org/D157599

(cherry picked from commit 3091bdb86d55e404866823b64d21fd87c247d893)

Added: 
    llvm/test/CodeGen/AMDGPU/release-vgprs.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 4b0283b27a6f5c..a74b917f82bfd0 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -382,6 +382,8 @@ class SIInsertWaitcnts : public MachineFunctionPass {
   bool ForceEmitZeroWaitcnts;
   bool ForceEmitWaitcnt[NUM_INST_CNTS];
 
+  bool OptNone;
+
   // S_ENDPGM instructions before which we should insert a DEALLOC_VGPRS
   // message.
   DenseSet<MachineInstr *> ReleaseVGPRInsts;
@@ -1040,7 +1042,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // do this if there are no outstanding scratch stores.
   else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
            MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
-    if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
+    if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && !OptNone &&
         ScoreBrackets.getScoreRange(VS_CNT) != 0 &&
         !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))
       ReleaseVGPRInsts.insert(&MI);
@@ -1822,6 +1824,9 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
   for (auto T : inst_counter_types())
     ForceEmitWaitcnt[T] = false;
 
+  OptNone = MF.getFunction().hasOptNone() ||
+            MF.getTarget().getOptLevel() == CodeGenOpt::None;
+
   HardwareLimits Limits = {};
   Limits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
   Limits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);

diff  --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
new file mode 100644
index 00000000000000..3a879e818af797
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -0,0 +1,558 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,OPT
+# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,NOOPT
+
+--- |
+  define amdgpu_ps void @tbuffer_store1() { ret void }
+  define amdgpu_ps void @tbuffer_store2() { ret void }
+  define amdgpu_ps void @flat_store() { ret void }
+  define amdgpu_ps void @global_store() { ret void }
+  define amdgpu_ps void @buffer_store_format() { ret void }
+  define amdgpu_ps void @ds_write_b32() { ret void }
+  define amdgpu_ps void @global_store_dword() { ret void }
+  define amdgpu_ps void @multiple_basic_blocks1() { ret void }
+  define amdgpu_ps void @multiple_basic_blocks2() { ret void }
+  define amdgpu_ps void @multiple_basic_blocks3() { ret void }
+  define amdgpu_ps void @recursive_loop() { ret void }
+  define amdgpu_ps void @recursive_loop_vmem() { ret void }
+  define amdgpu_ps void @image_store() { ret void }
+  define amdgpu_ps void @scratch_store() { ret void }
+  define amdgpu_ps void @buffer_atomic() { ret void }
+  define amdgpu_ps void @flat_atomic() { ret void }
+  define amdgpu_ps void @global_atomic() { ret void }
+  define amdgpu_ps void @image_atomic() { ret void }
+  define amdgpu_ps void @global_store_optnone() noinline optnone { ret void }
+...
+
+---
+name:            tbuffer_store1
+body:             |
+  bb.0:
+    ; OPT-LABEL: name: tbuffer_store1
+    ; OPT: S_WAITCNT 0
+    ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: tbuffer_store1
+    ; NOOPT: S_WAITCNT 0
+    ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+    ; NOOPT-NEXT: S_ENDPGM 0
+    TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            tbuffer_store2
+body:             |
+  bb.0:
+    ; OPT-LABEL: name: tbuffer_store2
+    ; OPT: S_WAITCNT 0
+    ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: tbuffer_store2
+    ; NOOPT: S_WAITCNT 0
+    ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    ; NOOPT-NEXT: S_ENDPGM 0
+    TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    S_ENDPGM 0
+...
+
+---
+name:            flat_store
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: flat_store
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT: S_ENDPGM 0
+    FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0
+...
+
+---
+name:            global_store
+body:             |
+  bb.0:
+    ; OPT-LABEL: name: global_store
+    ; OPT: S_WAITCNT 0
+    ; OPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    ; OPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: global_store
+    ; NOOPT: S_WAITCNT 0
+    ; NOOPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    ; NOOPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+    ; NOOPT-NEXT: S_ENDPGM 0
+    GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    S_WAITCNT_VSCNT undef $sgpr_null, 0
+    S_ENDPGM 0
+...
+
+---
+name:            buffer_store_format
+body:             |
+  bb.0:
+    ; OPT-LABEL: name: buffer_store_format
+    ; OPT: S_WAITCNT 0
+    ; OPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: buffer_store_format
+    ; NOOPT: S_WAITCNT 0
+    ; NOOPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+    ; NOOPT-NEXT: S_ENDPGM 0
+    BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            ds_write_b32
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: ds_write_b32
+    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF
+    ; CHECK-NEXT: S_WAITCNT 0
+    ; CHECK-NEXT: DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    renamable $vgpr0 = IMPLICIT_DEF
+    renamable $vgpr1 = IMPLICIT_DEF
+    DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+---
+name:            global_store_dword
+body:             |
+  bb.0:
+    liveins: $vgpr0, $sgpr0_sgpr1
+
+    ; OPT-LABEL: name: global_store_dword
+    ; OPT: liveins: $vgpr0, $sgpr0_sgpr1
+    ; OPT-NEXT: {{  $}}
+    ; OPT-NEXT: S_WAITCNT 0
+    ; OPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+    ; OPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: global_store_dword
+    ; NOOPT: liveins: $vgpr0, $sgpr0_sgpr1
+    ; NOOPT-NEXT: {{  $}}
+    ; NOOPT-NEXT: S_WAITCNT 0
+    ; NOOPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+    ; NOOPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+    ; NOOPT-NEXT: S_ENDPGM 0
+    renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+    GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            multiple_basic_blocks1
+body:             |
+  ; CHECK-LABEL: name: multiple_basic_blocks1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 1015
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1
+
+    renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+    S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+
+# One block has a VMEM store as the last instruction, so we should release the VGPRs (when optimizing; not at -O0)
+...
+---
+name:            multiple_basic_blocks2
+body:             |
+  ; OPT-LABEL: name: multiple_basic_blocks2
+  ; OPT: bb.0:
+  ; OPT-NEXT:   successors: %bb.2(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   S_WAITCNT 0
+  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; OPT-NEXT:   S_BRANCH %bb.2
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.1:
+  ; OPT-NEXT:   successors: %bb.2(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; OPT-NEXT:   S_BRANCH %bb.2
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.2:
+  ; OPT-NEXT:   S_NOP 0
+  ; OPT-NEXT:   S_SENDMSG 3, implicit $exec, implicit $m0
+  ; OPT-NEXT:   S_ENDPGM 0
+  ;
+  ; NOOPT-LABEL: name: multiple_basic_blocks2
+  ; NOOPT: bb.0:
+  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   S_WAITCNT 0
+  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; NOOPT-NEXT:   S_BRANCH %bb.2
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.1:
+  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; NOOPT-NEXT:   S_BRANCH %bb.2
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.2:
+  ; NOOPT-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.2
+
+    TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+    TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+
+# One parent block has a VMEM store, so we should release the VGPRs (when optimizing; not at -O0)
+---
+name:            multiple_basic_blocks3
+body:             |
+  ; OPT-LABEL: name: multiple_basic_blocks3
+  ; OPT: bb.0:
+  ; OPT-NEXT:   successors: %bb.2(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   S_WAITCNT 0
+  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; OPT-NEXT:   S_BRANCH %bb.2
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.1:
+  ; OPT-NEXT:   successors: %bb.2(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; OPT-NEXT:   S_BRANCH %bb.2
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.2:
+  ; OPT-NEXT:   successors: %bb.4(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   S_BRANCH %bb.4
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.3:
+  ; OPT-NEXT:   successors: %bb.4(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; OPT-NEXT:   S_BRANCH %bb.4
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.4:
+  ; OPT-NEXT:   S_NOP 0
+  ; OPT-NEXT:   S_SENDMSG 3, implicit $exec, implicit $m0
+  ; OPT-NEXT:   S_ENDPGM 0
+  ;
+  ; NOOPT-LABEL: name: multiple_basic_blocks3
+  ; NOOPT: bb.0:
+  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   S_WAITCNT 0
+  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; NOOPT-NEXT:   S_BRANCH %bb.2
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.1:
+  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; NOOPT-NEXT:   S_BRANCH %bb.2
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.2:
+  ; NOOPT-NEXT:   successors: %bb.4(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   S_BRANCH %bb.4
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.3:
+  ; NOOPT-NEXT:   successors: %bb.4(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; NOOPT-NEXT:   S_BRANCH %bb.4
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.4:
+  ; NOOPT-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.2
+
+    $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+    TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.4
+
+    S_BRANCH %bb.4
+
+  bb.3:
+    successors: %bb.4
+
+    $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_ENDPGM 0
+...
+
+---
+name:            recursive_loop
+body:             |
+  ; CHECK-LABEL: name: recursive_loop
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1
+
+    renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name:            recursive_loop_vmem
+body:             |
+  ; OPT-LABEL: name: recursive_loop_vmem
+  ; OPT: bb.0:
+  ; OPT-NEXT:   successors: %bb.1(0x80000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   S_WAITCNT 0
+  ; OPT-NEXT:   renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+  ; OPT-NEXT:   S_BRANCH %bb.1
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.1:
+  ; OPT-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT:   S_WAITCNT 1015
+  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+  ; OPT-NEXT:   S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+  ; OPT-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+  ; OPT-NEXT:   S_BRANCH %bb.2
+  ; OPT-NEXT: {{  $}}
+  ; OPT-NEXT: bb.2:
+  ; OPT-NEXT:   S_NOP 0
+  ; OPT-NEXT:   S_SENDMSG 3, implicit $exec, implicit $m0
+  ; OPT-NEXT:   S_ENDPGM 0
+  ;
+  ; NOOPT-LABEL: name: recursive_loop_vmem
+  ; NOOPT: bb.0:
+  ; NOOPT-NEXT:   successors: %bb.1(0x80000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   S_WAITCNT 0
+  ; NOOPT-NEXT:   renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+  ; NOOPT-NEXT:   S_BRANCH %bb.1
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.1:
+  ; NOOPT-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT:   S_WAITCNT 1015
+  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+  ; NOOPT-NEXT:   S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+  ; NOOPT-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+  ; NOOPT-NEXT:   S_BRANCH %bb.2
+  ; NOOPT-NEXT: {{  $}}
+  ; NOOPT-NEXT: bb.2:
+  ; NOOPT-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1
+
+    renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+    S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name:            image_store
+body:             |
+  bb.0:
+    ; OPT-LABEL: name: image_store
+    ; OPT: S_WAITCNT 0
+    ; OPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: image_store
+    ; NOOPT: S_WAITCNT 0
+    ; NOOPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
+    ; NOOPT-NEXT: S_ENDPGM 0
+  IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
+  S_ENDPGM 0
+...
+
+---
+name:            scratch_store
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: scratch_store
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT: S_ENDPGM 0
+    renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc
+    SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0
+...
+
+---
+name:            buffer_atomic
+body:             |
+  bb.0:
+    ; OPT-LABEL: name: buffer_atomic
+    ; OPT: S_WAITCNT 0
+    ; OPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
+    ; OPT-NEXT: S_NOP 0
+    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
+    ; OPT-NEXT: S_ENDPGM 0
+    ;
+    ; NOOPT-LABEL: name: buffer_atomic
+    ; NOOPT: S_WAITCNT 0
+    ; NOOPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
+    ; NOOPT-NEXT: S_ENDPGM 0
+    BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
+    S_ENDPGM 0
+...
+
+---
+name:            flat_atomic
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: flat_atomic
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT: S_ENDPGM 0
+    renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0
+...
+
+
+---
+name:            global_atomic
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: global_atomic
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            image_atomic
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: image_atomic
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
+    ; CHECK-NEXT: S_ENDPGM 0
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
+    S_ENDPGM 0
+...
+
+---
+name:            global_store_optnone
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: global_store_optnone
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+    ; CHECK-NEXT: S_ENDPGM 0
+    GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    S_WAITCNT_VSCNT undef $sgpr_null, 0
+    S_ENDPGM 0
+...


        


More information about the llvm-branch-commits mailing list