[llvm] r362754 - AMDGPU: Insert skip branches over return blocks

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 6 15:51:51 PDT 2019


Author: arsenm
Date: Thu Jun  6 15:51:51 2019
New Revision: 362754

URL: http://llvm.org/viewvc/llvm-project?rev=362754&view=rev
Log:
AMDGPU: Insert skip branches over return blocks

SIInsertSkips does not really understand the control flow, and makes
naive assumptions about the block layout. It was able to get away with
not skipping return blocks, since after structurization there is
usually only one, placed at the end of the function. Tail duplication
can break this assumption.
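
For illustration, a hedged sketch of the layout assumption (hypothetical
block names, modeled on the test added below). After structurization the
layout typically ends with the function's only return block:

  bb.0   ; conditional flow, skip branches considered here
  bb.1
  bb.2   ; S_ENDPGM, only return block, last in layout

so never skipping over a block with no successors was harmless. Tail
duplication can clone the return block into the middle of the layout:

  bb.0   ; conditional flow
  bb.2   ; S_ENDPGM, duplicated return, now mid-layout
  bb.1
  bb.3   ; S_ENDPGM

Falling through into the duplicated S_ENDPGM with EXEC == 0 would end the
program even though masked-off lanes still have work in later blocks, so
a skip branch over the return block is now required.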

Added:
    llvm/trunk/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp?rev=362754&r1=362753&r2=362754&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp Thu Jun  6 15:51:51 2019
@@ -109,9 +109,6 @@ static bool opcodeEmitsNoInsts(unsigned
 
 bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
                                const MachineBasicBlock &To) const {
-  if (From.succ_empty())
-    return false;
-
   unsigned NumInstr = 0;
   const MachineFunction *MF = From.getParent();
 
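
A minimal standalone C++ sketch of the decision this hunk changes
(simplified stand-in types and an assumed threshold parameter, not the
real LLVM classes): the removed early-out meant a source block with no
successors, i.e. a return block, was never considered worth skipping, no
matter how many instructions would execute with EXEC == 0.

  #include <cstddef>

  // Stand-in for MachineBasicBlock: only what the sketch needs.
  struct Block {
    bool HasSuccessors;    // !succ_empty() in the real pass
    std::size_t NumInstrs; // instructions that would run with EXEC == 0
  };

  // Sketch of shouldSkip() after this patch; the removed early-out is
  // kept as a comment for contrast.
  bool shouldSkip(const Block &From, std::size_t SkipThreshold) {
    // Removed by this patch:
    //   if (!From.HasSuccessors) // From.succ_empty()
    //     return false;
    return From.NumInstrs >= SkipThreshold;
  }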

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=362754&r1=362753&r2=362754&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Thu Jun  6 15:51:51 2019
@@ -2479,6 +2479,10 @@ bool SIInstrInfo::hasUnwantedEffectsWhen
   if (MI.mayStore() && isSMRD(MI))
     return true; // scalar store or atomic
 
+  // This will terminate the function when other lanes may need to continue.
+  if (MI.isReturn())
+    return true;
+
   // These instructions cause shader I/O that may cause hardware lockups
   // when executed with an empty EXEC mask.
   //
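
A hedged sketch, with a hypothetical simplified instruction type, of
where the new check sits (the real function inspects opcodes and covers
more cases than shown here):

  // Stand-in for MachineInstr: only the predicates this sketch needs.
  struct Inst {
    bool ScalarStoreOrAtomic; // MI.mayStore() && isSMRD(MI)
    bool IsReturn;            // MI.isReturn(): S_ENDPGM, S_SETPC_B64_return, ...
    bool ShaderIO;            // exports, message sends, etc.
  };

  // Report instructions that must not execute while every lane is
  // inactive (EXEC == 0); callers insert skip branches over them.
  bool hasUnwantedEffectsWhenEXECEmpty(const Inst &MI) {
    if (MI.ScalarStoreOrAtomic)
      return true; // scalar store or atomic
    // New in this patch: a return would terminate the function while
    // other lanes may still need to continue.
    if (MI.IsReturn)
      return true;
    return MI.ShaderIO; // shader I/O with empty EXEC can lock up hardware
  }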

Added: llvm/trunk/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir?rev=362754&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir Thu Jun  6 15:51:51 2019
@@ -0,0 +1,194 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold=1000000 -o -  %s | FileCheck %s
+
+---
+name: skip_branch_taildup_endpgm
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; CHECK-LABEL: name: skip_branch_taildup_endpgm
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+  ; CHECK:   renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+  ; CHECK:   S_WAITCNT 127
+  ; CHECK:   $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
+  ; CHECK:   renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  ; CHECK:   renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
+  ; CHECK:   renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+  ; CHECK:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+  ; CHECK:   S_WAITCNT 112
+  ; CHECK:   V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  ; CHECK:   $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
+  ; CHECK:   SI_MASK_BRANCH %bb.1, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK:   SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK:   S_BRANCH %bb.4
+  ; CHECK: bb.2:
+  ; CHECK:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK:   renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK:   S_ENDPGM 0
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+  ; CHECK:   $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+  ; CHECK:   renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK:   SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.4:
+  ; CHECK:   renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+  ; CHECK:   $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+  ; CHECK:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK:   renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7
+
+    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+    renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+    S_WAITCNT 127
+    $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
+    renamable $vgpr0 = V_ADD_I32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
+    renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+    S_WAITCNT 112
+    V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
+    SI_MASK_BRANCH %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.4:
+    liveins: $sgpr2_sgpr3
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_ENDPGM 0
+
+  bb.1:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+    $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+    renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+    $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
+    $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name: skip_branch_taildup_ret
+body:             |
+  ; CHECK-LABEL: name: skip_branch_taildup_ret
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_WAITCNT 0
+  ; CHECK:   V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  ; CHECK:   $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+  ; CHECK:   SI_MASK_BRANCH %bb.1, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK:   SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK:   S_BRANCH %bb.4
+  ; CHECK: bb.2:
+  ; CHECK:   $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK:   renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK:   S_SETPC_B64_return $sgpr30_sgpr31
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
+  ; CHECK:   renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK:   SI_MASK_BRANCH %bb.2, implicit $exec
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.4:
+  ; CHECK:   renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
+  ; CHECK:   $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+  ; CHECK:   renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+  ; CHECK:   S_SETPC_B64_return $sgpr30_sgpr31
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    S_WAITCNT 0
+    V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+    SI_MASK_BRANCH %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.4:
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31
+
+  bb.1:
+    successors: %bb.3, %bb.4
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
+    renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
+    SI_MASK_BRANCH %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    liveins: $sgpr6_sgpr7, $sgpr30_sgpr31, $vgpr1_vgpr2
+
+    renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
+    $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
+    renamable $vgpr0 = V_MOV_B32_e32 32, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31
+
+...
