[llvm] d383ade - [BranchRelaxation] Fall through only if block has no unconditional branches

Anshil Gandhi via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 13 21:49:02 PDT 2022


Author: Anshil Gandhi
Date: 2022-10-13T22:48:41-06:00
New Revision: d383adec4d3914492e67267462e6f00fdd4934af

URL: https://github.com/llvm/llvm-project/commit/d383adec4d3914492e67267462e6f00fdd4934af
DIFF: https://github.com/llvm/llvm-project/commit/d383adec4d3914492e67267462e6f00fdd4934af.diff

LOG: [BranchRelaxation] Fall through only if block has no unconditional branches

Prior to inserting an unconditional branch from X to its
fall through basic block, check if X has any terminators to
avoid inserting additional branches.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D134557

Added: 
    llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir

Modified: 
    llvm/lib/CodeGen/BranchRelaxation.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 29508f8f35a67..87d5d053318fc 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -431,7 +432,8 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
 
 bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
   MachineBasicBlock *MBB = MI.getParent();
-
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
   unsigned OldBrSize = TII->getInstSizeInBytes(MI);
   MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
 
@@ -444,6 +446,20 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
 
   MachineBasicBlock *BranchBB = MBB;
 
+  auto RemoveBranch = [&](MachineBasicBlock *MBB) {
+    unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+    int RemovedSize = 0;
+    TII->removeBranch(*MBB, &RemovedSize);
+    BBSize -= RemovedSize;
+  };
+
+  auto InsertUncondBranch = [&](MachineBasicBlock *MBB,
+                                MachineBasicBlock *Dst) {
+    TII->insertUnconditionalBranch(*MBB, Dst, DebugLoc());
+    // Recalculate the block size.
+    BlockInfo[MBB->getNumber()].Size = computeBlockSize(*MBB);
+  };
+
   // If this was an expanded conditional branch, there is already a single
   // unconditional branch in a block.
   if (!MBB->empty()) {
@@ -482,11 +498,15 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
     // restore blocks are just duplicated for each far branch.
     assert(!DestBB->isEntryBlock());
     MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
-    if (auto *FT = PrevBB->getFallThrough()) {
-      assert(FT == DestBB);
-      TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
-      // Recalculate the block size.
-      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+    // Fall through only if PrevBB has no unconditional branch as one of its
+    // terminators.
+    if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
+      report_fatal_error("Could not analyze terminators.");
+    if (!FBB) {
+      if (!Cond.empty() && TBB && TBB == DestBB)
+        RemoveBranch(PrevBB);
+      if (!TBB || (TBB && !Cond.empty()))
+        InsertUncondBranch(PrevBB, DestBB);
     }
     // Now, RestoreBB could be placed directly before DestBB.
     MF->splice(DestBB->getIterator(), RestoreBB->getIterator());

diff  --git a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
new file mode 100644
index 0000000000000..a61d446304da1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
@@ -0,0 +1,163 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --amdgpu-s-branch-bits=5 -run-pass branch-relaxation %s -o - | FileCheck %s
+
+---
+name:            branch_no_terminators
+alignment:       1
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr12' }
+body:             |
+  ; CHECK-LABEL: name: branch_no_terminators
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.5(0x30000000)
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+  ; CHECK-NEXT:   $sgpr81 = S_MOV_B32 killed $sgpr12
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+  ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+  ; CHECK-NEXT:   S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.1, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5.entry:
+  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $private_rsrc_reg, $sp_reg, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+  ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+  ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+  ; CHECK-NEXT:   $sgpr81 = S_MOV_B32 killed $sgpr82
+  ; CHECK-NEXT:   $sgpr82 = S_MOV_B32 killed $sgpr83
+  ; CHECK-NEXT:   $sgpr83 = S_MOV_B32 killed $sgpr84
+  ; CHECK-NEXT:   $sgpr84 = S_MOV_B32 killed $sgpr85
+  ; CHECK-NEXT:   $sgpr101 = S_MOV_B32 killed $vcc_lo
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.7(0x7c000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+  ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.3, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+  ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+  ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 killed $sgpr5
+  ; CHECK-NEXT:   $sgpr5 = S_MOV_B32 killed $sgpr6
+  ; CHECK-NEXT:   $sgpr6 = S_MOV_B32 killed $sgpr7
+  ; CHECK-NEXT:   $sgpr7 = S_MOV_B32 killed $sgpr8
+  ; CHECK-NEXT:   $sgpr8 = S_MOV_B32 killed $sgpr9
+  ; CHECK-NEXT:   $sgpr9 = S_MOV_B32 killed $sgpr10
+  ; CHECK-NEXT:   $sgpr10 = S_MOV_B32 killed $sgpr11
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr32, $sgpr33, $sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $private_rsrc_reg, $sp_reg, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_WAITCNT 3952
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+    liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+
+    S_WAITCNT 0
+    $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec
+    $exec = S_MOV_B64 killed $sgpr4_sgpr5
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+    $sgpr81 = S_MOV_B32 killed $sgpr12
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+    $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+    S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+  bb.1:
+    liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+    $sgpr81 = S_MOV_B32 killed $sgpr82
+    $sgpr82 = S_MOV_B32 killed $sgpr83
+    $sgpr83 = S_MOV_B32 killed $sgpr84
+    $sgpr84 = S_MOV_B32 killed $sgpr85
+    $sgpr101 = S_MOV_B32 killed $vcc_lo
+
+  bb.2:
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+    liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    INLINEASM &"v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+    S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+
+  bb.3:
+    liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    $sgpr4 = S_MOV_B32 killed $sgpr5
+    $sgpr5 = S_MOV_B32 killed $sgpr6
+    $sgpr6 = S_MOV_B32 killed $sgpr7
+    $sgpr7 = S_MOV_B32 killed $sgpr8
+    $sgpr8 = S_MOV_B32 killed $sgpr9
+    $sgpr9 = S_MOV_B32 killed $sgpr10
+    $sgpr10 = S_MOV_B32 killed $sgpr11
+
+  bb.4:
+    liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+
+    $sgpr101 = V_READLANE_B32 $vgpr1, 6
+    $sgpr100 = V_READLANE_B32 $vgpr1, 5
+    $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec
+    $exec = S_MOV_B64 killed $sgpr4_sgpr5
+    S_WAITCNT 3952
+    S_SETPC_B64_return undef $sgpr30_sgpr31
+
+...


        


More information about the llvm-commits mailing list