[llvm] ef949ec - [MachineSink] Use SkipPHIsAndLabels for sink insertion points

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 15 19:45:10 PST 2022


Author: Carl Ritson
Date: 2022-02-16T12:44:22+09:00
New Revision: ef949ecba57410e8b856f3246128312c79207933

URL: https://github.com/llvm/llvm-project/commit/ef949ecba57410e8b856f3246128312c79207933
DIFF: https://github.com/llvm/llvm-project/commit/ef949ecba57410e8b856f3246128312c79207933.diff

LOG: [MachineSink] Use SkipPHIsAndLabels for sink insertion points

For AMDGPU, the insertion point for a block may not be the first
non-PHI instruction. This happens when a block contains EXEC
mask manipulation related to control flow (converging lanes).
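Targets report such instructions through the TargetInstrInfo::isBasicBlockPrologue
hook. As a rough illustration only (a simplified sketch, not necessarily the exact
SIInstrInfo implementation), a hook like this could classify non-terminator EXEC
writes at the top of a block as prologue:

    // Sketch: treat non-terminator, non-COPY instructions that write the EXEC
    // mask as block prologue, so nothing may be inserted ahead of them.
    bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
      return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
             MI.modifiesRegister(AMDGPU::EXEC, &RI);
    }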

Use SkipPHIsAndLabels to determine the block insertion point
so that the target can skip any block prologue instructions.
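Roughly, SkipPHIsAndLabels extends getFirstNonPHI by also stepping over
label/position instructions and anything the target reports as block
prologue. A sketch of the helper's behaviour (not a verbatim copy of
MachineBasicBlock.cpp):

    MachineBasicBlock::iterator
    MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
      const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
      // Step over PHIs, labels, and any target-defined block prologue
      // instructions (e.g. EXEC mask setup on AMDGPU) so that sunk
      // instructions land after them.
      iterator E = end();
      while (I != E &&
             (I->isPHI() || I->isPosition() || TII->isBasicBlockPrologue(*I)))
        ++I;
      return I;
    }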

Reviewed By: rampitec, ruiling

Differential Revision: https://reviews.llvm.org/D119399

Added: 
    llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir

Modified: 
    llvm/lib/CodeGen/MachineSink.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index e584ebe88538d..7ed33f9fdeacd 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -1272,7 +1272,8 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
   }
 
   LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
-  SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
+  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
+                    I);
 
   // The instruction is moved from its basic block, so do not retain the
   // debug information.
@@ -1392,9 +1393,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   }
 
   // Determine where to insert into. Skip phi nodes.
-  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
-  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
-    ++InsertPos;
+  MachineBasicBlock::iterator InsertPos =
+      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
 
   // Collect debug users of any vreg that this inst defines.
   SmallVector<MIRegs, 4> DbgUsersToSink;
@@ -1796,7 +1796,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
     // Clear the kill flag if SrcReg is killed between MI and the end of the
     // block.
     clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
-    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
+    MachineBasicBlock::iterator InsertPos =
+        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
     performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
     updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
 

diff  --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir
new file mode 100644
index 0000000000000..4feef2149b422
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir
@@ -0,0 +1,122 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX10 %s
+
+# Test that MachineSink pass respects block prologues when sinking instructions.
+# Specifically an instruction must not be sunk before exec mask manipulation.
+
+---
+name:            _amdgpu_hs_main
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX10-LABEL: name: _amdgpu_hs_main
+  ; GFX10: bb.0:
+  ; GFX10-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX10-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+  ; GFX10-NEXT:   [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[DEF]], 8, 5, implicit $exec
+  ; GFX10-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
+  ; GFX10-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_1]], implicit $exec
+  ; GFX10-NEXT:   [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc
+  ; GFX10-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc
+  ; GFX10-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+  ; GFX10-NEXT:   S_BRANCH %bb.2
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_1]], implicit-def $scc
+  ; GFX10-NEXT:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+  ; GFX10-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 31
+  ; GFX10-NEXT:   [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_2]], implicit $exec
+  ; GFX10-NEXT:   [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc
+  ; GFX10-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc
+  ; GFX10-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.3
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   S_BRANCH %bb.4
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.4:
+  ; GFX10-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_3]], implicit-def $scc
+  ; GFX10-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GFX10-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+  ; GFX10-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+  ; GFX10-NEXT:   [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[S_MOV_B32_4]], killed [[S_MOV_B32_3]], implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.5
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[DEF2]], implicit-def $scc
+  ; GFX10-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.4(0x40000000), %bb.5(0x40000000)
+
+    %0:sgpr_32 = IMPLICIT_DEF
+    %14:sreg_32 = IMPLICIT_DEF
+    %15:vgpr_32 = IMPLICIT_DEF
+    %16:sreg_32 = S_MOV_B32 8
+    %17:vgpr_32 = V_LSHRREV_B32_e64 %16, %15, implicit $exec
+    %18:vgpr_32 = V_BFE_U32_e64 %15, 8, 5, implicit $exec
+    %19:sreg_32 = S_MOV_B32 5
+    %20:sreg_32 = V_CMP_NE_U32_e64 %18, killed %19, implicit $exec
+    %21:sreg_32 = S_XOR_B32 %20, -1, implicit-def $scc
+    %22:sreg_32 = S_AND_B32 %21, $exec_lo, implicit-def $scc
+    %23:sreg_32 = S_XOR_B32 $exec_lo, %22, implicit-def $scc
+    $exec_lo = S_MOV_B32_term %22
+    S_CBRANCH_EXECZ %bb.5, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.4:
+    successors: %bb.5(0x80000000)
+
+    S_BRANCH %bb.5
+
+  bb.5:
+    successors: %bb.6(0x40000000), %bb.7(0x40000000)
+
+    $exec_lo = S_OR_B32 $exec_lo, %23, implicit-def $scc
+    %24:sreg_32 = S_MOV_B32 31
+    %25:sreg_32 = V_CMP_NE_U32_e64 %18, killed %24, implicit $exec
+    %26:sreg_32 = S_XOR_B32 %25, -1, implicit-def $scc
+    %27:sreg_32 = S_AND_B32 %26, $exec_lo, implicit-def $scc
+    %28:sreg_32 = S_XOR_B32 $exec_lo, %27, implicit-def $scc
+    $exec_lo = S_MOV_B32_term %27
+    S_CBRANCH_EXECZ %bb.7, implicit $exec
+    S_BRANCH %bb.6
+
+  bb.6:
+    successors: %bb.7(0x80000000)
+
+    S_BRANCH %bb.7
+
+  bb.7:
+    successors: %bb.8(0x80000000)
+
+    $exec_lo = S_OR_B32 $exec_lo, %28, implicit-def $scc
+    %29:sreg_32 = S_MOV_B32 16
+    %30:sreg_32 = S_MOV_B32 4
+    %31:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 %17, %30, killed %29, implicit $exec
+    S_BRANCH %bb.8
+
+  bb.8:
+    $exec_lo = S_OR_B32 $exec_lo, %14, implicit-def $scc
+    S_ENDPGM 0
+
+...

More information about the llvm-commits mailing list