[llvm] f7988a3 - [AMDGPU][SIPreEmitPeephole] Fix mustRetainExeczBranch (#120121)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 02:47:41 PST 2024


Author: Mirko Brkušanin
Date: 2024-12-17T11:47:38+01:00
New Revision: f7988a338ddb53b03e7cb89d839616925bd0ade1

URL: https://github.com/llvm/llvm-project/commit/f7988a338ddb53b03e7cb89d839616925bd0ade1
DIFF: https://github.com/llvm/llvm-project/commit/f7988a338ddb53b03e7cb89d839616925bd0ade1.diff

LOG: [AMDGPU][SIPreEmitPeephole] Fix mustRetainExeczBranch (#120121)

Do not remove S_CBRANCH_EXECZ if one of the following blocks contains an
unconditional branch to a block other than the one immediately following
it. This can cause unwanted behavior like infinite loops.

Added: 
    llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 701084844cd9b4..2bb70c138a50c4 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -362,6 +362,10 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
       if (MI.isConditionalBranch())
         return true;
 
+      if (MI.isUnconditionalBranch() &&
+          TII->getBranchDestBlock(MI) != MBB.getNextNode())
+        return true;
+
       if (MI.isMetaInstruction())
         continue;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
new file mode 100644
index 00000000000000..f45f48434fa23a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
@@ -0,0 +1,90 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-pre-emit-peephole %s -o - | FileCheck %s
+# Do no remove S_CBRANCH_EXECZ if the following block contains an unconditional
+# branch to a block other than the one immediately following it.
+
+---
+name:            test
+body:             |
+  ; CHECK-LABEL: name: test
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr0_sgpr1 = S_MOV_B64 $exec
+  ; CHECK-NEXT:   V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
+  ; CHECK-NEXT:   renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG killed $vgpr0
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    $sgpr0_sgpr1 = S_MOV_B64 $exec
+    V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
+    S_CBRANCH_EXECZ %bb.5, implicit $exec
+
+  bb.1:
+    liveins: $vgpr1, $sgpr0_sgpr1
+
+    renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+    renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+    S_BRANCH %bb.2
+
+  bb.2:
+    liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+    S_CBRANCH_EXECZ %bb.4, implicit $exec
+
+  bb.3:
+    liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.4:
+    liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+
+  bb.5:
+    liveins: $vgpr1, $sgpr0_sgpr1
+
+    $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
+    renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG killed $vgpr0
+
+...


        


More information about the llvm-commits mailing list