[llvm] [AMDGPU][SIPreEmitPeephole] Fix mustRetainExeczBranch (PR #120121)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 10:06:53 PST 2024
https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/120121
Do not remove S_CBRANCH_EXECZ if one of the following blocks contains an
unconditional branch to a block other than the one immediately following it.
Removing the branch in that case can cause unwanted behavior such as infinite loops.
From e4b173f9edba8b2797b6a8c2bd5dcf942cf0b655 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 16 Dec 2024 18:56:29 +0100
Subject: [PATCH 1/2] [AMDGPU][SIPreEmitPeephole] Precommit test
---
...hort-exec-branch-on-unconditional-jump.mir | 89 +++++++++++++++++++
1 file changed, 89 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
diff --git a/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
new file mode 100644
index 00000000000000..7699a51dd0673b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-pre-emit-peephole %s -o - | FileCheck %s
+# Do not remove S_CBRANCH_EXECZ if the following block contains an unconditional
+# branch to a block other than the one immediately following it.
+
+---
+name: test
+body: |
+ ; CHECK-LABEL: name: test
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; CHECK-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
+ ; CHECK-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ $sgpr0_sgpr1 = S_MOV_B64 $exec
+ V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
+
+ bb.1:
+ liveins: $vgpr1, $sgpr0_sgpr1
+
+ renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ S_BRANCH %bb.2
+
+ bb.2:
+ liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
+
+ $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
+
+ bb.3:
+ liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ S_BRANCH %bb.1
+
+ bb.4:
+ liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+
+ bb.5:
+ liveins: $vgpr1, $sgpr0_sgpr1
+
+ $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
+ renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG killed $vgpr0
+
+...
From 3e6084e04597c117e4d539c5beb80ec12fd894c0 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 16 Dec 2024 18:57:00 +0100
Subject: [PATCH 2/2] [AMDGPU][SIPreEmitPeephole] Fix mustRetainExeczBranch
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 4 ++++
.../remove-not-short-exec-branch-on-unconditional-jump.mir | 3 ++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 701084844cd9b4..2bb70c138a50c4 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -362,6 +362,10 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
if (MI.isConditionalBranch())
return true;
+ if (MI.isUnconditionalBranch() &&
+ TII->getBranchDestBlock(MI) != MBB.getNextNode())
+ return true;
+
if (MI.isMetaInstruction())
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
index 7699a51dd0673b..f45f48434fa23a 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir
@@ -24,10 +24,11 @@ body: |
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x40000000)
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.1(0x80000000)
More information about the llvm-commits mailing list