[llvm] [StructurizeCFG] nested-if zerocost hoist bugfix (PR #155408)

Vigneshwar Jayakumar via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 27 15:49:43 PDT 2025


https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/155408

>From 36559d1aeb76310425e63e65e073fa68af933f54 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Tue, 26 Aug 2025 07:50:21 -0500
Subject: [PATCH 1/2] [StructurizeCFG] nested-if zerocost hoist bugfix

When zero cost instructions are hoisted, the simplifyHoistedPhi function
was setting incoming phi values which were not dominating the use
causing runtime failure. This was set to poison by rebuildSSA function.
This commit fixes the issue.
---
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp |  3 +-
 llvm/test/CodeGen/AMDGPU/structurize-hoist.ll | 53 +++++++++++++++++++
 .../StructurizeCFG/hoist-zerocost.ll          | 50 +++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index bb7dbc2980f59..e05625344ee29 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -997,7 +997,8 @@ void StructurizeCFG::simplifyHoistedPhis() {
         continue;
 
       OtherPhi->setIncomingValue(PoisonValBBIdx, V);
-      Phi->setIncomingValue(i, OtherV);
+      if (DT->dominates(OtherV, Phi))
+        Phi->setIncomingValue(i, OtherV);
     }
   }
 }
diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
index 42436a1b4c279..9bfb0a6ed408f 100644
--- a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
+++ b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
@@ -178,3 +178,56 @@ latch:
 end:
   ret void
 }
+
+define void @test_nested_if(ptr %ptr, i32 %val, i1 %cond) {
+; GFX900-LABEL: test_nested_if:
+; GFX900:       ; %bb.0: ; %entry
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    flat_load_dword v2, v[0:1]
+; GFX900-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX900-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v3
+; GFX900-NEXT:    s_mov_b64 s[8:9], -1
+; GFX900-NEXT:    s_xor_b64 s[10:11], s[4:5], -1
+; GFX900-NEXT:    s_and_saveexec_b64 s[6:7], s[10:11]
+; GFX900-NEXT:    s_cbranch_execz .LBB3_4
+; GFX900-NEXT:  ; %bb.1: ; %then
+; GFX900-NEXT:    s_and_saveexec_b64 s[12:13], s[10:11]
+; GFX900-NEXT:    s_cbranch_execz .LBB3_3
+; GFX900-NEXT:  ; %bb.2: ; %then_2
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    flat_load_dword v2, v[0:1]
+; GFX900-NEXT:    s_xor_b64 s[8:9], exec, -1
+; GFX900-NEXT:  .LBB3_3: ; %Flow1
+; GFX900-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GFX900-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
+; GFX900-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX900-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
+; GFX900-NEXT:  .LBB3_4: ; %Flow
+; GFX900-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX900-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GFX900-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    flat_store_dword v[0:1], v2
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %load_then = load %pair, ptr %ptr
+  br i1 %cond, label %else, label %then
+
+then:
+  %a16 = icmp slt i32  %val, 255
+  br i1 %cond, label %else, label %then_2
+
+then_2:
+  %loaded = load i32, ptr  %ptr
+  br label %merge
+
+else:
+  %a_else = extractvalue %pair %load_then, 0
+  br label %merge
+
+merge:
+  %phi = phi i32  [ %loaded, %then_2 ], [ %a_else, %else ]
+  store i32 %phi, ptr  %ptr
+  ret void
+}
diff --git a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
index 10d4fa2be0a70..d084e199ceb89 100644
--- a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
+++ b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
@@ -159,3 +159,53 @@ latch:
 end:
   ret void
 }
+
+define void @test_nested_if(ptr %ptr, i32 %val, i1 %cond) {
+; CHECK-LABEL: define void @test_nested_if(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[VAL:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT:    [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
+; CHECK-NEXT:    [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT:    br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[A16:%.*]] = icmp slt i32 [[VAL]], 255
+; CHECK-NEXT:    br i1 [[COND_INV]], label %[[THEN_2:.*]], label %[[FLOW1:.*]]
+; CHECK:       [[FLOW]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[TMP2:%.*]], %[[FLOW1]] ], [ [[A_ELSE]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP3:%.*]], %[[FLOW1]] ], [ [[COND]], %[[ENTRY]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
+; CHECK:       [[THEN_2]]:
+; CHECK-NEXT:    [[LOADED:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    br label %[[FLOW1]]
+; CHECK:       [[FLOW1]]:
+; CHECK-NEXT:    [[TMP2]] = phi i32 [ [[LOADED]], %[[THEN_2]] ], [ [[A_ELSE]], %[[THEN]] ]
+; CHECK-NEXT:    [[TMP3]] = phi i1 [ false, %[[THEN_2]] ], [ true, %[[THEN]] ]
+; CHECK-NEXT:    br label %[[FLOW]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[MERGE]]
+; CHECK:       [[MERGE]]:
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %load_then = load %pair, ptr %ptr
+  br i1 %cond, label %else, label %then
+
+then:
+  %a16 = icmp slt i32  %val, 255
+  br i1 %cond, label %else, label %then_2
+
+then_2:
+  %loaded = load i32, ptr  %ptr
+  br label %merge
+
+else:
+  %a_else = extractvalue %pair %load_then, 0
+  br label %merge
+
+merge:
+  %phi = phi i32  [ %loaded, %then_2 ], [ %a_else, %else ]
+  store i32 %phi, ptr  %ptr
+  ret void
+}

>From e6198963b2114f9ccda955b5a04c68be2b57ccf9 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Wed, 27 Aug 2025 17:49:01 -0500
Subject: [PATCH 2/2] update tests

---
 llvm/test/CodeGen/AMDGPU/structurize-hoist.ll | 90 +++++++++++++------
 1 file changed, 64 insertions(+), 26 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
index 9bfb0a6ed408f..b4036517cc0d5 100644
--- a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
+++ b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
@@ -183,51 +183,89 @@ define void @test_nested_if(ptr %ptr, i32 %val, i1 %cond) {
 ; GFX900-LABEL: test_nested_if:
 ; GFX900:       ; %bb.0: ; %entry
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    flat_load_dword v2, v[0:1]
+; GFX900-NEXT:    flat_load_dword v4, v[0:1]
 ; GFX900-NEXT:    v_and_b32_e32 v3, 1, v3
-; GFX900-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v3
-; GFX900-NEXT:    s_mov_b64 s[8:9], -1
-; GFX900-NEXT:    s_xor_b64 s[10:11], s[4:5], -1
-; GFX900-NEXT:    s_and_saveexec_b64 s[6:7], s[10:11]
+; GFX900-NEXT:    v_cmp_eq_u32_e64 s[6:7], 1, v3
+; GFX900-NEXT:    s_mov_b64 s[10:11], -1
+; GFX900-NEXT:    s_xor_b64 s[4:5], s[6:7], -1
+; GFX900-NEXT:    s_mov_b64 s[12:13], s[6:7]
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v3, v4
+; GFX900-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; GFX900-NEXT:    s_cbranch_execz .LBB3_4
-; GFX900-NEXT:  ; %bb.1: ; %then
-; GFX900-NEXT:    s_and_saveexec_b64 s[12:13], s[10:11]
+; GFX900-NEXT:  ; %bb.1: ; %if
+; GFX900-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GFX900-NEXT:    s_cbranch_execz .LBB3_3
-; GFX900-NEXT:  ; %bb.2: ; %then_2
-; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    flat_load_dword v2, v[0:1]
-; GFX900-NEXT:    s_xor_b64 s[8:9], exec, -1
-; GFX900-NEXT:  .LBB3_3: ; %Flow1
+; GFX900-NEXT:  ; %bb.2: ; %if_2
+; GFX900-NEXT:    flat_load_dword v3, v[0:1]
+; GFX900-NEXT:    s_xor_b64 s[10:11], exec, -1
+; GFX900-NEXT:  .LBB3_3: ; %Flow3
 ; GFX900-NEXT:    s_or_b64 exec, exec, s[12:13]
+; GFX900-NEXT:    s_andn2_b64 s[12:13], s[6:7], exec
+; GFX900-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX900-NEXT:    s_or_b64 s[12:13], s[12:13], s[10:11]
+; GFX900-NEXT:  .LBB3_4: ; %Flow2
+; GFX900-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GFX900-NEXT:    s_and_saveexec_b64 s[8:9], s[12:13]
+; GFX900-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GFX900-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
+; GFX900-NEXT:    s_cbranch_execz .LBB3_8
+; GFX900-NEXT:  ; %bb.5: ; %if_3
+; GFX900-NEXT:    s_movk_i32 s6, 0xfe
+; GFX900-NEXT:    v_cmp_lt_i32_e32 vcc, s6, v2
+; GFX900-NEXT:    s_mov_b64 s[6:7], -1
+; GFX900-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GFX900-NEXT:    s_cbranch_execz .LBB3_7
+; GFX900-NEXT:  ; %bb.6: ; %if_4
+; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_add_u32_e32 v4, 1, v3
+; GFX900-NEXT:    s_xor_b64 s[6:7], exec, -1
+; GFX900-NEXT:  .LBB3_7: ; %Flow1
+; GFX900-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX900-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
-; GFX900-NEXT:    s_and_b64 s[8:9], s[8:9], exec
-; GFX900-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX900-NEXT:  .LBB3_4: ; %Flow
-; GFX900-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GFX900-NEXT:    s_and_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
+; GFX900-NEXT:  .LBB3_8: ; %Flow
+; GFX900-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GFX900-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GFX900-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    flat_store_dword v[0:1], v2
+; GFX900-NEXT:    flat_store_dword v[0:1], v4
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %load_then = load %pair, ptr %ptr
-  br i1 %cond, label %else, label %then
+  %load = load %pair, ptr %ptr
+  br i1 %cond, label %else, label %if
 
-then:
+if:
   %a16 = icmp slt i32  %val, 255
-  br i1 %cond, label %else, label %then_2
+  br i1 %cond, label %else, label %if_2
 
-then_2:
+if_2:
   %loaded = load i32, ptr  %ptr
   br label %merge
 
 else:
-  %a_else = extractvalue %pair %load_then, 0
+  %a_else = extractvalue %pair %load, 0
   br label %merge
 
 merge:
-  %phi = phi i32  [ %loaded, %then_2 ], [ %a_else, %else ]
-  store i32 %phi, ptr  %ptr
+  %phi = phi i32  [ %loaded, %if_2 ], [ %a_else, %else ]
+  br i1 %cond, label %if_3, label %else_2
+
+if_3:
+  %a17 = icmp slt i32  %val, 255
+  br i1 %a17, label %else_2, label %if_4
+
+if_4:
+  %sum_load = add i32 %phi, 1
+  br label %merge_2
+
+else_2:
+  %a_else_2 = extractvalue %pair %load, 0
+  br label %merge_2
+
+merge_2:
+  %phi_2 = phi i32  [ %sum_load, %if_4 ], [ %a_else_2, %else_2 ]
+  store i32 %phi_2, ptr  %ptr
   ret void
 }



More information about the llvm-commits mailing list