[llvm] [WIP][Don't merge] (PR #186127)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 12 07:37:49 PDT 2026
https://github.com/ambergorzynski created https://github.com/llvm/llvm-project/pull/186127
[This line](https://github.com/ambergorzynski/llvm-project/blob/main/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp#L646) is not exercised by any test in the existing LLVM test suite (checked using code coverage and by inserting an `abort()` at that line).
We propose a new test that exercises this case. To demonstrate the test, the first commit adds an `abort()` at that line, showing that the new test is the only test in the suite that fails; the `abort()` will be removed before merging.
From b7ea84dad73c68ac6771f64fd1d1faeaf952ed58 Mon Sep 17 00:00:00 2001
From: agorzyns <amber.gorzynski at amd.com>
Date: Thu, 12 Mar 2026 09:32:57 -0500
Subject: [PATCH 1/2] [NFC][AMDGPU] Abort to demonstrate test
---
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 0b8c71a4a2453..c264dcbbd1a36 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -643,6 +643,7 @@ bool Vreg1LoweringHelper::lowerCopiesToI1() {
PDT->findNearestCommonDominator(DomBlocks);
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
if (FoundLoopLevel) {
+ abort();
SSAUpdater.Initialize(DstReg);
SSAUpdater.AddAvailableValue(&MBB, DstReg);
LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
From 8d51397f1a3fd8d124187a08f11591b07dc23432 Mon Sep 17 00:00:00 2001
From: agorzyns <amber.gorzynski at amd.com>
Date: Thu, 12 Mar 2026 09:33:34 -0500
Subject: [PATCH 2/2] [NFC][AMDGPU] New test for untested case in
SILowerI1Copies
---
.../test/CodeGen/AMDGPU/si-lower-i1-copies.ll | 149 ++++++++++++++++++
1 file changed, 149 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.ll
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.ll b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.ll
new file mode 100644
index 0000000000000..afc2d830e1743
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck %s
+
+declare void @bar(ptr captures(none)) #0
+
+define void @foo(i32 %0) #0 {
+; CHECK-LABEL: foo:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s16, s33
+; CHECK-NEXT: s_mov_b32 s33, s32
+; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
+; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[18:19]
+; CHECK-NEXT: v_writelane_b32 v42, s16, 20
+; CHECK-NEXT: s_addk_i32 s32, 0x400
+; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT: v_writelane_b32 v42, s30, 0
+; CHECK-NEXT: v_writelane_b32 v42, s31, 1
+; CHECK-NEXT: v_writelane_b32 v42, s34, 2
+; CHECK-NEXT: v_writelane_b32 v42, s35, 3
+; CHECK-NEXT: v_writelane_b32 v42, s36, 4
+; CHECK-NEXT: v_writelane_b32 v42, s37, 5
+; CHECK-NEXT: v_writelane_b32 v42, s38, 6
+; CHECK-NEXT: v_writelane_b32 v42, s39, 7
+; CHECK-NEXT: v_writelane_b32 v42, s48, 8
+; CHECK-NEXT: v_writelane_b32 v42, s49, 9
+; CHECK-NEXT: v_writelane_b32 v42, s50, 10
+; CHECK-NEXT: v_writelane_b32 v42, s51, 11
+; CHECK-NEXT: v_writelane_b32 v42, s52, 12
+; CHECK-NEXT: v_writelane_b32 v42, s53, 13
+; CHECK-NEXT: v_writelane_b32 v42, s54, 14
+; CHECK-NEXT: v_writelane_b32 v42, s55, 15
+; CHECK-NEXT: v_writelane_b32 v42, s64, 16
+; CHECK-NEXT: v_writelane_b32 v42, s65, 17
+; CHECK-NEXT: v_writelane_b32 v42, s66, 18
+; CHECK-NEXT: v_writelane_b32 v42, s67, 19
+; CHECK-NEXT: v_mov_b32_e32 v40, v31
+; CHECK-NEXT: v_mov_b32_e32 v41, v0
+; CHECK-NEXT: s_mov_b32 s52, s15
+; CHECK-NEXT: s_mov_b32 s53, s14
+; CHECK-NEXT: s_mov_b32 s54, s13
+; CHECK-NEXT: s_mov_b32 s55, s12
+; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
+; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
+; CHECK-NEXT: s_mov_b64 s[64:65], 0
+; CHECK-NEXT: v_cmp_gt_i32_e64 s[50:51], 0, v41
+; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: s_branch .LBB0_2
+; CHECK-NEXT: .LBB0_1: ; %Flow
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[66:67]
+; CHECK-NEXT: s_and_b64 s[4:5], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[64:65], s[4:5], s[64:65]
+; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[64:65]
+; CHECK-NEXT: s_cbranch_execz .LBB0_5
+; CHECK-NEXT: .LBB0_2: ; %for.cond
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, -1, v41
+; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
+; CHECK-NEXT: s_and_b64 s[8:9], s[50:51], exec
+; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
+; CHECK-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_2
+; CHECK-NEXT: ; %bb.3: ; %BB
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_mov_b64 s[6:7], -1
+; CHECK-NEXT: s_and_saveexec_b64 s[66:67], s[4:5]
+; CHECK-NEXT: s_cbranch_execz .LBB0_1
+; CHECK-NEXT: ; %bb.4: ; %for.body
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: s_getpc_b64 s[4:5]
+; CHECK-NEXT: s_add_u32 s4, s4, bar@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s5, s5, bar@gotpcrel32@hi+12
+; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b32 s12, s55
+; CHECK-NEXT: s_mov_b32 s13, s54
+; CHECK-NEXT: s_mov_b32 s14, s53
+; CHECK-NEXT: s_mov_b32 s15, s52
+; CHECK-NEXT: v_mov_b32_e32 v31, v40
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
+; CHECK-NEXT: s_branch .LBB0_1
+; CHECK-NEXT: .LBB0_5: ; %for.cond.cleanup
+; CHECK-NEXT: s_or_b64 exec, exec, s[64:65]
+; CHECK-NEXT: v_readlane_b32 s67, v42, 19
+; CHECK-NEXT: v_readlane_b32 s66, v42, 18
+; CHECK-NEXT: v_readlane_b32 s65, v42, 17
+; CHECK-NEXT: v_readlane_b32 s64, v42, 16
+; CHECK-NEXT: v_readlane_b32 s55, v42, 15
+; CHECK-NEXT: v_readlane_b32 s54, v42, 14
+; CHECK-NEXT: v_readlane_b32 s53, v42, 13
+; CHECK-NEXT: v_readlane_b32 s52, v42, 12
+; CHECK-NEXT: v_readlane_b32 s51, v42, 11
+; CHECK-NEXT: v_readlane_b32 s50, v42, 10
+; CHECK-NEXT: v_readlane_b32 s49, v42, 9
+; CHECK-NEXT: v_readlane_b32 s48, v42, 8
+; CHECK-NEXT: v_readlane_b32 s39, v42, 7
+; CHECK-NEXT: v_readlane_b32 s38, v42, 6
+; CHECK-NEXT: v_readlane_b32 s37, v42, 5
+; CHECK-NEXT: v_readlane_b32 s36, v42, 4
+; CHECK-NEXT: v_readlane_b32 s35, v42, 3
+; CHECK-NEXT: v_readlane_b32 s34, v42, 2
+; CHECK-NEXT: v_readlane_b32 s31, v42, 1
+; CHECK-NEXT: v_readlane_b32 s30, v42, 0
+; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b32 s32, s33
+; CHECK-NEXT: v_readlane_b32 s4, v42, 20
+; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
+; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b64 exec, s[6:7]
+; CHECK-NEXT: s_mov_b32 s33, s4
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %for.cond, %entry
+ %cmp1 = icmp slt i32 %0, 0
+ br i1 %cmp1, label %for.cond, label %BB
+
+BB: ; preds = %for.cond
+ br i1 %cmp1, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %BB
+ ret void
+
+for.body: ; preds = %BB
+ call void @bar(ptr poison)
+ br label %for.cond
+}
+
+attributes #0 = { nounwind }
+
More information about the llvm-commits
mailing list