[llvm] [AMDGPU] Allow sinking of free vector ops (PR #162580)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 22:48:29 PDT 2025


================
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
+
+define amdgpu_kernel void @runningSum(ptr addrspace(1) %out, i32 %inputElement0, i32 %inputElement1, i32 %inputIter) {
+; GCN-LABEL: runningSum:
+; GCN:       ; %bb.0: ; %bb.0
+; GCN-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x30
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s0, s2
+; GCN-NEXT:    s_mov_b32 s1, s2
+; GCN-NEXT:  .LBB0_1: ; %loopBody
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_add_i32 s1, s2, s1
+; GCN-NEXT:    s_add_i32 s0, s2, s0
+; GCN-NEXT:    s_add_i32 s3, s3, -1
+; GCN-NEXT:    s_cmp_lg_u32 s3, 0
+; GCN-NEXT:    s_cbranch_scc1 .LBB0_1
+; GCN-NEXT:  ; %bb.2: ; %loopExit
+; GCN-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_store_dwordx2 v0, v[2:3], s[2:3]
+; GCN-NEXT:    s_endpgm
+bb.0:
+ br label %preheader
+
+preheader:
+  %vecElement0 = insertelement <2 x i32> poison, i32 %inputElement0, i64 0
+  %broadcast0 = shufflevector <2 x i32> %vecElement0, <2 x i32> poison, <2 x i32> zeroinitializer
+  %vecElement1 = insertelement <2 x i32> poison, i32 %inputElement1, i64 0
+  %broadcast1 = shufflevector <2 x i32> %vecElement1, <2 x i32> poison, <2 x i32> zeroinitializer
+  br label %loopBody
+
+loopBody:
+  %previousSum = phi <2 x i32> [ %broadcast1, %preheader ], [ %runningSum, %loopBody ]
+  %iterCount = phi i32 [ %inputIter, %preheader ], [ %itersLeft, %loopBody ]
+  %runningSum = add <2 x i32> %broadcast1, %previousSum
+  %itersLeft = sub i32 %iterCount, 1
+  %cond = icmp eq i32 %itersLeft, 0
+  br i1 %cond, label %loopExit, label %loopBody, !llvm.loop !0
----------------
arsenm wrote:

I don't think this one function is stressing all of the conditions tested here.

https://github.com/llvm/llvm-project/pull/162580


More information about the llvm-commits mailing list