<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/139317">139317</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [AMDGPU] incorrect operand folding for `llvm.stepvector` + packed ops
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          raiseirql
      </td>
    </tr>
</table>

<pre>
    The LLVM IR below produces incorrect AMDGPU code for gfx942. In particular, see the instruction sequence that follows. The code loads a float16x8 vector, adds [0..7] to it, and stores the result back out. In the third `v_pk_add_f16`, the packed constant [2.0, 3.0] has been replaced with 0. The error is introduced by the `si-fold-operands` pass. I was able to reproduce this at commit `7babf22461deb846827859de2e472a062815095b`.

Let me know if I can provide any more details.

```assembly
        v_pk_add_f16 v7, v7, s8
 v_pk_add_f16 v6, v6, s9
        v_pk_add_f16 v5, v5, 0 <<<<=== not correct
 v_pk_add_f16 v4, v4, 1.0 op_sel:[0,1] op_sel_hi:[1,0]
``` 

```llvm
; ModuleID = 'stepper.mojo'
source_filename = "stepper.mojo"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

; Reproducer kernel. Arguments: %0 is the read-only input pointer, %1 is the
; write-only output pointer, and %2 is an i32 count that bounds the loop.
; Each loop iteration loads 8 half values from the input, adds the constant
; vector sitofp(llvm.stepvector) to them, and stores the 8 sums to the output.
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
define dso_local amdgpu_kernel void @stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38(ptr noundef readonly captures(none) %0, ptr noundef writeonly captures(none) %1, i32 noundef %2) #0 {
  ; Cast both pointer arguments to address space 1 (presumably AMDGPU global
  ; memory, per the value names -- confirm against the AMDGPU usage docs).
  %.global = addrspacecast ptr %1 to ptr addrspace(1)
 %.global1 = addrspacecast ptr %0 to ptr addrspace(1)
  ; Loop-invariant addend: the step vector 0, 1, ..., 7 converted to half.
  ; This is the constant whose [2.0, 3.0] lanes go missing in the generated code.
  %4 = tail call <8 x i32> @llvm.stepvector.v8i32()
  %5 = sitofp <8 x i32> %4 to <8 x half>
  ; Widen the count to i64 for the loop compare; skip the loop when it is 0.
  %6 = zext i32 %2 to i64
  %.not = icmp eq i32 %2, 0
  br i1 %.not, label %._crit_edge, label %.lr.ph

.lr.ph: ; preds = %3, %.lr.ph
  ; %7 is the current element index (starts at 0); %8 is the next index (+8).
  %7 = phi i64 [ %8, %.lr.ph ], [ 0, %3 ]
  %8 = add nuw nsw i64 %7, 8
  ; Load 8 halves at input[%7], add the step vector, store to output[%7].
  ; Both accesses are only 2-byte aligned.
  %9 = getelementptr inbounds nuw half, ptr addrspace(1) %.global1, i64 %7
  %10 = load <8 x half>, ptr addrspace(1) %9, align 2
  %11 = fadd contract <8 x half> %10, %5
  %12 = getelementptr inbounds nuw half, ptr addrspace(1) %.global, i64 %7
  store <8 x half> %11, ptr addrspace(1) %12, align 2
  ; Keep looping while the next index is still below the count.
  %13 = icmp samesign ult i64 %8, %6
  br i1 %13, label %.lr.ph, label %._crit_edge

._crit_edge: ; preds = %.lr.ph, %3
  ret void
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <8 x i32> @llvm.stepvector.v8i32() #1

attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx942" "target-features" "uniform-work-group-size"="false" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"Debug Info Version", i32 3}
``` 


```assembly
        .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
        .amdhsa_code_object_version 6
 .text
        .globl stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38
 .p2align        8
        .type stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38,@function
; Generated gfx942 code for the reproducer kernel. The loop at .LBB0_2 loads
; 16 bytes (8 halves), applies four packed f16 adds, and stores 16 bytes.
stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38:
stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38$local:
 .type stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38$local,@function
; Load the 32-bit value at offset 0x10 from s[0:1] into s6 and zero s7, so
; s[6:7] holds it as a 64-bit value (presumably the i32 count argument read
; from the kernarg segment -- confirm against the kernel ABI).
 s_load_dword s6, s[0:1], 0x10
        s_mov_b32 s7, 0
 s_waitcnt lgkmcnt(0)
; Skip the loop entirely when that value is 0.
        s_cmp_eq_u32 s6, 0
        s_cbranch_scc1 .LBB0_3
; Load 16 bytes at offset 0x0 into s[0:3]; s[0:1] is used below as the load
; base and s[2:3] as the store base (presumably the two pointer arguments).
        s_load_dwordx4 s[0:3], s[0:1], 0x0
; Packed f16 constants: 0x4600 = 6.0 and 0x4700 = 7.0, so s8 = [6.0, 7.0];
; 0x4400 = 4.0 and 0x4500 = 5.0, so s9 = [4.0, 5.0].
        s_mov_b32 s8, 0x47004600
        s_mov_b32 s9, 0x45004400
; s[4:5] is the 64-bit element counter, v2 a zero offset, v[0:1] the bound.
        s_mov_b64 s[4:5], 0
 v_mov_b32_e32 v2, 0
        v_mov_b64_e32 v[0:1], s[6:7]
.LBB0_2:
 s_waitcnt lgkmcnt(0)
        global_load_dwordx4 v[4:7], v2, s[0:1]
; Advance the counter by 8 elements and the load base by 16 bytes, then set
; vcc to (counter < bound) using the already-incremented counter.
 s_add_u32 s4, s4, 8
        s_addc_u32 s5, s5, 0
        s_add_u32 s0, s0, 16
        s_addc_u32 s1, s1, 0
        v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
        s_waitcnt vmcnt(0)
; v7 += [6.0, 7.0] and v6 += [4.0, 5.0].
        v_pk_add_f16 v7, v7, s8
 v_pk_add_f16 v6, v6, s9
; Reported bug: v5 is added to 0 here; the report expects the packed constant
; [2.0, 3.0] (which would be 0x42004000) for this pair of lanes.
        v_pk_add_f16 v5, v5, 0
; Presumably the op_sel modifiers turn the inline constant 1.0 into
; [0.0, 1.0] -- confirm against the VOP3P op_sel semantics in the ISA guide.
        v_pk_add_f16 v4, v4, 1.0 op_sel:[0,1] op_sel_hi:[1,0]
        global_store_dwordx4 v2, v[4:7], s[2:3]
; Advance the store base by 16 bytes and loop while vcc is set.
        s_add_u32 s2, s2, 16
        s_addc_u32 s3, s3, 0
 s_cbranch_vccnz .LBB0_2
.LBB0_3:
        s_endpgm
``` 
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJy0WF9v67YO_zTqi-DAlv_EeehDerozFNiAC9zd-2ooFp1olSUfSXba8-kHSnaapGkxYF2RKrFI_kiRFEWLOyf3GuCelA-kfLzjoz8Ye2-5dCDtD3W3M-L1_o8D0B0oc6RKTT1tjQA6WCPGFhyVujXWQusp78V-GAN5RZ80Hbj1sh0Vt4R9ow6A-oN01MGPEXQLK4q4AUw6qgwXUu8pp50y3GfVS41SXIRZUj6kq9WalI-Ua0GdNxan_QGoBTcqT3e8faZm9BHVH6QVlFTp1AzPDRei6bKKVCk9cEcPXATJ1mjnufaIzlYpqstXKeqw0JsJRMQCa41FE6X2cdGCSu2kAFTgZNIZJRIzgOVaOFKlK_pEj9xRvlNAvaEWZmfRo_QHFFrv-K5jrKgyAbu6qGq2rsuNAAbFmvG0YnVWpptyh2Ak3ZJ0-xt42gN91uZIZUefaMs1xmBCM7h-pb2xQAV4LpWbZUiVxg93DvqdeiXpls5_536h0xrXHkdXI9cluQrkMLrNhyBl4ApjSkn-7ezzGD9UG0_nbHmvpQjyYcxWKTVD40CRfIuxJ-xbhoGJk81BxvmMsG8YsPPF0qvFY8ric_5AfzdiVPD0SNEWwtbOwzCAXfXmT0PYmqRbZ0bbQtNJBZr3MDOyS0ZG0q3ndg-eCu654q9m9AsrJAPJt1URhmTIzh8Yybd5HJIhP38oztnKc0p1_rAm-TarUpJvWVmdRqTUSGHzmAwb_LFh7xnlomXKqgCWTKyIpGlRMxX1zLOpZsDpBJZMJ8RkKjOcLTOWTFkacPArmVgaIPAr0QG2KpL_5izZlsmvWaIxfLgWZNpcONRbOaiT37GitDrhvcD_g-ORdw7n91G3XhpNt95bR_It1aazAFQbC-1oHf5yr7ql2oz6KLWgPfTGvhJWc7vvoUcZC1wcrfRAGOa2gE5qoMKZRpmWq7mqNc9gNSg6GSkoKdI5IxoPzs-05vGP1wGap2pbbdsHHN0Wx6aqeZlXbLdZ8yxl63UFXbHuUqih3hQ8rwmrB2-DkQK6YJDR6pW2fPCjBUdYrY1G-yhhZahT5_zB-E8EcJNQmbOTAGEli7Q8pWT9EHY0YeVqr8yOq-B8LoR1A2-h5c4HdYiE1Qx_n6iE1Vn02xlA9jFC-hkCMhRBFqsYbblCW77V9AXNJ_kv6Hjcziv0_gStN3Y11Uhj9RlGGTCc9KYbrgFQgzfL7IGrjuS_LIJVEPwJLz74C_2EzLhnFhdhBUMm2fYDhR8nvlD1AtfOUpktvDit-A5UmGhaK30DYg-X88quhkPM6_l3vqWY4YMF4ebNUOYodM4eLFoH8nCQaCaeZDhZX7BSrJA4UT7QdKbkNJbNgFEvAaN6PFLtjhGLleFEqBe2TWDbgwcFPWiPcZR6h1nlgmRw55yd1wE-z4-QkIuGGTxLAzq2AdfR-QRxE3oEJfeashNSzL8O19Ma7S1v_RVkVDi7ojwJsi9a4Lv1Yb8Ct2zIPkPL2K3V5W_553gP2L9RbIBmhUvoq8tkzPJbKfdRdsZUPJu4lY9vGCE5UZ0FHyokAqwfP6vU_ej8YM3egnNUY6FVoYM7VfDLun2USlnwo9VvJXwucaFmt4q_OfhvFQusflm0kHtv5W704OaSiOtbP_zj02Q5wYYx4fvBJlwp0-IJlj8SxlLC2BmHNklr-kEB-inhwV3vOAR0fFQ--THCCO-p0g3ct4dEio9pg7fviJ3iPnGtjcJa-nccB-DDTdGDcR6Dd5Mo-0HJVuLRfpOuhEvi2XnL5H5UXu6tFAl6PeF2_44l-OEm9NHY570145BIkbx8Tn79nPzzJll66D_GXqi3oRfqghx7n6QdxlN27LuXTcEu6R3w-XgPs6OWnbF9QEyitU7-hBNEx5WLSRK24mWWZ6cs_7qdSN_2PMvC1utDw73qFN8vZSPD
hoNl6TlvekbDA5XFopIRxh5hN-7pk-4M_T9YFzfF0tDkM8h16__5q88qdpXN3HHeajOT5OT_K8GD4w2-sjZm9ye0vpmiUTSU25WHF38ugMeBol_aK6KagcVDYf6rz1X61wG-ViP7Roq0mys4viN9JXi-_WpEVoS-PSL_K_6YFVz7hboGu5dGHI0V1MXX5fDymm-zuQVLX7L0LFqu6c3U7HJG3frUQbrmyKVvtadq_9y32hNWp0t7u8i1_dDAj2ZE0eqt-TyRd5br9tC4ts3o6reHh7TJLxjeLH0pTlbms5Xvrf7A6DpSi3WaFlX6AdNmZirTtChuMVXRAnx9LBeN8XpgxmggZ3Ri18ucFvFIvzIaIavwnhka3egENqfF3_BxbOUuHTXNZq5nHdGmC3cFcC5EDE24zYhjfbFwLkQbWcKFSRzTa5bIERrVOGbVByihkYzjpYswUZRvxsVLbbuYfO7v6WoJJw2Lm6bbTvqXr5E-ZPpnd0VXIQ7d-VuM2eKQ80ijw9iyRW5GKaYC-zRKoQmP47zVl406ta3-SZckXdI1n9P1BAVaDPv-4sy7E_e52OQbfgf32bqo6qwuWXl3uF-XNYe67HjXFQWrxY6zuiyzHHZVuYa8u5P3LGVlWqabbMNYWq5qsS6zkjEouqKqoSNFCj2XahUOc2P3d9K5Ee6zfJNn67vw6uDC1TFjGo40UPHMLB_v7D0KJbtx77ARl867NxgvvQp3ztvfH3_9z_8wWm_XyPNNKu2MCne_nbF0vsc76-XDcc8e6MDbZxDUDO5utOr-4P2ALxiEfSfs-176w7hbtaYn7Hu4CIxfyWANHt-EfQ82O8K-z4ua7tlfAQAA__8LTb5z">