[llvm] [AMDGPU] Try to simplify select v32i4 case by legalizing v16i4 (PR #173328)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 20 08:26:16 PST 2026
================
@@ -65,6 +65,21 @@ define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
+; GCN-LABEL: {{^}}v_select_v32i4:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v32i4(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(4) %b.ptr, i32 %c) #0 {
+ %a = load <32 x i4>, ptr addrspace(1) %a.ptr, align 2
+ %b = load <32 x i4>, ptr addrspace(4) %b.ptr, align 2
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <32 x i4> %a, <32 x i4> %b
+ store <32 x i4> %select, ptr addrspace(1) %out, align 2
+ ret void
+}
----------------
arsenm wrote:
This should test more combinations, and also cover FP cases.
https://github.com/llvm/llvm-project/pull/173328
More information about the llvm-commits mailing list