[llvm] [AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2 (PR #135162)

Mirko BrkuĊĦanin via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 11 10:04:00 PDT 2025


================
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test_u32(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %x, i32 %y
+  %val2 = select i1 %vcc, i32 0, i32 %p
+  %val3 = select i1 %vcc, i32 0, i32 %q
+  %val4 = select i1 %vcc, i32 %r, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_negative_case:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %x, i32 %y
+  %val2 = select i1 %vcc, i32 0, i32 %p
+  %val3 = select i1 %vcc, i32 0, i32 %q
+  %val4 = select i1 %vcc, i32 %r, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}
----------------
mbrkusanin wrote:

Am I missing something? I can't see the difference between this and test above.

https://github.com/llvm/llvm-project/pull/135162


More information about the llvm-commits mailing list