[llvm] [AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2 (PR #135162)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 11 10:04:00 PDT 2025
================
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test_u32(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 %a, -1
+ %val1 = select i1 %vcc, i32 %x, i32 %y
+ %val2 = select i1 %vcc, i32 0, i32 %p
+ %val3 = select i1 %vcc, i32 0, i32 %q
+ %val4 = select i1 %vcc, i32 %r, i32 %s
+ %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+ %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+ %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+ store <4 x i32> %ret3, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_negative_case:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 %a, -1
+ %val1 = select i1 %vcc, i32 %x, i32 %y
+ %val2 = select i1 %vcc, i32 0, i32 %p
+ %val3 = select i1 %vcc, i32 0, i32 %q
+ %val4 = select i1 %vcc, i32 %r, i32 %s
+ %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+ %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+ %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+ store <4 x i32> %ret3, ptr addrspace(1) %out
+ ret void
+}
----------------
mbrkusanin wrote:
Am I missing something? I can't see the difference between this and test above.
https://github.com/llvm/llvm-project/pull/135162
More information about the llvm-commits mailing list