[llvm] [AMDGPU] Swap select operands to allow later v_cndmask shrinking into vop2 (PR #142140)
Ana Mihajlovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 02:43:35 PDT 2025
mihajlovicana updated https://github.com/llvm/llvm-project/pull/142140
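For context on what these precommit tests pin down: in the VOP2 encoding, v_cndmask_b32 reads its mask implicitly from vcc and accepts an inline constant only in src0, while src1 must be a VGPR. A select whose constant ends up on the false side therefore shrinks to VOP2 (and on gfx11/gfx12 can pair into v_dual_cndmask_b32), whereas the swapped shape is stuck in the larger _e64 (VOP3) encoding. The sketch below is an editor's illustration of the two shapes, not part of the patch; the function names are made up and wave32 is assumed:

; Constant on the false side: codegen emits
; v_cndmask_b32 dst, 0, vP, vcc_lo with the 0 in src0, which the
; VOP2 encoding allows, so the instruction can shrink.
define i32 @shrinkable(i32 %a, i32 %p) {
  %cc = icmp sge i32 %a, 2
  %v = select i1 %cc, i32 %p, i32 0
  ret i32 %v
}

; Constant on the true side: the 0 would land in src1, which VOP2
; cannot encode, so this currently stays v_cndmask_b32_e64. Inverting
; the compare (sge -> slt) and swapping the select operands recovers
; the shrinkable shape above, which is what the follow-up change
; targets.
define i32 @not_yet_shrinkable(i32 %a, i32 %p) {
  %cc = icmp slt i32 %a, 2
  %v = select i1 %cc, i32 0, i32 %p
  ret i32 %v
}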
From 25b2dd8526b52f407bbc51431289768564c11363 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Fri, 30 May 2025 11:43:34 +0200
Subject: [PATCH] Precommit tests for v_cndmask shrinking
---
llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll | 764 +++++++++++++++++++++
1 file changed, 764 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
new file mode 100644
index 0000000000000..12ccdfff07c6f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
@@ -0,0 +1,764 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
+
+; Tests for signed i32 compares
+define amdgpu_cs void @test_i32_sge(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_sge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp sge i32 %a, 2
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_i32_sle(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_sle:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_i32_e32 vcc_lo, 3, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp sle i32 %a, 2
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_i32_sgt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_sgt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_i32_e32 vcc_lo, 2, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp sgt i32 2, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_i32_slt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_slt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, 2, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp slt i32 2, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+; Tests for signed i64 compares
+define amdgpu_cs void @test_i64_sge(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_sge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_i64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp sge i64 %a, 2
+ %val1 = select i1 %vcc, i64 %p, i64 0
+ %val2 = select i1 %vcc, i64 %q, i64 0
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_i64_sle(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_sle:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_i64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp sle i64 %a, 2
+ %val1 = select i1 %vcc, i64 %p, i64 0
+ %val2 = select i1 %vcc, i64 %q, i64 0
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_i64_sgt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_sgt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_i64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp sgt i64 2, %a
+ %val1 = select i1 %vcc, i64 0, i64 %p
+ %val2 = select i1 %vcc, i64 0, i64 %q
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_i64_slt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_slt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_i64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp slt i64 2, %a
+ %val1 = select i1 %vcc, i64 0, i64 %p
+ %val2 = select i1 %vcc, i64 0, i64 %q
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+; Tests for unsigned i32 compares
+define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_eq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 1, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_negative_case(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_negative_case:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 %a, -1
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_mixed(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_mixed:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[5:6], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 -1, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %val3 = select i1 %vcc, i32 0, i32 %r
+ %val4 = select i1 %vcc, i32 0, i32 %s
+ %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+ %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+ %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+ store <4 x i32> %ret3, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_sgpr(i32 %a, i32 %p, i32 inreg %q, i32 inreg %r, ptr addrspace(1) %out) {
+; GCN-LABEL: test_sgpr:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v5, s0, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v6, s1, 0, vcc_lo
+; GCN-NEXT: global_store_b96 v[2:3], v[4:6], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 %a, -1
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %val3 = select i1 %vcc, i32 0, i32 %r
+ %ret0 = insertelement <3 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <3 x i32> %ret0, i32 %val2, i32 1
+ %ret2 = insertelement <3 x i32> %ret1, i32 %val3, i32 2
+ store <3 x i32> %ret2, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ne:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ne i32 1, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_uge(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_uge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp uge i32 %a, 2
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ule(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ule:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ule i32 %a, 2
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ugt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ugt i32 2, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ult:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ult i32 2, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+; Tests for unsigned i64 compares
+define amdgpu_cs void @test_u64_eq(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_eq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i64 1, %a
+ %val1 = select i1 %vcc, i64 0, i64 %p
+ %val2 = select i1 %vcc, i64 0, i64 %q
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_ne(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ne:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ne i64 1, %a
+ %val1 = select i1 %vcc, i64 0, i64 %p
+ %val2 = select i1 %vcc, i64 0, i64 %q
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_uge(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_uge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp uge i64 %a, 2
+ %val1 = select i1 %vcc, i64 %p, i64 0
+ %val2 = select i1 %vcc, i64 %q, i64 0
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_ule(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ule:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_u64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ule i64 %a, 2
+ %val1 = select i1 %vcc, i64 %p, i64 0
+ %val2 = select i1 %vcc, i64 %q, i64 0
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_ugt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ugt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ugt i64 2, %a
+ %val1 = select i1 %vcc, i64 0, i64 %p
+ %val2 = select i1 %vcc, i64 0, i64 %q
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_ult(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ult:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ult i64 2, %a
+ %val1 = select i1 %vcc, i64 0, i64 %p
+ %val2 = select i1 %vcc, i64 0, i64 %q
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+; Tests for f32 compares
+define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oeq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oeq float %a, 2.0
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_negative_modifiers(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_negative_modifiers:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %r = fneg float %p
+ %s = fneg float %q
+ %vcc = fcmp oeq float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %r
+ %val2 = select i1 %vcc, float 0.0, float %s
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_one(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_one:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lg_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp one float %a, 2.0
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_ord(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ord:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ord float %a, 2.0
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_uno(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_uno:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp uno float %a, 2.0
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ge_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oge float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ole:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_le_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ole float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ogt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ogt float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_olt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp olt float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+; Tests for f64 compares
+define amdgpu_cs void @test_f64_oeq(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oeq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oeq double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_one(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_one:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lg_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp one double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_oge(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ge_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oge double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_ole(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ole:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_le_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ole double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_ogt(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ogt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ogt double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_olt(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_olt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp olt double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_ord(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ord:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ord double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_uno(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_uno:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp uno double 2.0, %a
+ %val1 = select i1 %vcc, double 0.0, double %p
+ %val2 = select i1 %vcc, double 0.0, double %q
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}