[llvm] [AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2 (PR #135162)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 05:31:51 PDT 2025
================
@@ -0,0 +1,429 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_eq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i32 1, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ne:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ne i32 1, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_uge(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_uge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp uge i32 %a, 2
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ule(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ule:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ule i32 %a, 2
+ %val1 = select i1 %vcc, i32 %p, i32 0
+ %val2 = select i1 %vcc, i32 %q, i32 0
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ugt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_le_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ugt i32 2, %a ; le
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ult:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ge_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ult i32 2, %a
+ %val1 = select i1 %vcc, i32 0, i32 %p
+ %val2 = select i1 %vcc, i32 0, i32 %q
+ %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+ %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+ store <2 x i32> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_eq(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_eq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp eq i64 1, %argA
+ %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+ %val2 = select i1 %vcc, i64 1, i64 %arg2
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+
+define amdgpu_cs void @test_u64_ne(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ne:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ne i64 1, %argA
+ %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+ %val2 = select i1 %vcc, i64 1, i64 %arg2
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_uge(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_uge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_le_u64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp uge i64 2, %argA
+ %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+ %val2 = select i1 %vcc, i64 1, i64 %arg2
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+
+define amdgpu_cs void @test_u64_ule(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ule:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ge_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ule i64 2, %argA
+ %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+ %val2 = select i1 %vcc, i64 1, i64 %arg2
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_ugt(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ugt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_le_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ugt i64 2, %argA
+ %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+ %val2 = select i1 %vcc, i64 1, i64 %arg2
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_u64_ult(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ult:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ge_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = icmp ult i64 2, %argA
+ %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+ %val2 = select i1 %vcc, i64 1, i64 %arg2
+ %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+ %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+ store <2 x i64> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oeq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_neq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oeq float %a, 2.0
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+
+define amdgpu_cs void @test_f32_one(float %argA, float %arg0, float %arg1, float %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_one:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oeq float %argA, 1.0
+ %val1 = select i1 %vcc, float %arg0, float %arg1
+ %val2 = select i1 %vcc, float 0.0, float %arg2
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_le_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oge float %a, 2.0
+ %val1 = select i1 %vcc, float %p, float 0.0
+ %val2 = select i1 %vcc, float %q, float 0.0
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ole:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_nle_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ole float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ogt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ogt float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_olt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp olt float 2.0, %a
+ %val1 = select i1 %vcc, float 0.0, float %p
+ %val2 = select i1 %vcc, float 0.0, float %q
+ %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+ %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+ store <2 x float> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_oeq(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oeq:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oeq double %argA, 1.0
+ %val1 = select i1 %vcc, double %arg0, double %arg1
+ %val2 = select i1 %vcc, double 0.0, double %arg2
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_one(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_one:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_lg_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v5, v3 :: v_dual_cndmask_b32 v0, v4, v2
+; GCN-NEXT: v_cndmask_b32_e64 v3, v7, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc_lo
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp one double %argA, 1.0
+ %val1 = select i1 %vcc, double %arg0, double %arg1
+ %val2 = select i1 %vcc, double 0.0, double %arg2
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_oge(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oge:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_nge_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp oge double 1.0, %argA
+ %val1 = select i1 %vcc, double %arg0, double %arg1
+ %val2 = select i1 %vcc, double 0.0, double %arg2
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_ole(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ole:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_nle_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ole double 1.0, %argA
+ %val1 = select i1 %vcc, double %arg0, double %arg1
+ %val2 = select i1 %vcc, double 0.0, double %arg2
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_ogt(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ogt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_ngt_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp ogt double 1.0, %argA
+ %val1 = select i1 %vcc, double %arg0, double %arg1
+ %val2 = select i1 %vcc, double 0.0, double %arg2
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs void @test_f64_olt(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_olt:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_nlt_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT: v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT: global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+ %vcc = fcmp olt double 1.0, %argA
+ %val1 = select i1 %vcc, double %arg0, double %arg1
+ %val2 = select i1 %vcc, double 0.0, double %arg2
+ %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+ %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+ store <2 x double> %ret1, ptr addrspace(1) %out
+ ret void
+}
+
+
----------------
mbrkusanin wrote:
Remove the extra newline.
https://github.com/llvm/llvm-project/pull/135162
More information about the llvm-commits
mailing list