[llvm] [CodeGen] [AMDGPU] Adds pre-commit test for fmul-select combine (PR #111107)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 00:27:16 PDT 2024
================
@@ -0,0 +1,1191 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s
+
+define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? 2.0 : 1.0)
+; GFX9-LABEL: fmul_select_f32_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float 2.000000e+00, float 1.000000e+00 ; multiplier: 2.0 if equal, else 1.0
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? 0.5 : 1.0)
+; GFX9-LABEL: fmul_select_f32_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float 5.000000e-01, float 1.000000e+00 ; multiplier: 0.5 if equal, else 1.0
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f32, per lane: %x * (arg1 == arg2 ? 2.0 : 1.0)
+; GFX9-LABEL: fmul_select_v2f32_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f32_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f32_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %factor = select <2 x i1> %cmp, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %mul = fmul <2 x float> %x, %factor ; checks expect one v_cndmask_b32 per lane; GFX1100 pairs the multiplies in v_dual_mul_f32
+ ret <2 x float> %mul
+}
+
+define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f32, per lane: %x * (arg1 == arg2 ? 0.5 : 1.0)
+; GFX9-LABEL: fmul_select_v2f32_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f32_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f32_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %factor = select <2 x i1> %cmp, <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %mul = fmul <2 x float> %x, %factor ; checks expect one v_cndmask_b32 per lane; GFX1100 pairs the multiplies in v_dual_mul_f32
+ ret <2 x float> %mul
+}
+
+define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? -2.0 : -1.0)
+; GFX9-LABEL: fmul_select_f32_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float -2.000000e+00, float -1.000000e+00 ; both multipliers are negative
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? -3.0 : 8.0)
+; GFX9-LABEL: fmul_select_f32_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc0400000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc0400000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float -3.000000e+00, float 8.000000e+00 ; in the checks -3.0 is 0xc0400000 and 8.0 is 0x41000000
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define float @fmul_select_f32_test7(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? 4.0 : 8.0)
+; GFX9-LABEL: fmul_select_f32_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, 4.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0x41000000, 4.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0x41000000, 4.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float 4.000000e+00, float 8.000000e+00 ; in the checks 8.0 is 0x41000000, 4.0 is written as 4.0
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? 16.0 : -8.0)
+; GFX9-LABEL: fmul_select_f32_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x41800000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float 1.600000e+01, float -8.000000e+00 ; in the checks 16.0 is 0x41800000 and -8.0 is 0xc1000000
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? 0.0 : 2.0)
+; GFX9-LABEL: fmul_select_f32_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float 0.000000e+00, float 2.000000e+00 ; one arm is +0.0
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; f32: %x * (arg1 == arg2 ? -0.0 : 0.0)
+; GFX9-LABEL: fmul_select_f32_test10:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test10:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test10:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, float -0.000000e+00, float 0.000000e+00 ; signed zeros: -0.0 is 0x80000000 (GFX9 checks build it with v_bfrev_b32 of 1)
+ %mul = fmul float %x, %factor ; checks expect v_cndmask_b32 feeding v_mul_f32
+ ret float %mul
+}
+
+
+
+define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? 2.0 : 1.0)
+; GFX9-LABEL: fmul_select_f64_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double 2.000000e+00, double 1.000000e+00 ; high dword of double 1.0 is 0x3ff00000
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? 0.5 : 1.0)
+; GFX9-LABEL: fmul_select_f64_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3fe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3fe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3fe00000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double 5.000000e-01, double 1.000000e+00 ; high dwords: 0.5 is 0x3fe00000, 1.0 is 0x3ff00000
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f64, per lane: %x * (arg1 == arg2 ? 2.0 : 1.0)
+; GFX9-LABEL: fmul_select_v2f64_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f64_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f64_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: v_mov_b32_e32 v8, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mov_b32_e32 v10, v8
+; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %factor = select <2 x i1> %cmp, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %mul = fmul <2 x double> %x, %factor ; checks expect one high-dword v_cndmask_b32 and one v_mul_f64 per lane
+ ret <2 x double> %mul
+}
+
+define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f64, per lane: %x * (arg1 == arg2 ? 0.5 : 1.0)
+; GFX9-LABEL: fmul_select_v2f64_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v12, 0x3fe00000
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f64_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0x3ff00000, v9, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f64_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0x3ff00000, v9
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %factor = select <2 x i1> %cmp, <2 x double> <double 5.000000e-01, double 5.000000e-01>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %mul = fmul <2 x double> %x, %factor ; checks expect one high-dword cndmask and one v_mul_f64 per lane
+ ret <2 x double> %mul
+}
+
+define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? -0.5 : -1.0)
+; GFX9-LABEL: fmul_select_f64_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xbfe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0xbfe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0xbfe00000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double -5.000000e-01, double -1.000000e+00 ; high dwords: -0.5 is 0xbfe00000, -1.0 is 0xbff00000
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? -2.0 : -1.0)
+; GFX9-LABEL: fmul_select_f64_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double -2.000000e+00, double -1.000000e+00 ; high dword of double -1.0 is 0xbff00000
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? 2.0 : -1.0)
+; GFX9-LABEL: fmul_select_f64_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double 2.000000e+00, double -1.000000e+00 ; the two arms have opposite signs
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? -4.0 : -32.0)
+; GFX9-LABEL: fmul_select_f64_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xc0100000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0xc0100000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0xc0100000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double -4.000000e+00, double -3.200000e+01 ; high dwords: -4.0 is 0xc0100000, -32.0 is 0xc0400000
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f64, per lane: %x * (arg1 == arg2 ? -2.0 : -1.0)
+; GFX9-LABEL: fmul_select_v2f64_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v11, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f64_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f64_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: v_mov_b32_e32 v8, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mov_b32_e32 v10, v8
+; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %factor = select <2 x i1> %cmp, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00>
+ %mul = fmul <2 x double> %x, %factor ; checks expect one high-dword v_cndmask_b32 and one v_mul_f64 per lane
+ ret <2 x double> %mul
+}
+
+define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f64 with a different constant pair per lane
+; GFX9-LABEL: fmul_select_v2f64_test10:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_mov_b32_e32 v9, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v10, 0x3fe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f64_test10:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0xbff00000, v9, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f64_test10:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0xbff00000, v9
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %factor = select <2 x i1> %cmp, <2 x double> <double 5.000000e-01, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00>
+ %mul = fmul <2 x double> %x, %factor ; lane 0 picks 0.5 or -1.0, lane 1 picks 2.0 or 1.0
+ ret <2 x double> %mul
+}
+
+define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64: %x * (arg1 == arg2 ? -2.0 : -0.0)
+; GFX9-LABEL: fmul_select_f64_test11:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test11:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test11:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %bool.arg1, %bool.arg2
+ %factor = select i1 %cmp, double -2.000000e+00, double -0.000000e+00 ; high dword of double -0.0 is 0x80000000 (GFX9 checks build it with v_bfrev_b32 of 1)
+ %mul = fmul double %x, %factor ; checks select only the high dword with v_cndmask_b32; the low dword is a zero v_mov_b32
+ ret double %mul
+}
+
+define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64 case: multiply %x by +0.0 or -0.0, i.e. the two multipliers differ only in sign
+; GFX9-LABEL: fmul_select_f64_test12:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 31, v2
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test12:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v2, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 31, v3
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test12:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; IR compares for equality; the checked code uses the inverted v_cmp_ne form
+ %y = select i1 %bool, double 0.000000e+00, double -0.000000e+00 ; +0.0 if equal, else -0.0; the checks build the high word as (ne ? 1 : 0) << 31
+ %ldexp = fmul double %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f64
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) { ; f64 case: multiply %x by 0.0 or 16.0, chosen by an i32 equality test
+; GFX9-LABEL: fmul_select_f64_test13:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test13:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test13:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, double 0.000000e+00, double 1.600000e+01 ; 0.0 if equal, else 16.0 (0x40300000 in the checks is the high 32 bits of double 16.0)
+ %ldexp = fmul double %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f64
+ ret double %ldexp
+}
+
+define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by 2.0 or 1.0, chosen by an i32 equality test
+; GFX9-LABEL: fmul_select_f16_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00 ; 2.0 if equal, else 1.0 (half bit patterns 0x4000 / 0x3c00 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f16
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by 0.5 or 1.0, chosen by an i32 equality test
+; GFX9-LABEL: fmul_select_f16_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00 ; 0.5 if equal, else 1.0 (half bit patterns 0x3800 / 0x3c00 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f16
+ ret half %ldexp
+}
+
+define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f16 case: each element of %x is multiplied by 2.0 or 1.0, chosen per element
+; GFX9-LABEL: fmul_select_v2f16_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f16_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f16_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 ; element-wise equality, yields <2 x i1>
+ %y = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00> ; per element: 2.0 if equal, else 1.0 (both operands are splats)
+ %ldexp = fmul <2 x half> %x, %y ; a plain vector fmul despite the value name; the checks expect two selects, v_pack_b32_f16, then v_pk_mul_f16
+ ret <2 x half> %ldexp
+}
+
+define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { ; v2f16 case: each element of %x is multiplied by 0.5 or 1.0, chosen per element
+; GFX9-LABEL: fmul_select_v2f16_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f16_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f16_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 ; element-wise equality, yields <2 x i1>
+ %y = select <2 x i1> %bool, <2 x half> <half 5.000000e-01, half 5.000000e-01>, <2 x half> <half 1.000000e+00, half 1.000000e+00> ; per element: 0.5 if equal, else 1.0 (both operands are splats)
+ %ldexp = fmul <2 x half> %x, %y ; a plain vector fmul despite the value name; the checks expect two selects, v_pack_b32_f16, then v_pk_mul_f16
+ ret <2 x half> %ldexp
+}
+
+define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by 2.0 or 8.0 (neither multiplier is 1.0)
+; GFX9-LABEL: fmul_select_f16_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half 2.000000e+00, half 8.000000e+00 ; 2.0 if equal, else 8.0 (half bit patterns 0x4000 / 0x4800 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f16
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by -8.0 or 3.0 (3.0 is not a power of two)
+; GFX9-LABEL: fmul_select_f16_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half -8.000000e+00, half 3.000000e+00 ; -8.0 if equal, else 3.0 (half bit patterns 0xc800 / 0x4200 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; NOTE(review): presumably a case the planned fmul-select combine must leave alone - confirm against the PR
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by 8.0 or -4.0 (multipliers of opposite sign)
+; GFX9-LABEL: fmul_select_f16_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half 8.000000e+00, half -4.000000e+00 ; 8.0 if equal, else -4.0 (half bit patterns 0x4800 / 0xc400 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f16
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by -0.0 or +0.0, i.e. the two multipliers differ only in sign
+; GFX9-LABEL: fmul_select_f16_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half -0.000000e+00, half 0.000000e+00 ; -0.0 if equal, else +0.0 (half bit patterns 0x8000 / 0 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f16
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; f16 case: multiply %x by -16.0 or -32.0 (both multipliers negative)
+; GFX9-LABEL: fmul_select_f16_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2 ; true when the two i32 arguments are equal
+ %y = select i1 %bool, half -1.600000e+01, half -3.200000e+01 ; -16.0 if equal, else -32.0 (half bit patterns 0xcc00 / 0xd000 in the checks)
+ %ldexp = fmul half %x, %y ; a plain fmul despite the value name; every prefix above still expects v_mul_f16
+ ret half %ldexp
----------------
arsenm wrote:
Also vectors
https://github.com/llvm/llvm-project/pull/111107
More information about the llvm-commits
mailing list