[llvm] [CodeGen] [AMDGPU] Adds pre-commit test for fmul-select combine (PR #111107)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 23:13:51 PDT 2024
https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/111107
>From 277c51ac5f06c8783fb00f1c5a7c9c278537f77d Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Fri, 4 Oct 2024 06:53:37 +0000
Subject: [PATCH 1/6] [CodeGen] [AMDGPU] Adds pre-commit test for fmul-select
combine
This adds f32/f64 test cases for the patterns below:
fmul x, select(y, 2.0, 1.0)
fmul x, select(y, 0.5, 1.0)
These act as pre-commit tests for a DAG combine that turns the above
patterns into a cheaper ldexp in the f64 case.
---
llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll | 342 +++++++++++++++++++
1 file changed, 342 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll
diff --git a/llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll
new file mode 100644
index 00000000000000..c20cf332422fef
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll
@@ -0,0 +1,342 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+;RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
+;RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs | FileCheck -check-prefix=GFX1030 %s
+;RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefix=GFX1100 %s
+
+define float @fmul_select_f32_test1(float %x, i1 %bool) {
+; GFX9-LABEL: fmul_select_f32_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select i1 %bool, float 2.000000e+00, float 1.000000e+00
+ %ldexp = fmul float %x, %1
+ ret float %ldexp
+}
+
+define float @fmul_select_f32_test2(float %x, i1 %bool) {
+; GFX9-LABEL: fmul_select_f32_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select i1 %bool, float 0.500000e+00, float 1.000000e+00
+ %ldexp = fmul float %x, %1
+ ret float %ldexp
+}
+
+define <2 x float> @fmul_select_v2f32_test1(<2 x float> %x, <2 x i1> %bool) {
+; GFX9-LABEL: fmul_select_v2f32_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f32_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX1030-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f32_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_and_b32 v3, 1, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %ldexp = fmul <2 x float> %x, %1
+ ret <2 x float> %ldexp
+}
+
+define <2 x float> @fmul_select_v2f32_test2(<2 x float> %x, <2 x i1> %bool) {
+; GFX9-LABEL: fmul_select_v2f32_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f32_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX1030-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f32_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_and_b32 v3, 1, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select <2 x i1> %bool, <2 x float> <float 0.500000e+00, float 0.500000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %ldexp = fmul <2 x float> %x, %1
+ ret <2 x float> %ldexp
+}
+
+define double @fmul_select_f64_test1(double %x, i1 %bool) {
+; GFX9-LABEL: fmul_select_f64_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3ff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX1030-NEXT: v_mov_b32_e32 v2, 0
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v3, 1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select i1 %bool, double 2.000000e+00, double 1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test2(double %x, i1 %bool) {
+; GFX9-LABEL: fmul_select_f64_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3fe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0x3fe00000
+; GFX1030-NEXT: v_mov_b32_e32 v2, 0
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v3, 0x3ff00000, v4, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v4, 0x3fe00000 :: v_dual_and_b32 v3, 1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v3, 0x3ff00000, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select i1 %bool, double 0.500000e+00, double 1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define <2 x double> @fmul_select_v2f64_test1(<2 x double> %x, <2 x i1> %bool) {
+; GFX9-LABEL: fmul_select_v2f64_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v6, 1, v4
+; GFX9-NEXT: v_and_b32_e32 v7, 1, v5
+; GFX9-NEXT: v_mov_b32_e32 v8, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[5:6]
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, 2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f64_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v6, 1, v4
+; GFX1030-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1030-NEXT: v_mov_b32_e32 v6, v4
+; GFX1030-NEXT: v_cndmask_b32_e64 v7, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f64_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_and_b32_e32 v6, 1, v4
+; GFX1100-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v5, 1, v5
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1100-NEXT: v_mov_b32_e32 v6, v4
+; GFX1100-NEXT: v_cndmask_b32_e64 v7, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %1
+ ret <2 x double> %ldexp
+}
+
+define <2 x double> @fmul_select_v2f64_test2(<2 x double> %x, <2 x i1> %bool) {
+; GFX9-LABEL: fmul_select_v2f64_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v6, 1, v4
+; GFX9-NEXT: v_and_b32_e32 v7, 1, v5
+; GFX9-NEXT: v_mov_b32_e32 v8, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
+; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v9, vcc
+; GFX9-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[5:6]
+; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2f64_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_and_b32_e32 v6, 1, v4
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0x3fe00000
+; GFX1030-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1030-NEXT: v_mov_b32_e32 v6, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v7, 0x3ff00000, v8, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v8, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2f64_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_and_b32_e32 v6, 1, v4
+; GFX1100-NEXT: v_dual_mov_b32 v8, 0x3fe00000 :: v_dual_and_b32 v5, 1, v5
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1100-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_cndmask_b32 v7, 0x3ff00000, v8
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1100-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_cndmask_b32 v5, 0x3ff00000, v8
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %1 = select <2 x i1> %bool, <2 x double> <double 0.500000e+00, double 0.500000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %1
+ ret <2 x double> %ldexp
+}
>From 258b672b2db20b70878fab6f4f361cef1350c0ed Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Mon, 7 Oct 2024 11:02:14 +0000
Subject: [PATCH 2/6] Added test cases for f16, wrong constants, & negative
constants.
---
llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll | 342 --------
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 781 ++++++++++++++++++
2 files changed, 781 insertions(+), 342 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
diff --git a/llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll
deleted file mode 100644
index c20cf332422fef..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/combine-fmul-sel.ll
+++ /dev/null
@@ -1,342 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-;RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
-;RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs | FileCheck -check-prefix=GFX1030 %s
-;RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefix=GFX1100 %s
-
-define float @fmul_select_f32_test1(float %x, i1 %bool) {
-; GFX9-LABEL: fmul_select_f32_test1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f32_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select i1 %bool, float 2.000000e+00, float 1.000000e+00
- %ldexp = fmul float %x, %1
- ret float %ldexp
-}
-
-define float @fmul_select_f32_test2(float %x, i1 %bool) {
-; GFX9-LABEL: fmul_select_f32_test2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f32_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select i1 %bool, float 0.500000e+00, float 1.000000e+00
- %ldexp = fmul float %x, %1
- ret float %ldexp
-}
-
-define <2 x float> @fmul_select_v2f32_test1(<2 x float> %x, <2 x i1> %bool) {
-; GFX9-LABEL: fmul_select_v2f32_test1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f32_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX1030-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f32_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
-; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_and_b32 v3, 1, v3
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
-; GFX1100-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
- %ldexp = fmul <2 x float> %x, %1
- ret <2 x float> %ldexp
-}
-
-define <2 x float> @fmul_select_v2f32_test2(<2 x float> %x, <2 x i1> %bool) {
-; GFX9-LABEL: fmul_select_v2f32_test2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f32_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX1030-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f32_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
-; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_and_b32 v3, 1, v3
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
-; GFX1100-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select <2 x i1> %bool, <2 x float> <float 0.500000e+00, float 0.500000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
- %ldexp = fmul <2 x float> %x, %1
- ret <2 x float> %ldexp
-}
-
-define double @fmul_select_f64_test1(double %x, i1 %bool) {
-; GFX9-LABEL: fmul_select_f64_test1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3ff00000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, 2.0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v3, 1, v2
-; GFX1030-NEXT: v_mov_b32_e32 v2, 0
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0x3ff00000, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v3, 1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0x3ff00000, 2.0, vcc_lo
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select i1 %bool, double 2.000000e+00, double 1.000000e+00
- %ldexp = fmul double %x, %1
- ret double %ldexp
-}
-
-define double @fmul_select_f64_test2(double %x, i1 %bool) {
-; GFX9-LABEL: fmul_select_f64_test2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3ff00000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x3fe00000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v3, 1, v2
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0x3fe00000
-; GFX1030-NEXT: v_mov_b32_e32 v2, 0
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v3, 0x3ff00000, v4, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v4, 0x3fe00000 :: v_dual_and_b32 v3, 1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v3, 0x3ff00000, v4
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select i1 %bool, double 0.500000e+00, double 1.000000e+00
- %ldexp = fmul double %x, %1
- ret double %ldexp
-}
-
-define <2 x double> @fmul_select_v2f64_test1(<2 x double> %x, <2 x i1> %bool) {
-; GFX9-LABEL: fmul_select_v2f64_test1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v6, 1, v4
-; GFX9-NEXT: v_and_b32_e32 v7, 1, v5
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x3ff00000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, 2.0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v5, v4
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[5:6]
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, 2.0, vcc
-; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f64_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v6, 1, v4
-; GFX1030-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1030-NEXT: v_mov_b32_e32 v6, v4
-; GFX1030-NEXT: v_cndmask_b32_e64 v7, 0x3ff00000, 2.0, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f64_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_and_b32_e32 v6, 1, v4
-; GFX1100-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v5, 1, v5
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1100-NEXT: v_mov_b32_e32 v6, v4
-; GFX1100-NEXT: v_cndmask_b32_e64 v7, 0x3ff00000, 2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
- %ldexp = fmul <2 x double> %x, %1
- ret <2 x double> %ldexp
-}
-
-define <2 x double> @fmul_select_v2f64_test2(<2 x double> %x, <2 x i1> %bool) {
-; GFX9-LABEL: fmul_select_v2f64_test2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v6, 1, v4
-; GFX9-NEXT: v_and_b32_e32 v7, 1, v5
-; GFX9-NEXT: v_mov_b32_e32 v8, 0x3ff00000
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x3fe00000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v9, vcc
-; GFX9-NEXT: v_mov_b32_e32 v5, v4
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[5:6]
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc
-; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f64_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_and_b32_e32 v6, 1, v4
-; GFX1030-NEXT: v_mov_b32_e32 v8, 0x3fe00000
-; GFX1030-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1030-NEXT: v_mov_b32_e32 v6, v4
-; GFX1030-NEXT: v_cndmask_b32_e32 v7, 0x3ff00000, v8, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
-; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v8, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f64_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_and_b32_e32 v6, 1, v4
-; GFX1100-NEXT: v_dual_mov_b32 v8, 0x3fe00000 :: v_dual_and_b32 v5, 1, v5
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1100-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_cndmask_b32 v7, 0x3ff00000, v8
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1100-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_cndmask_b32 v5, 0x3ff00000, v8
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7]
-; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
- %1 = select <2 x i1> %bool, <2 x double> <double 0.500000e+00, double 0.500000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
- %ldexp = fmul <2 x double> %x, %1
- ret <2 x double> %ldexp
-}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
new file mode 100644
index 00000000000000..62ba95b9f8a0c0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -0,0 +1,781 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s
+
+define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, float 2.000000e+00, float 1.000000e+00
+ %ldexp = fmul float %x, %1
+ ret float %ldexp
+}
+
+define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, float 0.500000e+00, float 1.000000e+00
+ %ldexp = fmul float %x, %1
+ ret float %ldexp
+}
+
+define <2 x float> @fmul_select_f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %ldexp = fmul <2 x float> %x, %1
+ ret <2 x float> %ldexp
+}
+
+define <2 x float> @fmul_select_f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x float> <float 0.500000e+00, float 0.500000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %ldexp = fmul <2 x float> %x, %1
+ ret <2 x float> %ldexp
+}
+
+define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, float -2.000000e+00, float -1.000000e+00
+ %ldexp = fmul float %x, %1
+ ret float %ldexp
+}
+
+define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x40400000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, float 3.000000e+00, float 8.000000e+00
+ %ldexp = fmul float %x, %1
+ ret float %ldexp
+}
+
+define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, double 2.000000e+00, double 1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3fe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3fe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3fe00000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, double 0.500000e+00, double 1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define <2 x double> @fmul_select_f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: v_mov_b32_e32 v8, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mov_b32_e32 v10, v8
+; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %1
+ ret <2 x double> %ldexp
+}
+
+define <2 x double> @fmul_select_f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX9-NEXT: v_mov_b32_e32 v12, 0x3fe00000
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0x3ff00000, v9, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0x3ff00000, v9
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x double> <double 0.500000e+00, double 0.500000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %1
+ ret <2 x double> %ldexp
+}
+
+define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xbfe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0xbfe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0xbfe00000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, double -0.500000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, double -2.000000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, double 2.000000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x40100000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x40100000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0x40100000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, double 4.000000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %1
+ ret double %ldexp
+}
+
+define <2 x double> @fmul_select_f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v11, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: v_mov_b32_e32 v8, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mov_b32_e32 v10, v8
+; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %1
+ ret <2 x double> %ldexp
+}
+
+define <2 x double> @fmul_select_f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test10:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: v_mov_b32_e32 v9, 0xbff00000
+; GFX9-NEXT: v_mov_b32_e32 v10, 0x3fe00000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc
+; GFX9-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc
+; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test10:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0
+; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0xbff00000, v9, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1030-NEXT: v_mov_b32_e32 v10, v8
+; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test10:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0xbff00000, v9
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11]
+; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x double> <double 0.500000e+00, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %1
+ ret <2 x double> %ldexp
+}
+
+define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, half 2.000000e+00, half 1.000000e+00
+ %ldexp = fmul half %x, %1
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %1 = select i1 %bool, half 0.500000e+00, half 1.000000e+00
+ %ldexp = fmul half %x, %1
+ ret half %ldexp
+}
+
+define <2 x half> @fmul_select_f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
+ %ldexp = fmul <2 x half> %x, %1
+ ret <2 x half> %ldexp
+}
+
+define <2 x half> @fmul_select_f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %1 = select <2 x i1> %bool, <2 x half> <half 0.500000e+00, half 0.500000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
+ %ldexp = fmul <2 x half> %x, %1
+ ret <2 x half> %ldexp
+}
>From 575d7ab36e265a9177e9318e953602cdbb6e1cfa Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Tue, 8 Oct 2024 10:30:33 +0000
Subject: [PATCH 3/6] Refactored test to use named values.
---
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 80 +++++++++----------
1 file changed, 40 insertions(+), 40 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 62ba95b9f8a0c0..5405f83b7ab8c3 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -29,8 +29,8 @@ define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, float 2.000000e+00, float 1.000000e+00
- %ldexp = fmul float %x, %1
+ %y = select i1 %bool, float 2.000000e+00, float 1.000000e+00
+ %ldexp = fmul float %x, %y
ret float %ldexp
}
@@ -60,8 +60,8 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, float 0.500000e+00, float 1.000000e+00
- %ldexp = fmul float %x, %1
+ %y = select i1 %bool, float 5.000000e-01, float 1.000000e+00
+ %ldexp = fmul float %x, %y
ret float %ldexp
}
@@ -99,8 +99,8 @@ define <2 x float> @fmul_select_f32_test3(<2 x float> %x, <2 x i32> %bool.arg1,
; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
- %ldexp = fmul <2 x float> %x, %1
+ %y = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %ldexp = fmul <2 x float> %x, %y
ret <2 x float> %ldexp
}
@@ -138,8 +138,8 @@ define <2 x float> @fmul_select_f32_test4(<2 x float> %x, <2 x i32> %bool.arg1,
; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x float> <float 0.500000e+00, float 0.500000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
- %ldexp = fmul <2 x float> %x, %1
+ %y = select <2 x i1> %bool, <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %ldexp = fmul <2 x float> %x, %y
ret <2 x float> %ldexp
}
@@ -169,8 +169,8 @@ define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, float -2.000000e+00, float -1.000000e+00
- %ldexp = fmul float %x, %1
+ %y = select i1 %bool, float -2.000000e+00, float -1.000000e+00
+ %ldexp = fmul float %x, %y
ret float %ldexp
}
@@ -204,8 +204,8 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, float 3.000000e+00, float 8.000000e+00
- %ldexp = fmul float %x, %1
+ %y = select i1 %bool, float 3.000000e+00, float 8.000000e+00
+ %ldexp = fmul float %x, %y
ret float %ldexp
}
@@ -239,8 +239,8 @@ define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, double 2.000000e+00, double 1.000000e+00
- %ldexp = fmul double %x, %1
+ %y = select i1 %bool, double 2.000000e+00, double 1.000000e+00
+ %ldexp = fmul double %x, %y
ret double %ldexp
}
@@ -276,8 +276,8 @@ define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, double 0.500000e+00, double 1.000000e+00
- %ldexp = fmul double %x, %1
+ %y = select i1 %bool, double 5.000000e-01, double 1.000000e+00
+ %ldexp = fmul double %x, %y
ret double %ldexp
}
@@ -324,8 +324,8 @@ define <2 x double> @fmul_select_f64_test3(<2 x double> %x, <2 x i32> %bool.arg1
; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
- %ldexp = fmul <2 x double> %x, %1
+ %y = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %y
ret <2 x double> %ldexp
}
@@ -373,8 +373,8 @@ define <2 x double> @fmul_select_f64_test4(<2 x double> %x, <2 x i32> %bool.arg1
; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x double> <double 0.500000e+00, double 0.500000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
- %ldexp = fmul <2 x double> %x, %1
+ %y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 5.000000e-01>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %y
ret <2 x double> %ldexp
}
@@ -410,8 +410,8 @@ define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, double -0.500000e+00, double -1.000000e+00
- %ldexp = fmul double %x, %1
+ %y = select i1 %bool, double -5.000000e-01, double -1.000000e+00
+ %ldexp = fmul double %x, %y
ret double %ldexp
}
@@ -445,8 +445,8 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, double -2.000000e+00, double -1.000000e+00
- %ldexp = fmul double %x, %1
+ %y = select i1 %bool, double -2.000000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %y
ret double %ldexp
}
@@ -480,8 +480,8 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, double 2.000000e+00, double -1.000000e+00
- %ldexp = fmul double %x, %1
+ %y = select i1 %bool, double 2.000000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %y
ret double %ldexp
}
@@ -517,8 +517,8 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, double 4.000000e+00, double -1.000000e+00
- %ldexp = fmul double %x, %1
+ %y = select i1 %bool, double 4.000000e+00, double -1.000000e+00
+ %ldexp = fmul double %x, %y
ret double %ldexp
}
@@ -565,8 +565,8 @@ define <2 x double> @fmul_select_f64_test9(<2 x double> %x, <2 x i32> %bool.arg1
; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00>
- %ldexp = fmul <2 x double> %x, %1
+ %y = select <2 x i1> %bool, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %y
ret <2 x double> %ldexp
}
@@ -615,8 +615,8 @@ define <2 x double> @fmul_select_f64_test10(<2 x double> %x, <2 x i32> %bool.arg
; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x double> <double 0.500000e+00, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00>
- %ldexp = fmul <2 x double> %x, %1
+ %y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00>
+ %ldexp = fmul <2 x double> %x, %y
ret <2 x double> %ldexp
}
@@ -650,8 +650,8 @@ define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, half 2.000000e+00, half 1.000000e+00
- %ldexp = fmul half %x, %1
+ %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00
+ %ldexp = fmul half %x, %y
ret half %ldexp
}
@@ -685,8 +685,8 @@ define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %1 = select i1 %bool, half 0.500000e+00, half 1.000000e+00
- %ldexp = fmul half %x, %1
+ %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00
+ %ldexp = fmul half %x, %y
ret half %ldexp
}
@@ -730,8 +730,8 @@ define <2 x half> @fmul_select_f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2
; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
- %ldexp = fmul <2 x half> %x, %1
+ %y = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
+ %ldexp = fmul <2 x half> %x, %y
ret <2 x half> %ldexp
}
@@ -775,7 +775,7 @@ define <2 x half> @fmul_select_f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2
; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
- %1 = select <2 x i1> %bool, <2 x half> <half 0.500000e+00, half 0.500000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
- %ldexp = fmul <2 x half> %x, %1
+ %y = select <2 x i1> %bool, <2 x half> <half 5.000000e-01, half 5.000000e-01>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
+ %ldexp = fmul <2 x half> %x, %y
ret <2 x half> %ldexp
}
>From 5821f45c460acf461e6a1ef8ae49a0cda46a7604 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Thu, 10 Oct 2024 06:54:48 +0000
Subject: [PATCH 4/6] Added -0.0/0.0 test cases for all datatypes, along with
mismatched-sign cases.
---
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 496 ++++++++++++++++--
1 file changed, 453 insertions(+), 43 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 5405f83b7ab8c3..9d3d435611376d 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -65,8 +65,8 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
ret float %ldexp
}
-define <2 x float> @fmul_select_f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f32_test3:
+define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f32_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
@@ -77,7 +77,7 @@ define <2 x float> @fmul_select_f32_test3(<2 x float> %x, <2 x i32> %bool.arg1,
; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test3:
+; GFX1030-LABEL: fmul_select_v2f32_test3:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
@@ -88,7 +88,7 @@ define <2 x float> @fmul_select_f32_test3(<2 x float> %x, <2 x i32> %bool.arg1,
; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f32_test3:
+; GFX1100-LABEL: fmul_select_v2f32_test3:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
@@ -104,8 +104,8 @@ define <2 x float> @fmul_select_f32_test3(<2 x float> %x, <2 x i32> %bool.arg1,
ret <2 x float> %ldexp
}
-define <2 x float> @fmul_select_f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f32_test4:
+define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f32_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
@@ -116,7 +116,7 @@ define <2 x float> @fmul_select_f32_test4(<2 x float> %x, <2 x i32> %bool.arg1,
; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test4:
+; GFX1030-LABEL: fmul_select_v2f32_test4:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
@@ -127,7 +127,7 @@ define <2 x float> @fmul_select_f32_test4(<2 x float> %x, <2 x i32> %bool.arg1,
; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f32_test4:
+; GFX1100-LABEL: fmul_select_v2f32_test4:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
@@ -179,7 +179,7 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x40400000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
@@ -188,7 +188,7 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1030-LABEL: fmul_select_f32_test6:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc0400000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
@@ -197,18 +197,150 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX1100-LABEL: fmul_select_f32_test6:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc0400000
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %y = select i1 %bool, float 3.000000e+00, float 8.000000e+00
+ %y = select i1 %bool, float -3.000000e+00, float 8.000000e+00
%ldexp = fmul float %x, %y
ret float %ldexp
}
+define float @fmul_select_f32_test7(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, 4.0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0x41000000, 4.0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0x41000000, 4.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, float 4.000000e+00, float 8.000000e+00
+ %ldexp = fmul float %x, %y
+ ret float %ldexp
+}
+
+define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x41800000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, float 1.600000e+01, float -8.000000e+00
+ %ldexp = fmul float %x, %y
+ ret float %ldexp
+}
+
+define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, float 0.000000e+00, float 2.000000e+00
+ %ldexp = fmul float %x, %y
+ ret float %ldexp
+}
+
+define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test10:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test10:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test10:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, float -0.000000e+00, float 0.000000e+00
+ %ldexp = fmul float %x, %y
+ ret float %ldexp
+}
+
+
+
define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-LABEL: fmul_select_f64_test1:
; GFX9: ; %bb.0:
@@ -281,8 +413,8 @@ define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2)
ret double %ldexp
}
-define <2 x double> @fmul_select_f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f64_test3:
+define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f64_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
@@ -296,7 +428,7 @@ define <2 x double> @fmul_select_f64_test3(<2 x double> %x, <2 x i32> %bool.arg1
; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test3:
+; GFX1030-LABEL: fmul_select_v2f64_test3:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
@@ -309,7 +441,7 @@ define <2 x double> @fmul_select_f64_test3(<2 x double> %x, <2 x i32> %bool.arg1
; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f64_test3:
+; GFX1100-LABEL: fmul_select_v2f64_test3:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
@@ -329,8 +461,8 @@ define <2 x double> @fmul_select_f64_test3(<2 x double> %x, <2 x i32> %bool.arg1
ret <2 x double> %ldexp
}
-define <2 x double> @fmul_select_f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f64_test4:
+define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f64_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000
@@ -345,7 +477,7 @@ define <2 x double> @fmul_select_f64_test4(<2 x double> %x, <2 x i32> %bool.arg1
; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test4:
+; GFX1030-LABEL: fmul_select_v2f64_test4:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000
@@ -359,7 +491,7 @@ define <2 x double> @fmul_select_f64_test4(<2 x double> %x, <2 x i32> %bool.arg1
; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f64_test4:
+; GFX1100-LABEL: fmul_select_v2f64_test4:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0
@@ -489,8 +621,8 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-LABEL: fmul_select_f64_test8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x40100000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xc0100000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX9-NEXT: v_mov_b32_e32 v2, 0
@@ -500,30 +632,30 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX1030-LABEL: fmul_select_f64_test8:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x40100000
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0xc0100000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo
; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: fmul_select_f64_test8:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v5, 0x40100000 :: v_dual_mov_b32 v4, 0
+; GFX1100-NEXT: v_dual_mov_b32 v5, 0xc0100000 :: v_dual_mov_b32 v4, 0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo
+; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo
; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %y = select i1 %bool, double 4.000000e+00, double -1.000000e+00
+ %y = select i1 %bool, double -4.000000e+00, double -3.200000e+01
%ldexp = fmul double %x, %y
ret double %ldexp
}
-define <2 x double> @fmul_select_f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f64_test9:
+define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f64_test9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v11, 0xbff00000
@@ -537,7 +669,7 @@ define <2 x double> @fmul_select_f64_test9(<2 x double> %x, <2 x i32> %bool.arg1
; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test9:
+; GFX1030-LABEL: fmul_select_v2f64_test9:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
@@ -550,7 +682,7 @@ define <2 x double> @fmul_select_f64_test9(<2 x double> %x, <2 x i32> %bool.arg1
; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f64_test9:
+; GFX1100-LABEL: fmul_select_v2f64_test9:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
@@ -570,8 +702,8 @@ define <2 x double> @fmul_select_f64_test9(<2 x double> %x, <2 x i32> %bool.arg1
ret <2 x double> %ldexp
}
-define <2 x double> @fmul_select_f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f64_test10:
+define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f64_test10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v8, 0
@@ -587,7 +719,7 @@ define <2 x double> @fmul_select_f64_test10(<2 x double> %x, <2 x i32> %bool.arg
; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test10:
+; GFX1030-LABEL: fmul_select_v2f64_test10:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000
@@ -601,7 +733,7 @@ define <2 x double> @fmul_select_f64_test10(<2 x double> %x, <2 x i32> %bool.arg
; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f64_test10:
+; GFX1100-LABEL: fmul_select_v2f64_test10:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0
@@ -620,6 +752,112 @@ define <2 x double> @fmul_select_f64_test10(<2 x double> %x, <2 x i32> %bool.arg
ret <2 x double> %ldexp
}
+define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test11:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test11:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test11:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, double -2.000000e+00, double -0.000000e+00
+ %ldexp = fmul double %x, %y
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test12:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 31, v2
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test12:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v2, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 31, v3
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test12:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, double 0.000000e+00, double -0.000000e+00
+ %ldexp = fmul double %x, %y
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test13:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test13:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test13:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, double 0.000000e+00, double 1.600000e+01
+ %ldexp = fmul double %x, %y
+ ret double %ldexp
+}
+
define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-LABEL: fmul_select_f16_test1:
; GFX9: ; %bb.0:
@@ -690,8 +928,8 @@ define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
ret half %ldexp
}
-define <2 x half> @fmul_select_f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test3:
+define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f16_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
@@ -704,7 +942,7 @@ define <2 x half> @fmul_select_f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test3:
+; GFX1030-LABEL: fmul_select_v2f16_test3:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
@@ -716,7 +954,7 @@ define <2 x half> @fmul_select_f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2
; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test3:
+; GFX1100-LABEL: fmul_select_v2f16_test3:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
@@ -735,8 +973,8 @@ define <2 x half> @fmul_select_f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2
ret <2 x half> %ldexp
}
-define <2 x half> @fmul_select_f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test4:
+define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2f16_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
@@ -749,7 +987,7 @@ define <2 x half> @fmul_select_f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test4:
+; GFX1030-LABEL: fmul_select_v2f16_test4:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800
@@ -761,7 +999,7 @@ define <2 x half> @fmul_select_f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2
; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test4:
+; GFX1100-LABEL: fmul_select_v2f16_test4:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800
@@ -779,3 +1017,175 @@ define <2 x half> @fmul_select_f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2
%ldexp = fmul <2 x half> %x, %y
ret <2 x half> %ldexp
}
+
+define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, half 2.000000e+00, half 8.000000e+00 ; arms: 2.0 (2^1) and 8.0 (2^3)
+ %ldexp = fmul half %x, %y
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, half -8.000000e+00, half 3.000000e+00 ; arms: -8.0 (-(2^3)) and 3.0 (not a power of two)
+ %ldexp = fmul half %x, %y
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, half 8.000000e+00, half -4.000000e+00 ; arms: 8.0 (2^3) and -4.0 (-(2^2)), opposite signs
+ %ldexp = fmul half %x, %y
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, half -0.000000e+00, half 0.000000e+00 ; arms: -0.0 and +0.0 (signed zeros)
+ %ldexp = fmul half %x, %y
+ ret half %ldexp
+}
+
+define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f16_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f16_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f16_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, half -1.600000e+01, half -3.200000e+01 ; arms: -16.0 (-(2^4)) and -32.0 (-(2^5)), both negative
+ %ldexp = fmul half %x, %y
+ ret half %ldexp
+}
>From c93df3c077e5e66f34feaf4f14da90eba5c1448c Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Thu, 10 Oct 2024 10:14:46 +0000
Subject: [PATCH 5/6] Added bfloat test cases, large exponent test cases, &
addresses other suggested changes.
---
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 537 ++++++++++++++++++
1 file changed, 537 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 9d3d435611376d..5d8ea4ad56e25c 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX9 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s
@@ -339,7 +340,69 @@ define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) {
ret float %ldexp
}
+define float @fmul_select_f32_test11(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test11:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test11:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test11:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000 ; arms: -(2^78) and -(2^56), in exact hex form (the rounded decimals were not representable as float)
+ %ldexp = fmul float %x, %y ; NOTE(review): CHECK bodies above were not regenerated for these constants; rerun utils/update_llc_test_checks.py
+ ret float %ldexp
+}
+define float @fmul_select_f32_test12(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f32_test12:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f32_test12:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f32_test12:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000 ; arms: 2^-48 and 2^68, in exact hex form (the rounded decimals were not representable as float)
+ %ldexp = fmul float %x, %y ; NOTE(review): CHECK bodies above were not regenerated for these constants; rerun utils/update_llc_test_checks.py
+ ret float %ldexp
+}
define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-LABEL: fmul_select_f64_test1:
@@ -858,6 +921,77 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2)
ret double %ldexp
}
+define double @fmul_select_f64_test14(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test14:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test14:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test14:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000 ; arms: 2^92 and 2^-27, in exact hex form (the rounded decimals were not exact powers of two)
+ %ldexp = fmul double %x, %y ; NOTE(review): CHECK bodies above were not regenerated for these constants; rerun utils/update_llc_test_checks.py
+ ret double %ldexp
+}
+
+define double @fmul_select_f64_test15(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_f64_test15:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_f64_test15:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0
+; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_f64_test15:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0
+; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, double 0x3D50000000000000, double 0x3DD0000000000000 ; arms: 2^-42 and 2^-34, in exact hex form (the rounded decimals were not exact powers of two)
+ %ldexp = fmul double %x, %y ; NOTE(review): CHECK bodies above were not regenerated for these constants; rerun utils/update_llc_test_checks.py
+ ret double %ldexp
+}
+
+
define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-LABEL: fmul_select_f16_test1:
; GFX9: ; %bb.0:
@@ -1189,3 +1323,406 @@ define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
%ldexp = fmul half %x, %y
ret half %ldexp
}
+
+define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test1:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test1:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00 ; arms: 2.0 (2^1) and 1.0 (2^0)
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test2:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test2:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00 ; arms: 0.5 (2^-1) and 1.0 (2^0)
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2bf16_test3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2bf16_test3:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2bf16_test3:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %y = select <2 x i1> %bool, <2 x bfloat> <bfloat 2.000000e+00, bfloat 2.000000e+00>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
+ %ldexp = fmul <2 x bfloat> %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_pk_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret <2 x bfloat> %ldexp
+}
+
+define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX9-LABEL: fmul_select_v2bf16_test4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_v2bf16_test4:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_v2bf16_test4:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
+ %y = select <2 x i1> %bool, <2 x bfloat> <bfloat 5.000000e-01, bfloat 5.000000e-01>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
+ %ldexp = fmul <2 x bfloat> %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_pk_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret <2 x bfloat> %ldexp
+}
+
+define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test5:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test5:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00 ; arms: 2.0 (2^1) and 8.0 (2^3)
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test6:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test6:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00 ; arms: -8.0 (-(2^3)) and 3.0 (not a power of two)
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test7:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test7:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00 ; arms: 8.0 (2^3) and -4.0 (-(2^2)), opposite signs
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test8:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test8:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00 ; arms: -0.0 and +0.0 (signed zeros)
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test9:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test9:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01 ; arms: -16.0 (-(2^4)) and -32.0 (-(2^5)), both negative
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test10(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test10:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test10:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test10:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80 ; arms: -(2^65) and -(2^56), as exact bfloat encodings (the rounded decimals were not representable)
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
+define bfloat @fmul_select_bf16_test11(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX9-LABEL: fmul_select_bf16_test11:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1030-LABEL: fmul_select_bf16_test11:
+; GFX1030: ; %bb.0:
+; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-LABEL: fmul_select_bf16_test11:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
+; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
+ %bool = icmp eq i32 %bool.arg1, %bool.arg2
+ %y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00 ; arms: 2^-22 and 2^25, as exact bfloat encodings; NOTE(review): the old "3.3554432e+01" is presumed to be a typo for 2^25 - confirm
+ %ldexp = fmul bfloat %x, %y ; NOTE(review): CHECK bodies above still expect f16 instructions (v_mul_f16); regenerate with utils/update_llc_test_checks.py
+ ret bfloat %ldexp
+}
+
>From 4efec1725ddc9eef9f548581a072a958ff7412a8 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Tue, 15 Oct 2024 06:13:22 +0000
Subject: [PATCH 6/6] Added test cases for very big as well as small exponents
(>64 & <-16).
---
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 1283 ++++++++++++++---
1 file changed, 1096 insertions(+), 187 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 5d8ea4ad56e25c..449efbce978a5d 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -1,10 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX9 %s
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s
define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test1:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -36,6 +44,14 @@ define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test2:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -67,6 +83,17 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f32_test3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f32_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -106,6 +133,17 @@ define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1
}
define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f32_test4:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f32_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -145,6 +183,14 @@ define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1
}
define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test5:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -176,6 +222,16 @@ define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test6:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -211,6 +267,15 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test7(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test7:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 4.0, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -243,6 +308,16 @@ define float @fmul_select_f32_test7(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test8:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x41800000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,6 +353,14 @@ define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test9:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -309,6 +392,15 @@ define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f32_test10:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f32_test10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -341,70 +433,106 @@ define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test11(float %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f32_test10:
+; GFX7-LABEL: fmul_select_f32_test11:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xe6800000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_f32_test11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xdb800000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xe6800000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test10:
+; GFX1030-LABEL: fmul_select_f32_test11:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xe6800000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo
; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f32_test10:
+; GFX1100-LABEL: fmul_select_f32_test11:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0xe6800000
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %y = select i1 %bool, float -3.0223145e+23, float -7.2057594e+16
+ %y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000
%ldexp = fmul float %x, %y
ret float %ldexp
}
define float @fmul_select_f32_test12(float %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f32_test10:
+; GFX7-LABEL: fmul_select_f32_test12:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x61800000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x27800000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_f32_test12:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x61800000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x27800000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test10:
+; GFX1030-LABEL: fmul_select_f32_test12:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x27800000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo
; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f32_test10:
+; GFX1100-LABEL: fmul_select_f32_test12:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v3, 0x27800000
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo
; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %y = select i1 %bool, float 3.5527137e-15, float 2.9514791e+20
+ %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000
%ldexp = fmul float %x, %y
ret float %ldexp
}
define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test1:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -440,6 +568,17 @@ define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test2:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x3fe00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -477,6 +616,20 @@ define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f64_test3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX7-NEXT: v_mov_b32_e32 v8, 0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc
+; GFX7-NEXT: v_mov_b32_e32 v9, v8
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc
+; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f64_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -525,6 +678,21 @@ define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.ar
}
define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f64_test4:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX7-NEXT: v_mov_b32_e32 v12, 0x3fe00000
+; GFX7-NEXT: v_mov_b32_e32 v8, 0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX7-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc
+; GFX7-NEXT: v_mov_b32_e32 v9, v8
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX7-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc
+; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f64_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -574,6 +742,17 @@ define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.ar
}
define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test5:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0xbfe00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -611,6 +790,16 @@ define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test6:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -646,6 +835,16 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test7:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -681,6 +880,17 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test8:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0xc0100000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -718,6 +928,20 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f64_test9:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v11, 0xbff00000
+; GFX7-NEXT: v_mov_b32_e32 v8, 0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc
+; GFX7-NEXT: v_mov_b32_e32 v9, v8
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc
+; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f64_test9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -766,6 +990,22 @@ define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.ar
}
define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f64_test10:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v8, 0
+; GFX7-NEXT: v_mov_b32_e32 v9, 0xbff00000
+; GFX7-NEXT: v_mov_b32_e32 v10, 0x3fe00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000
+; GFX7-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc
+; GFX7-NEXT: v_mov_b32_e32 v9, v8
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10]
+; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc
+; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f64_test10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -816,6 +1056,16 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a
}
define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test11:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -851,6 +1101,16 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test12:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: v_lshlrev_b32_e32 v3, 31, v2
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test12:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -887,6 +1147,16 @@ define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f64_test13:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_mov_b32_e32 v4, 0
+; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f64_test13:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -922,33 +1192,54 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test14(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f64_test13:
+; GFX7-LABEL: fmul_select_f64_test14:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x3e400000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x45b00000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0x1c5f68
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x252c7e7
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_f64_test14:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3e400000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x45b00000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x1c5f68
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x252c7e7
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test13:
+; GFX1030-LABEL: fmul_select_f64_test14:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0x45b00000
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x252c7e7
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: v_cndmask_b32_e32 v3, 0x3e400000, v4, vcc_lo
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x1c5f68, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f64_test13:
+; GFX1100-LABEL: fmul_select_f64_test14:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0x45b00000
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x252c7e7
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-NEXT: v_cndmask_b32_e32 v3, 0x3e400000, v4, vcc_lo
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x1c5f68, v5, vcc_lo
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 4.9517602e+27, double 7.4505806e-09
@@ -957,33 +1248,54 @@ define double @fmul_select_f64_test14(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test15(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f64_test13:
+; GFX7-LABEL: fmul_select_f64_test15:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x3dd00000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x3d500000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX7-NEXT: v_mov_b32_e32 v2, 0x6628df
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x56132d8
+; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_f64_test15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3dd00000
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3d500000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x6628df
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x56132d8
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test13:
+; GFX1030-LABEL: fmul_select_f64_test15:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1030-NEXT: v_mov_b32_e32 v4, 0x3d500000
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x56132d8
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1030-NEXT: v_cndmask_b32_e32 v3, 0x3dd00000, v4, vcc_lo
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x6628df, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f64_test13:
+; GFX1100-LABEL: fmul_select_f64_test15:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_mov_b32_e32 v4, 0x3d500000
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x56132d8
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-NEXT: v_cndmask_b32_e32 v3, 0x3dd00000, v4, vcc_lo
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x6628df, v5, vcc_lo
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 2.2737368e-13, double 5.8207661e-11
@@ -993,6 +1305,16 @@ define double @fmul_select_f64_test15(double %x, i32 %bool.arg1, i32 %bool.arg2)
define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test1:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1028,6 +1350,16 @@ define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test2:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1063,6 +1395,21 @@ define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f16_test3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f16_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1108,6 +1455,21 @@ define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1,
}
define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
+; GFX7-LABEL: fmul_select_v2f16_test4:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_v2f16_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1153,6 +1515,17 @@ define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1,
}
define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test5:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1188,6 +1561,18 @@ define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test6:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1223,6 +1608,17 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test7:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1258,6 +1654,17 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test8:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1290,6 +1697,18 @@ define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_f16_test9:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: fmul_select_f16_test9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1325,33 +1744,71 @@ define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test1:
+; GFX7-LABEL: fmul_select_bf16_test1:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80
; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test1:
+; GFX1030-LABEL: fmul_select_bf16_test1:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test1:
+; GFX1100-LABEL: fmul_select_bf16_test1:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00
@@ -1360,33 +1817,71 @@ define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test2:
+; GFX7-LABEL: fmul_select_bf16_test2:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3f00
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test2:
+; GFX1030-LABEL: fmul_select_bf16_test2:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3f00
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test2:
+; GFX1100-LABEL: fmul_select_bf16_test2:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x3800
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00
@@ -1395,43 +1890,110 @@ define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_v2f16_test3:
+; GFX7-LABEL: fmul_select_v2bf16_test3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_v2bf16_test3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80
; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4
+; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX9-NEXT: s_mov_b32 s4, 0x7060302
+; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_v2f16_test3:
+; GFX1030-LABEL: fmul_select_v2bf16_test3:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_v2f16_test3:
+; GFX1100-LABEL: fmul_select_v2bf16_test3:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x bfloat> <bfloat 2.000000e+00, bfloat 2.000000e+00>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
@@ -1440,43 +2002,110 @@ define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.a
}
define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX9-LABEL: fmul_select_v2f16_test4:
+; GFX7-LABEL: fmul_select_v2bf16_test4:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_v2bf16_test4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x3f00
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4
+; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX9-NEXT: s_mov_b32 s4, 0x7060302
+; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_v2f16_test4:
+; GFX1030-LABEL: fmul_select_v2bf16_test4:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3f00
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_v2f16_test4:
+; GFX1100-LABEL: fmul_select_v2bf16_test4:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3f00
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x bfloat> <bfloat 5.000000e-01, bfloat 5.000000e-01>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
@@ -1485,33 +2114,72 @@ define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.a
}
define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test5:
+; GFX7-LABEL: fmul_select_bf16_test5:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4100
; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test5:
+; GFX1030-LABEL: fmul_select_bf16_test5:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test5:
+; GFX1100-LABEL: fmul_select_bf16_test5:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00
@@ -1520,33 +2188,73 @@ define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test6:
+; GFX7-LABEL: fmul_select_bf16_test6:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4040
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc100
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test6:
+; GFX1030-LABEL: fmul_select_bf16_test6:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc100
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test6:
+; GFX1100-LABEL: fmul_select_bf16_test6:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00
@@ -1554,34 +2262,73 @@ define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
ret bfloat %ldexp
}
-define half @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test7:
+define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
+; GFX7-LABEL: fmul_select_bf16_test7:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc080
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x4100
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test7:
+; GFX1030-LABEL: fmul_select_bf16_test7:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4100
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test7:
+; GFX1100-LABEL: fmul_select_bf16_test7:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00
@@ -1590,30 +2337,72 @@ define half @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test8:
+; GFX7-LABEL: fmul_select_bf16_test8:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 15
+; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test8:
+; GFX1030-LABEL: fmul_select_bf16_test8:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b16 v1, 15, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test8:
+; GFX1100-LABEL: fmul_select_bf16_test8:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_lshlrev_b16 v1, 15, v1
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00
@@ -1622,33 +2411,73 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test9:
+; GFX7-LABEL: fmul_select_bf16_test9:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000
+; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc200
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc180
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test9:
+; GFX1030-LABEL: fmul_select_bf16_test9:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc180
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test9:
+; GFX1100-LABEL: fmul_select_bf16_test9:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01
@@ -1657,71 +2486,151 @@ define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test10(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test9:
+; GFX7-LABEL: fmul_select_bf16_test10:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000
+; GFX7-NEXT: v_bfrev_b32_e32 v4, 7
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffdb80
+; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffe000
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test9:
+; GFX1030-LABEL: fmul_select_bf16_test10:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffe000
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test9:
+; GFX1100-LABEL: fmul_select_bf16_test10:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %y = select i1 %bool, bfloat -3.6893488e+19, bfloat -7.2057594e+16
+ %y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80
%ldexp = fmul bfloat %x, %y
ret bfloat %ldexp
}
define bfloat @fmul_select_bf16_test11(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX9-LABEL: fmul_select_f16_test9:
+; GFX7-LABEL: fmul_select_bf16_test11:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_bfrev_b32_e32 v3, 50
+; GFX7-NEXT: v_mov_b32_e32 v4, 0x34800000
+; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmul_select_bf16_test11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4c00
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x3480
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test9:
+; GFX1030-LABEL: fmul_select_bf16_test11:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3480
; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
+; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: fmul_select_f16_test9:
+; GFX1100-LABEL: fmul_select_bf16_test11:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00
+; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0
; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
+; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
- %y = select i1 %bool, bfloat 2.3841857e-07, bfloat 3.3554432e+01
+ %y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00
%ldexp = fmul bfloat %x, %y
ret bfloat %ldexp
}
More information about the llvm-commits
mailing list