[llvm] 79707ba - AMDGPU: Add baseline test for gfx8 fptrunc combine

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu May 18 11:45:05 PDT 2023


Author: Matt Arsenault
Date: 2023-05-18T19:44:56+01:00
New Revision: 79707ba0dd59bde3e5bbf5db068d43be0c8def84

URL: https://github.com/llvm/llvm-project/commit/79707ba0dd59bde3e5bbf5db068d43be0c8def84
DIFF: https://github.com/llvm/llvm-project/commit/79707ba0dd59bde3e5bbf5db068d43be0c8def84.diff

LOG: AMDGPU: Add baseline test for gfx8 fptrunc combine

Added: 
    llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
new file mode 100644
index 0000000000000..9ecd6e26803aa
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
@@ -0,0 +1,1372 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test no legal f16. Should just keep the casts to f32 and use the f32 med3.
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s
+
+; Test legal f16, no f16 fmed3. Should expand to min/max sequence
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
+
+; Legal f16 med3. InstCombine ought to shrink the f32 op to f16.
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+
+
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
+declare float @llvm.fabs.f32(float) #0
+declare half @llvm.fabs.f16(half) #0
+
+define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float ; basic case: each f16 argument is widened to f32
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) ; fmed3 intrinsic on the f32 values
+  %cast  = fptrunc float %med3 to half ; f32 result narrowed back to f16
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_flags:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_flags:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_flags:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_flags:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call nsz float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) ; same as the basic case, but the call carries the nsz fast-math flag
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v1, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v1
+; GFX7-SDAG-NEXT:    flat_store_dword v[3:4], v1
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v1, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v1
+; GFX7-GISEL-NEXT:    flat_store_dword v[3:4], v1
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v1, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v1
+; GFX8-NEXT:    flat_store_dword v[3:4], v1
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v1, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v1
+; GFX9-NEXT:    global_store_dword v[3:4], v1, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  store float %med3, ptr addrspace(1) %ptr ; second use of the f32 med3 result, in addition to the fptrunc below
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, 2.0, v0, v1
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, 2.0, v0, v1
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_k0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_med3_f32 v0, 2.0, v0, v1
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_k0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_med3_f32 v0, 2.0, v0, v1
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 2.0, float %arg1.ext, float %arg2.ext) ; operand 0 is the f32 constant 2.0 instead of an extended f16 value
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k1:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, 2.0, v1
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k1:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, 2.0, v1
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_k1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, v1
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_k1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_med3_f32 v0, v0, 2.0, v1
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 2.0, float %arg2.ext) ; operand 1 is the f32 constant 2.0 instead of an extended f16 value
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k2:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, 2.0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k2:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, 2.0
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_k2:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, 2.0
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_k2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, 2.0
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 2.0) ; operand 2 is the f32 constant 2.0 instead of an extended f16 value
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41800000
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, 0, v1, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, 0x41800000
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, 0, v1, v0
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_k0_k1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0x41800000
+; GFX8-NEXT:    v_med3_f32 v0, 0, v1, v0
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x41800000
+; GFX9-NEXT:    v_med3_f32 v0, 0, v1, v0
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 16.0, float %arg2.ext) ; operands 0 and 1 are the f32 constants 0.0 and 16.0; only operand 2 is variable
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, 0, v0, 2.0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, 0, v0, 2.0
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_k0_k2:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_med3_f32 v0, 0, v0, 2.0
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_med3_f32 v0, 0, v0, 2.0
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg1.ext = fpext half %arg1 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %arg1.ext, float 2.0) ; operands 0 and 2 are the f32 constants 0.0 and 2.0; only operand 1 is variable
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fabs:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fabs:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_fabs:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_fabs:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.arg0 = call half @llvm.fabs.f16(half %arg0) ; fabs is applied to the f16 value, before the fpext
+  %fabs.arg1 = call half @llvm.fabs.f16(half %arg1)
+  %fabs.arg2 = call half @llvm.fabs.f16(half %arg2)
+  %arg0.ext = fpext half %fabs.arg0 to float
+  %arg1.ext = fpext half %fabs.arg1 to float
+  %arg2.ext = fpext half %fabs.arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_fabs_f32_fpext_f16:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, |v0|, |v1|, |v2|
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_fabs_f32_fpext_f16:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, |v0|, |v1|, |v2|
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_fabs_f32_fpext_f16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, |v0|, |v1|, |v2|
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_fabs_f32_fpext_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, |v0|, |v1|, |v2|
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext) ; fabs is applied to the f32 value, after the fpext
+  %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext)
+  %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext)
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %fabs.ext.arg0, float %fabs.ext.arg1, float %fabs.ext.arg2)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, -v0, -v1, -v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v1, -v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v2, -v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_fneg:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v2, -v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_fneg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v2, -v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %fneg.arg0 = fneg half %arg0 ; fneg is applied to the f16 value, before the fpext
+  %fneg.arg1 = fneg half %arg1
+  %fneg.arg2 = fneg half %arg2
+  %arg0.ext = fpext half %fneg.arg0 to float
+  %arg1.ext = fpext half %fneg.arg1 to float
+  %arg2.ext = fpext half %fneg.arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_fneg_f32_fpext_f16:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, -v0, -v1, -v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_fneg_f32_fpext_f16:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, -v0, -v1, -v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_fneg_f32_fpext_f16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, -v0, -v1, -v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_fneg_f32_fpext_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, -v0, -v1, -v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %fneg.ext.arg0 = fneg float %arg0.ext ; fneg is applied to the f32 value, after the fpext
+  %fneg.ext.arg1 = fneg float %arg1.ext
+  %fneg.ext.arg2 = fneg float %arg2.ext
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.ext.arg0, float %fneg.ext.arg1, float %fneg.ext.arg2)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, -v0, -v1, -v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e64 v2, -|v2|
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
+; GFX8-NEXT:    v_cvt_f32_f16_e64 v2, -|v2|
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
+; GFX9-NEXT:    v_cvt_f32_f16_e64 v2, -|v2|
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %fabs.arg0 = call half @llvm.fabs.f16(half %arg0) ; fneg(fabs(x)) is formed on the f16 value, before the fpext
+  %fabs.arg1 = call half @llvm.fabs.f16(half %arg1)
+  %fabs.arg2 = call half @llvm.fabs.f16(half %arg2)
+  %fneg.fabs.arg0 = fneg half %fabs.arg0
+  %fneg.fabs.arg1 = fneg half %fabs.arg1
+  %fneg.fabs.arg2 = fneg half %fabs.arg2
+  %arg0.ext = fpext half %fneg.fabs.arg0 to float
+  %arg1.ext = fpext half %fneg.fabs.arg1 to float
+  %arg2.ext = fpext half %fneg.fabs.arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, -|v0|, -|v1|, -|v2|
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, -|v0|, -|v1|, -|v2|
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, -|v0|, -|v1|, -|v2|
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, -|v0|, -|v1|, -|v2|
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext) ; fneg(fabs(x)) is formed on the f32 value, after the fpext
+  %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext)
+  %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext)
+  %fneg.fabs.ext.arg0 = fneg float %fabs.ext.arg0
+  %fneg.fabs.ext.arg1 = fneg float %fabs.ext.arg1
+  %fneg.fabs.ext.arg2 = fneg float %fabs.ext.arg2
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.fabs.ext.arg0, float %fneg.fabs.ext.arg1, float %fneg.fabs.ext.arg2)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+; --------------------------------------------------------------------------------
+; Negative tests
+; --------------------------------------------------------------------------------
+
+define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to bfloat ; negative case: the result is truncated to bfloat, not back to the half source type
+  ret bfloat %cast
+}
+
+; fmed3.f32 of three operands fpext'ed from half, with the result truncated
+; back to half. The extended %arg0 has a second use: it is also stored to %ptr.
+define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v1, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT:    flat_store_dword v[3:4], v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v5, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v2
+; GFX7-GISEL-NEXT:    flat_store_dword v[3:4], v5
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v5, v0, v1
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v5, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v2
+; GFX8-NEXT:    flat_store_dword v[3:4], v5
+; GFX8-NEXT:    v_med3_f32 v0, v5, v0, v1
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v5, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v2
+; GFX9-NEXT:    global_store_dword v[3:4], v5, off
+; GFX9-NEXT:    v_med3_f32 v0, v5, v0, v1
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  store float %arg0.ext, ptr addrspace(1) %ptr
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; fmed3.f32 of three operands fpext'ed from half, with the result truncated
+; back to half. The extended %arg1 has a second use: it is also stored to %ptr.
+define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    flat_store_dword v[3:4], v1
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    flat_store_dword v[3:4], v1
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    flat_store_dword v[3:4], v1
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    global_store_dword v[3:4], v1, off
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  store float %arg1.ext, ptr addrspace(1) %ptr
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; fmed3.f32 of three operands fpext'ed from half, with the result truncated
+; back to half. The extended %arg2 has a second use: it is also stored to %ptr.
+define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    flat_store_dword v[3:4], v2
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    flat_store_dword v[3:4], v2
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use_2:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    flat_store_dword v[3:4], v2
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    global_store_dword v[3:4], v2, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  store float %arg2.ext, ptr addrspace(1) %ptr
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; All three fmed3.f32 operands are fpext'ed from bfloat (not half); the result
+; is truncated to half.
+define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_bf16:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_bf16:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_bf16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_bf16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_bf16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_bf16:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext bfloat %arg0 to float
+  %arg1.ext = fpext bfloat %arg1 to float
+  %arg2.ext = fpext bfloat %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; Mixed source types: fmed3.f32 operand 0 is fpext'ed from bfloat, operands 1
+; and 2 from half; the result is truncated to half.
+define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext bfloat %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; Mixed source types: fmed3.f32 operand 1 is fpext'ed from bfloat, operands 0
+; and 2 from half; the result is truncated to half.
+define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext bfloat %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; Mixed source types: fmed3.f32 operand 2 is fpext'ed from bfloat, operands 0
+; and 1 from half; the result is truncated to half.
+define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext bfloat %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; fmed3.f32 operand 0 is the float constant 0x41f0000000000000 (2^32, emitted
+; as 0x4f800000 in the checks), which exceeds the largest finite half value.
+; Operands 1 and 2 are fpext'ed from half; the result is truncated to half.
+define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, s4, v0, v1
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v2, v0, v1
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, s4, v0, v1
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v2, v0, v1
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, s4, v0, v1
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v2, v0, v1
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg1.ext = fpext half %arg1 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x41f0000000000000, float %arg1.ext, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; fmed3.f32 operand 1 is the float constant 0x41f0000000000000 (2^32, emitted
+; as 0x4f800000 in the checks), which exceeds the largest finite half value.
+; Operands 0 and 2 are fpext'ed from half; the result is truncated to half.
+define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, s4, v1
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v2, v1
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, s4, v1
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v2, v1
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, s4, v1
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v2, v1
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg2.ext = fpext half %arg2 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 0x41f0000000000000, float %arg2.ext)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; fmed3.f32 operand 2 is the float constant 0x41f0000000000000 (2^32, emitted
+; as 0x4f800000 in the checks), which exceeds the largest finite half value.
+; Operands 0 and 1 are fpext'ed from half; the result is truncated to half.
+define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1 {
+; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
+; GFX7-SDAG:       ; %bb.0:
+; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, s4
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
+; GFX7-GISEL:       ; %bb.0:
+; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, s4
+; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX8-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, s4
+; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %arg0.ext = fpext half %arg0 to float
+  %arg1.ext = fpext half %arg1 to float
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 0x41f0000000000000)
+  %cast = fptrunc float %med3 to half
+  ret half %cast
+}
+
+; #0 is attached to the llvm.amdgcn.fmed3.f32 intrinsic declaration; #1 is
+; attached to the test functions.
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GFX7: {{.*}}


        


More information about the llvm-commits mailing list