[llvm] [GlobalISel] Fall back for bf16 conversions. (PR #71470)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 6 17:05:48 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Amara Emerson (aemerson)

<details>
<summary>Changes</summary>

GlobalISel does not handle these conversions correctly, since we don't yet have FP types.
As a result, the AMDGPU tests were being silently miscompiled, with bf16 treated as if it were fp16.


---

Patch is 68.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71470.diff


7 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll (+120-266) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+72-147) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp2.ll (+16-23) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log.ll (+95-164) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log10.ll (+95-164) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log2.ll (+29-44) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index d8f9e30b2599779..3098c8ea468a9d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1484,6 +1484,9 @@ bool IRTranslator::translateBitCast(const User &U,
 
 bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                  MachineIRBuilder &MIRBuilder) {
+  if (U.getType()->getScalarType()->isBFloatTy() ||
+      U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
+    return false;
   Register Op = getOrCreateVReg(*U.getOperand(0));
   Register Res = getOrCreateVReg(U);
   MIRBuilder.buildInstr(Opcode, {Res}, {Op});
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
index e3457421a49036e..e9bf515daabca9f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; Test no legal f16. Should just keep the cast to f32 and
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s
 
 ; Test legal f16, no f16 fmed3. Should expand to min/max sequence
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
 
 ; Legal f16 med3. InstCombine ought to shrink the f32 op to f16 so the codegen doesn't really matter for this.
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
 
 
 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
@@ -773,61 +773,32 @@ define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #
 ; --------------------------------------------------------------------------------
 
 define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 {
-; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX8-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v3, v2
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %arg0.ext = fpext half %arg0 to float
   %arg1.ext = fpext half %arg1 to float
   %arg2.ext = fpext half %arg2 to float
@@ -1039,56 +1010,27 @@ define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2,
 }
 
 define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
-; GFX7-SDAG-LABEL: fmed3_f32_fpext_bf16:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: fmed3_f32_fpext_bf16:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: fmed3_f32_fpext_bf16:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: fmed3_f32_fpext_bf16:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v3, v2
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: fmed3_f32_fpext_bf16:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: fmed3_f32_fpext_bf16:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_bf16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: fmed3_f32_fpext_bf16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: fmed3_f32_fpext_bf16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %arg0.ext = fpext bfloat %arg0 to float
   %arg1.ext = fpext bfloat %arg1 to float
   %arg2.ext = fpext bfloat %arg2 to float
@@ -1098,60 +1040,31 @@ define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
 }
 
 define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 {
-; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v3, v2
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %arg0.ext = fpext bfloat %arg0 to float
   %arg1.ext = fpext half %arg1 to float
   %arg2.ext = fpext half %arg2 to float
@@ -1161,60 +1074,31 @@ define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1
 }
 
 define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 {
-; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v3, v2
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %arg0.ext = fpext half %arg0 to float
   %arg1.ext = fpext bfloat %arg1 to float
   %arg2.ext = fpext half %arg2 to float
@@ -1224,60 +1108,31 @@ define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1
 }
 
 define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 {
-; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX7-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX8-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX8-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX8-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v3, v2
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-SDAG-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX9-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX9-GISEL-NEXT:    v_med3_f32 v0, v0, v1, v2
-; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT:    v_med3_f32 v0, v0, v1, v2
+; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %arg0.ext = fpext half %arg0 to float
   %arg1.ext = fpext half %arg1 to float
   %arg2.ext = fpext bfloat %arg2 to float
@@ -1488,4 +1343,3 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn me...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/71470


More information about the llvm-commits mailing list