[clang] [llvm] [AMDGPU] Add support for `v_exp_bf16` on gfx1250 (PR #149229)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 19:52:29 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/149229
>From 4efddc290b2e44b95630976aaaa1a539ce2ab647 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 16 Jul 2025 19:56:48 -0400
Subject: [PATCH] [AMDGPU] Add support for `v_exp_bf16` on gfx1250
Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin at amd.com>
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 +
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 1 +
.../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl | 19 +
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 2 +
llvm/test/CodeGen/AMDGPU/bf16-math.ll | 23 +
.../CodeGen/AMDGPU/llvm.amdgcn.exp.bf16.ll | 33 +
llvm/test/CodeGen/AMDGPU/llvm.exp2.bf16.ll | 1013 +++++++++++++++++
llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 45 +
llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s | 48 +
.../MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s | 56 +
llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s | 60 +
.../MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s | 12 +
llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s | 16 +
.../gfx1250_asm_vop3_from_vop1-fake16.s | 45 +
.../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s | 48 +
.../gfx1250_asm_vop3_from_vop1_dpp16-fake16.s | 56 +
.../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s | 60 +
.../gfx1250_asm_vop3_from_vop1_dpp8-fake16.s | 16 +
.../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s | 20 +
.../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 63 +
.../AMDGPU/gfx1250_dasm_vop1_dpp16.txt | 59 +
.../AMDGPU/gfx1250_dasm_vop1_dpp8.txt | 15 +
.../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt | 64 ++
.../gfx1250_dasm_vop3_from_vop1_dpp16.txt | 60 +
.../gfx1250_dasm_vop3_from_vop1_dpp8.txt | 20 +
25 files changed, 1855 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.bf16.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.exp2.bf16.ll
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index eee0a94f6fc64..7eb5e2acc8b37 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -673,6 +673,7 @@ TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
+TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 0312205d4ff8d..f7450373d1309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -439,6 +439,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_log_bf16:
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
case AMDGPU::BI__builtin_amdgcn_exp2f:
+ case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
return emitBuiltinWithOneOverloadedType<1>(*this, E,
Intrinsic::amdgcn_exp2);
case AMDGPU::BI__builtin_amdgcn_log_clampf:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index bdf169a1a97da..7b1fd8aefe5be 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -118,6 +118,25 @@ void test_log_bf16(global __bf16* out, __bf16 a)
*out = __builtin_amdgcn_log_bf16(a);
}
+// CHECK-LABEL: @test_exp2_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.exp2.bf16(bfloat [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT: ret void
+//
+void test_exp2_bf16(global __bf16* out, __bf16 a)
+{
+ *out = __builtin_amdgcn_exp2_bf16(a);
+}
+
// CHECK-LABEL: @test_cvt_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index e1bc39302e126..d93f5e5b81454 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -533,6 +533,7 @@ defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
+defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
}
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1145,6 +1146,7 @@ defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>;
defm V_SQRT_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>;
defm V_RSQ_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
defm V_LOG_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;
+defm V_EXP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/bf16-math.ll b/llvm/test/CodeGen/AMDGPU/bf16-math.ll
index 05eee2d4d549d..029604c2933a9 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16-math.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16-math.ll
@@ -25,4 +25,27 @@ define amdgpu_ps void @llvm_log2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src
ret void
}
+define amdgpu_ps void @llvm_exp2_bf16_v(ptr addrspace(1) %out, bfloat %src) {
+; GCN-LABEL: llvm_exp2_bf16_v:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_exp_bf16_e32 v2, v2
+; GCN-NEXT: global_store_b16 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
+ %exp = call bfloat @llvm.exp2.bf16(bfloat %src)
+ store bfloat %exp, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+define amdgpu_ps void @llvm_exp2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src) {
+; GCN-LABEL: llvm_exp2_bf16_s:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_exp_bf16_e32 v2, s0
+; GCN-NEXT: global_store_b16 v[0:1], v2, off
+; GCN-NEXT: s_endpgm
+ %exp = call bfloat @llvm.exp2.bf16(bfloat %src)
+ store bfloat %exp, ptr addrspace(1) %out, align 2
+ ret void
+}
+
declare bfloat @llvm.log2.bf16(bfloat)
+declare bfloat @llvm.exp2.bf16(bfloat)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.bf16.ll
new file mode 100644
index 0000000000000..6304923790ad5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.bf16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: GlobalISel does not work with bf16, so the -global-isel=1 RUN line above is disabled (spelled xUN).
+
+declare bfloat @llvm.amdgcn.exp2.bf16(bfloat) #0
+
+; GCN-LABEL: {{^}}exp_bf16:
+; GCN: v_exp_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @exp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+ %exp = call bfloat @llvm.amdgcn.exp2.bf16(bfloat %src) #0
+ store bfloat %exp, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}exp_bf16_constant_4:
+; GCN: v_exp_bf16_e32 v0, 4.0
+define amdgpu_kernel void @exp_bf16_constant_4(ptr addrspace(1) %out) #1 {
+ %exp = call bfloat @llvm.amdgcn.exp2.bf16(bfloat 4.0) #0
+ store bfloat %exp, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}exp_bf16_constant_100:
+; GCN: v_exp_bf16_e32 {{v[0-9]+}}, 0x42c8
+define amdgpu_kernel void @exp_bf16_constant_100(ptr addrspace(1) %out) #1 {
+ %exp = call bfloat @llvm.amdgcn.exp2.bf16(bfloat 100.0) #0
+ store bfloat %exp, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.bf16.ll
new file mode 100644
index 0000000000000..0f37639059169
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.bf16.ll
@@ -0,0 +1,1013 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1200-SDAG-TRUE16
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1200-SDAG-FAKE16
+; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1200-GI-TRUE16
+; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1200-GI-FAKE16
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1250-SDAG-TRUE16
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1250-SDAG-FAKE16
+; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1250-GI-TRUE16
+; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck %s -check-prefixes=GFX1250-GI-FAKE16
+
+define bfloat @v_exp2_bf16(bfloat %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.l, v0.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %result = call bfloat @llvm.exp2.bf16(bfloat %in)
+ ret bfloat %result
+}
+
+define bfloat @v_exp2_fabs_bf16(bfloat %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_fabs_bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_fabs_bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_fabs_bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.l, |v0.l|
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_fabs_bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v0, |v0|
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %fabs = call bfloat @llvm.fabs.bf16(bfloat %in)
+ %result = call bfloat @llvm.exp2.bf16(bfloat %fabs)
+ ret bfloat %result
+}
+
+define bfloat @v_exp2_fneg_fabs_bf16(bfloat %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_fneg_fabs_bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_fneg_fabs_bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_fneg_fabs_bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.l, -|v0.l|
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_fneg_fabs_bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v0, -|v0|
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %fabs = call bfloat @llvm.fabs.bf16(bfloat %in)
+ %fneg.fabs = fneg bfloat %fabs
+ %result = call bfloat @llvm.exp2.bf16(bfloat %fneg.fabs)
+ ret bfloat %result
+}
+
+define bfloat @v_exp2_fneg_bf16(bfloat %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_fneg_bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_fneg_bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_fneg_bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.l, -v0.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_fneg_bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v0, -v0
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %fneg = fneg bfloat %in
+ %result = call bfloat @llvm.exp2.bf16(bfloat %fneg)
+ ret bfloat %result
+}
+
+define bfloat @v_exp2_bf16_fast(bfloat %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_bf16_fast:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_bf16_fast:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_bf16_fast:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.l, v0.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_bf16_fast:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %result = call fast bfloat @llvm.exp2.bf16(bfloat %in)
+ ret bfloat %result
+}
+
+define <2 x bfloat> @v_exp2_v2bf16(<2 x bfloat> %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_v2bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_v2bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_v2bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.h, v0.h
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.l, v0.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_v2bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v1, v1
+; GFX1250-SDAG-FAKE16-NEXT: v_nop
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %result = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %in)
+ ret <2 x bfloat> %result
+}
+
+define <2 x bfloat> @v_exp2_fabs_v2bf16(<2 x bfloat> %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_fabs_v2bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_dual_add_f32 v0, v0, v3 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX1200-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_fabs_v2bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v1, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_fabs_v2bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 15
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.l, v1.l
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.h, v2.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_fabs_v2bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX1250-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v1, v1
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: v_nop
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %fabs = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %in)
+ %result = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %fabs)
+ ret <2 x bfloat> %result
+}
+
+define <2 x bfloat> @v_exp2_fneg_fabs_v2bf16(<2 x bfloat> %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_fneg_fabs_v2bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 15
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v2.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, s0
+; GFX1200-SDAG-TRUE16-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, s0
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v4
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX1200-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_fneg_fabs_v2bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_fneg_fabs_v2bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 15
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.l, -v1.l
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.h, -v2.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_fneg_fabs_v2bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
+; GFX1250-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v1, -v1
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v0, -v0
+; GFX1250-SDAG-FAKE16-NEXT: v_nop
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %fabs = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %in)
+ %fneg.fabs = fneg <2 x bfloat> %fabs
+ %result = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %fneg.fabs)
+ ret <2 x bfloat> %result
+}
+
+define <2 x bfloat> @v_exp2_fneg_v2bf16(<2 x bfloat> %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_fneg_v2bf16:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v1, v1, v3
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_fneg_v2bf16:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_dual_add_f32 v1, v1, v3 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v1, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v1, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x7060302
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_fneg_v2bf16:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.h, -v0.h
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e64 v0.l, -v0.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_fneg_v2bf16:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v0, -v0
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e64 v1, -v1
+; GFX1250-SDAG-FAKE16-NEXT: v_nop
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %fneg = fneg <2 x bfloat> %in
+ %result = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %fneg)
+ ret <2 x bfloat> %result
+}
+
+define <2 x bfloat> @v_exp2_v2bf16_fast(<2 x bfloat> %in) {
+; GFX1200-SDAG-TRUE16-LABEL: v_exp2_v2bf16_fast:
+; GFX1200-SDAG-TRUE16: ; %bb.0:
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-TRUE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: v_exp2_v2bf16_fast:
+; GFX1200-SDAG-FAKE16: ; %bb.0:
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xf1ff
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v3
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_exp_f32_e32 v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_alu 0xfffd
+; GFX1200-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX1200-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-SDAG-TRUE16-LABEL: v_exp2_v2bf16_fast:
+; GFX1250-SDAG-TRUE16: ; %bb.0:
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.h, v0.h
+; GFX1250-SDAG-TRUE16-NEXT: v_exp_bf16_e32 v0.l, v0.l
+; GFX1250-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-SDAG-FAKE16-LABEL: v_exp2_v2bf16_fast:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_exp_bf16_e32 v1, v1
+; GFX1250-SDAG-FAKE16-NEXT: v_nop
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+ %result = call fast <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %in)
+ ret <2 x bfloat> %result
+}
+
+declare bfloat @llvm.exp2.bf16(bfloat) #0
+declare <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat>) #0
+declare bfloat @llvm.fabs.bf16(bfloat) #0
+declare <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat>) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 0f5ce56f1a2cf..426f480200e4b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -253,6 +253,51 @@ v_log_bf16 v5, src_scc
v_log_bf16 v127, 0x8000
// GFX1250: v_log_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf8,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_exp_bf16 v5, v1
+// GFX1250: v_exp_bf16_e32 v5, v1 ; encoding: [0x01,0xfb,0x0a,0x7e]
+
+v_exp_bf16 v5, v127
+// GFX1250: v_exp_bf16_e32 v5, v127 ; encoding: [0x7f,0xfb,0x0a,0x7e]
+
+v_exp_bf16 v5, s1
+// GFX1250: v_exp_bf16_e32 v5, s1 ; encoding: [0x01,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, s105
+// GFX1250: v_exp_bf16_e32 v5, s105 ; encoding: [0x69,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, vcc_lo
+// GFX1250: v_exp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, vcc_hi
+// GFX1250: v_exp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, ttmp15
+// GFX1250: v_exp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, m0
+// GFX1250: v_exp_bf16_e32 v5, m0 ; encoding: [0x7d,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, exec_lo
+// GFX1250: v_exp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, exec_hi
+// GFX1250: v_exp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, null
+// GFX1250: v_exp_bf16_e32 v5, null ; encoding: [0x7c,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, -1
+// GFX1250: v_exp_bf16_e32 v5, -1 ; encoding: [0xc1,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, 0.5
+// GFX1250: v_exp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, src_scc
+// GFX1250: v_exp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v127, 0x8000
+// GFX1250: v_exp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 9dd11e6249b27..93999043d0fb8 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -268,6 +268,54 @@ v_log_bf16 v127, 0x8000
v_log_bf16 v5.h, v1.h
// GFX1250: v_log_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf9,0x0a,0x7f]
+v_exp_bf16 v5, v1
+// GFX1250: v_exp_bf16_e32 v5, v1 ; encoding: [0x01,0xfb,0x0a,0x7e]
+
+v_exp_bf16 v5, v127
+// GFX1250: v_exp_bf16_e32 v5, v127 ; encoding: [0x7f,0xfb,0x0a,0x7e]
+
+v_exp_bf16 v5, s1
+// GFX1250: v_exp_bf16_e32 v5, s1 ; encoding: [0x01,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, s105
+// GFX1250: v_exp_bf16_e32 v5, s105 ; encoding: [0x69,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, vcc_lo
+// GFX1250: v_exp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, vcc_hi
+// GFX1250: v_exp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, ttmp15
+// GFX1250: v_exp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, m0
+// GFX1250: v_exp_bf16_e32 v5, m0 ; encoding: [0x7d,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, exec_lo
+// GFX1250: v_exp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, exec_hi
+// GFX1250: v_exp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, null
+// GFX1250: v_exp_bf16_e32 v5, null ; encoding: [0x7c,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, -1
+// GFX1250: v_exp_bf16_e32 v5, -1 ; encoding: [0xc1,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, 0.5
+// GFX1250: v_exp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v5, src_scc
+// GFX1250: v_exp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfa,0x0a,0x7e]
+
+v_exp_bf16 v127, 0x8000
+// GFX1250: v_exp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_exp_bf16 v5.h, v1.h
+// GFX1250: v_exp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfb,0x0a,0x7f]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 3882e43b5daf4..459c2d3e7b751 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -282,6 +282,62 @@ v_log_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
// GFX1250: v_log_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf8,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_exp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_exp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_mirror
+// GFX1250: v_exp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_half_mirror
+// GFX1250: v_exp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shl:1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shl:15
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shr:1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shr:15
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_ror:1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_ror:15
+// GFX1250: v_exp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_exp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_exp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_exp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 2f849b15edee9..30355596be48b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -302,6 +302,66 @@ v_log_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_log_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_exp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_exp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_mirror
+// GFX1250: v_exp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_half_mirror
+// GFX1250: v_exp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shl:1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shl:15
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shr:1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_shr:15
+// GFX1250: v_exp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_ror:1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_ror:15
+// GFX1250: v_exp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_exp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_exp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_exp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_exp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_exp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index 85cf08bdb3a31..50e3e0acae4d2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -62,6 +62,18 @@ v_log_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_log_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_exp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_exp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index d9b320ac6c094..34a15116ebed4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -82,6 +82,22 @@ v_log_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_log_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_exp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_exp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 0d4de4c8c877a..1d5df8d131228 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -307,6 +307,51 @@ v_log_bf16_e64 v5, src_scc mul:4
v_log_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+v_exp_bf16_e64 v5, v1
+// GFX1250: v_exp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]
+
+v_exp_bf16_e64 v5, v255
+// GFX1250: v_exp_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfd,0xd5,0xff,0x01,0x00,0x00]
+
+v_exp_bf16_e64 v5, s1
+// GFX1250: v_exp_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, s105
+// GFX1250: v_exp_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfd,0xd5,0x69,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, vcc_lo
+// GFX1250: v_exp_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x6a,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, vcc_hi
+// GFX1250: v_exp_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x6b,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, ttmp15
+// GFX1250: v_exp_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfd,0xd5,0x7b,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, m0
+// GFX1250: v_exp_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfd,0xd5,0x7d,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, exec_lo
+// GFX1250: v_exp_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x7e,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, exec_hi
+// GFX1250: v_exp_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x7f,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, null
+// GFX1250: v_exp_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfd,0xd5,0x7c,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, -1
+// GFX1250: v_exp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_exp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
+
+v_exp_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_exp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
+
+v_exp_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
index 8bf5d242660b6..a461a4cfc8212 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
@@ -322,6 +322,54 @@ v_log_bf16_e64 v255, -|0x8000| clamp div:2
v_log_bf16 v5.h, v128.h
// GFX1250: v_log_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfc,0xd5,0x80,0x01,0x00,0x00]
+v_exp_bf16_e64 v5, v1
+// GFX1250: v_exp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]
+
+v_exp_bf16_e64 v5, v255
+// GFX1250: v_exp_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfd,0xd5,0xff,0x01,0x00,0x00]
+
+v_exp_bf16_e64 v5, s1
+// GFX1250: v_exp_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, s105
+// GFX1250: v_exp_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfd,0xd5,0x69,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, vcc_lo
+// GFX1250: v_exp_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x6a,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, vcc_hi
+// GFX1250: v_exp_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x6b,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, ttmp15
+// GFX1250: v_exp_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfd,0xd5,0x7b,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, m0
+// GFX1250: v_exp_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfd,0xd5,0x7d,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, exec_lo
+// GFX1250: v_exp_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x7e,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, exec_hi
+// GFX1250: v_exp_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x7f,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, null
+// GFX1250: v_exp_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfd,0xd5,0x7c,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, -1
+// GFX1250: v_exp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
+
+v_exp_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_exp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
+
+v_exp_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_exp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
+
+v_exp_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+v_exp_bf16 v5.h, v128.h
+// GFX1250: v_exp_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfd,0xd5,0x80,0x01,0x00,0x00]
+
v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
index 4231fcf7c5e92..182315f93b2b2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
@@ -282,6 +282,62 @@ v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask
// GFX1250: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
index 1a094e285e730..da02b07191a62 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
@@ -302,6 +302,66 @@ v_log_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
// GFX1250: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
index f6a2103ed9077..744ea732ad95c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
@@ -82,6 +82,22 @@ v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
index 5a1b1414dda37..8bf45cb4345fe 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
@@ -102,6 +102,26 @@ v_log_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_exp_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_exp_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_exp_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_exp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index c318dd7fc4ee0..c1ea84585a66f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -344,6 +344,69 @@
0x81,0xf9,0x0a,0x7f
# GFX1250-REAL16: v_log_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf9,0x0a,0x7f]
+0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfa,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+0xc1,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, -1 ; encoding: [0xc1,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, -1 ; encoding: [0xc1,0xfa,0x0a,0x7e]
+
+0xf0,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, 0.5 ; encoding: [0xf0,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfa,0x0a,0x7e]
+
+0x7f,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, exec_hi ; encoding: [0x7f,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfa,0x0a,0x7e]
+
+0x7e,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, exec_lo ; encoding: [0x7e,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfa,0x0a,0x7e]
+
+0x7d,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, m0 ; encoding: [0x7d,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, m0 ; encoding: [0x7d,0xfa,0x0a,0x7e]
+
+0x7c,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, null ; encoding: [0x7c,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, null ; encoding: [0x7c,0xfa,0x0a,0x7e]
+
+0x01,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, s1 ; encoding: [0x01,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, s1 ; encoding: [0x01,0xfa,0x0a,0x7e]
+
+0x69,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, s105 ; encoding: [0x69,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, s105 ; encoding: [0x69,0xfa,0x0a,0x7e]
+
+0xfd,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, src_scc ; encoding: [0xfd,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfa,0x0a,0x7e]
+
+0x7b,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfa,0x0a,0x7e]
+
+0x01,0xfb,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, v1.l ; encoding: [0x01,0xfb,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, v1 ; encoding: [0x01,0xfb,0x0a,0x7e]
+
+0x7f,0xfb,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, v127.l ; encoding: [0x7f,0xfb,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, v127 ; encoding: [0x7f,0xfb,0x0a,0x7e]
+
+0x6b,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfa,0x0a,0x7e]
+
+0x6a,0xfa,0x0a,0x7e
+# GFX1250-REAL16: v_exp_bf16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xfa,0x0a,0x7e]
+# GFX1250-FAKE16: v_exp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfa,0x0a,0x7e]
+
+0x81,0xfb,0x0a,0x7f
+# GFX1250-REAL16: v_exp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfb,0x0a,0x7f]
+
0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
# GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
index 22ed09e957de7..bb5f1442920fd 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
@@ -297,6 +297,65 @@
0xfa,0xf8,0x0a,0x7f,0x81,0x1b,0x00,0xff
# GFX1250-REAL16: v_log_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30
+# GFX1250-REAL16: v_exp_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+# GFX1250-FAKE16: v_exp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfa,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x21,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x50,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x01,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x11,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+
+0xfa,0xfa,0x0a,0x7e,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfa,0x0a,0x7e,0x01,0x60,0x09,0x13]
+
+0xfa,0xfa,0x0a,0x7f,0x81,0x1b,0x00,0xff
+# GFX1250-REAL16: v_exp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfa,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+
0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30
# GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
index d8458e8808b39..1b7da587d20fd 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
@@ -80,6 +80,21 @@
0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05
# GFX1250-REAL16: v_log_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05]
+0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xea,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_exp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfa,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05]
+
0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
index d1a7158ce582e..43f6f5d66f25a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
@@ -322,6 +322,70 @@
# GFX1250-REAL16: v_log_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfc,0xd5,0x80,0x01,0x00,0x00]
# GFX1250-FAKE16: v_log_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfc,0xd5,0x80,0x01,0x00,0x00]
+0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfd,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfd,0xd5,0xc1,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfd,0xd5,0xf0,0x00,0x00,0x08]
+
+0x05,0x00,0xfd,0xd5,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x7f,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x7e,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x7e,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x7e,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x7d,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xfd,0xd5,0x7d,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfd,0xd5,0x7d,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x7c,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, null ; encoding: [0x05,0x00,0xfd,0xd5,0x7c,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfd,0xd5,0x7c,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x01,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x69,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xfd,0xd5,0x69,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfd,0xd5,0x69,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0xfd,0x00,0x00,0x10]
+
+0x05,0x00,0xfd,0xd5,0x7b,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xfd,0xd5,0x7b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfd,0xd5,0x7b,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0xff,0x01,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfd,0xd5,0xff,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfd,0xd5,0xff,0x01,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x6b,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x6b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x6b,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0x6a,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x6a,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfd,0xd5,0x6a,0x00,0x00,0x00]
+
+0x05,0x48,0xfd,0xd5,0x80,0x01,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfd,0xd5,0x80,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfd,0xd5,0x80,0x01,0x00,0x00]
+
0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
index 56f65d0711664..016a669e9ae5c 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
@@ -122,6 +122,66 @@
# GFX1250-REAL16: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+0xff,0x81,0xfd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
+0x05,0x48,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfd,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+
0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
# GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
# GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
index 9ff9e54c1b40c..cda17a850d9b6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
@@ -42,6 +42,26 @@
# GFX1250-REAL16: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+0xff,0x81,0xfd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+
+0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xfd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+
+0x05,0x48,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05
+# GFX1250-REAL16: v_exp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_exp_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfd,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+
0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
# GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
More information about the llvm-commits
mailing list