[llvm-branch-commits] [llvm] [AMDGPU] Add support for `v_sqrt_bf16` on gfx1250 (PR #148921)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jul 15 11:38:55 PDT 2025
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/148921
(No pull request description was provided.)
>From 6893db624adb4a4f76ea6b7acfb8b30387f8bfc5 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 15 Jul 2025 14:31:34 -0400
Subject: [PATCH] [AMDGPU] Add support for `v_sqrt_bf16` on gfx1250
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 +
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 2 +
.../CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll | 33 ++++++
llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll | 106 ++++++++++++++++++
llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 45 ++++++++
llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s | 48 ++++++++
.../MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s | 56 +++++++++
llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s | 60 ++++++++++
.../MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s | 12 ++
llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s | 16 +++
.../gfx1250_asm_vop3_from_vop1-fake16.s | 45 ++++++++
.../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s | 48 ++++++++
.../gfx1250_asm_vop3_from_vop1_dpp16-fake16.s | 56 +++++++++
.../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s | 60 ++++++++++
.../gfx1250_asm_vop3_from_vop1_dpp8-fake16.s | 16 +++
.../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s | 20 ++++
.../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 63 +++++++++++
.../AMDGPU/gfx1250_dasm_vop1_dpp16.txt | 59 ++++++++++
.../AMDGPU/gfx1250_dasm_vop1_dpp8.txt | 16 +++
.../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt | 64 +++++++++++
.../gfx1250_dasm_vop3_from_vop1_dpp16.txt | 60 ++++++++++
.../gfx1250_dasm_vop3_from_vop1_dpp8.txt | 20 ++++
22 files changed, 909 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6cf2055c8e565..e2acde04f35e5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -938,6 +938,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
}
+ if (Subtarget->hasBF16TransInsts()) {
+ setOperationAction({ISD::FEXP2, ISD::FLOG2, ISD::FSQRT}, MVT::bf16, Legal);
+ }
+
if (Subtarget->hasCvtPkF16F32Inst()) {
setOperationAction(ISD::FP_ROUND,
{MVT::v2f16, MVT::v4f16, MVT::v8f16, MVT::v16f16},
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 28f239ba8c396..e2f371079179d 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -530,6 +530,7 @@ defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
let SubtargetPredicate = HasBF16TransInsts in {
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
+defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
}
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1139,6 +1140,7 @@ defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>;
defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>;
+defm V_SQRT_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll
new file mode 100644
index 0000000000000..5287b5dba848f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: GlobalISel does not work with bf16, so the -global-isel=1 RUN line above is disabled (spelled "xUN").
+
+declare bfloat @llvm.amdgcn.sqrt.bf16(bfloat) #0
+
+; GCN-LABEL: {{^}}sqrt_bf16:
+; GCN: v_sqrt_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @sqrt_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+ %sqrt = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat %src) #0
+ store bfloat %sqrt, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}sqrt_bf16_constant_4
+; GCN: v_sqrt_bf16_e32 v0, 4.0
+define amdgpu_kernel void @sqrt_bf16_constant_4(ptr addrspace(1) %out) #1 {
+ %sqrt = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat 4.0) #0
+ store bfloat %sqrt, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}sqrt_bf16_constant_100
+; GCN: v_sqrt_bf16_e32 {{v[0-9]+}}, 0x42c8
+define amdgpu_kernel void @sqrt_bf16_constant_100(ptr addrspace(1) %out) #1 {
+ %sqrt = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat 100.0) #0
+ store bfloat %sqrt, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll
new file mode 100644
index 0000000000000..5936d6aa86b82
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12-FAKE16 %s
+
+; FIXME: t16 reportedly fails to select the store of s16; the +real-true16 RUN line above is nevertheless enabled, so confirm whether this note is stale.
+
+declare bfloat @llvm.sqrt.bf16(bfloat %a)
+declare <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> %a)
+
+define amdgpu_kernel void @sqrt_bf16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
+; GFX12-TRUE16-LABEL: sqrt_bf16:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX12-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX12-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX12-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX12-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX12-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX12-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX12-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], null
+; GFX12-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: v_sqrt_bf16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX12-TRUE16-NEXT: s_endpgm
+;
+; GFX12-FAKE16-LABEL: sqrt_bf16:
+; GFX12-FAKE16: ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX12-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX12-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX12-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX12-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX12-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX12-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX12-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], null
+; GFX12-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT: v_sqrt_bf16_e32 v0, v0
+; GFX12-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX12-FAKE16-NEXT: s_endpgm
+entry:
+ %a.val = load bfloat, ptr addrspace(1) %a
+ %r.val = call bfloat @llvm.sqrt.bf16(bfloat %a.val)
+ store bfloat %r.val, ptr addrspace(1) %r
+ ret void
+}
+
+define amdgpu_kernel void @sqrt_v2bf16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
+; GFX12-TRUE16-LABEL: sqrt_v2bf16:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX12-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX12-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX12-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX12-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX12-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX12-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX12-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX12-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: v_sqrt_bf16_e32 v1.l, v0.l
+; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_2)
+; GFX12-TRUE16-NEXT: v_sqrt_bf16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX12-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX12-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX12-TRUE16-NEXT: s_endpgm
+;
+; GFX12-FAKE16-LABEL: sqrt_v2bf16:
+; GFX12-FAKE16: ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX12-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX12-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX12-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX12-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX12-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX12-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX12-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX12-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT: v_sqrt_bf16_e32 v1, v0
+; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_2)
+; GFX12-FAKE16-NEXT: v_sqrt_bf16_e32 v0, v0
+; GFX12-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX12-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX12-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX12-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX12-FAKE16-NEXT: s_endpgm
+entry:
+ %a.val = load <2 x bfloat>, ptr addrspace(1) %a
+ %r.val = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> %a.val)
+ store <2 x bfloat> %r.val, ptr addrspace(1) %r
+ ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index ce8f54a7ef9fc..c587b66e65011 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -118,6 +118,51 @@ v_rcp_bf16 v5, src_scc
v_rcp_bf16 v127, 0x8000
// GFX1250: v_rcp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf2,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_sqrt_bf16 v5, v1
+// GFX1250: v_sqrt_bf16_e32 v5, v1 ; encoding: [0x01,0xf5,0x0a,0x7e]
+
+v_sqrt_bf16 v5, v127
+// GFX1250: v_sqrt_bf16_e32 v5, v127 ; encoding: [0x7f,0xf5,0x0a,0x7e]
+
+v_sqrt_bf16 v5, s1
+// GFX1250: v_sqrt_bf16_e32 v5, s1 ; encoding: [0x01,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, s105
+// GFX1250: v_sqrt_bf16_e32 v5, s105 ; encoding: [0x69,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, vcc_lo
+// GFX1250: v_sqrt_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, vcc_hi
+// GFX1250: v_sqrt_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, ttmp15
+// GFX1250: v_sqrt_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, m0
+// GFX1250: v_sqrt_bf16_e32 v5, m0 ; encoding: [0x7d,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, exec_lo
+// GFX1250: v_sqrt_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, exec_hi
+// GFX1250: v_sqrt_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, null
+// GFX1250: v_sqrt_bf16_e32 v5, null ; encoding: [0x7c,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, -1
+// GFX1250: v_sqrt_bf16_e32 v5, -1 ; encoding: [0xc1,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, 0.5
+// GFX1250: v_sqrt_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, src_scc
+// GFX1250: v_sqrt_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v127, 0x8000
+// GFX1250: v_sqrt_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 7001a1f1c4622..719eb3abc02a3 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -124,6 +124,54 @@ v_rcp_bf16 v127, 0x8000
v_rcp_bf16 v5.h, v1.h
// GFX1250: v_rcp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf3,0x0a,0x7f]
+v_sqrt_bf16 v5, v1
+// GFX1250: v_sqrt_bf16_e32 v5, v1 ; encoding: [0x01,0xf5,0x0a,0x7e]
+
+v_sqrt_bf16 v5, v127
+// GFX1250: v_sqrt_bf16_e32 v5, v127 ; encoding: [0x7f,0xf5,0x0a,0x7e]
+
+v_sqrt_bf16 v5, s1
+// GFX1250: v_sqrt_bf16_e32 v5, s1 ; encoding: [0x01,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, s105
+// GFX1250: v_sqrt_bf16_e32 v5, s105 ; encoding: [0x69,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, vcc_lo
+// GFX1250: v_sqrt_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, vcc_hi
+// GFX1250: v_sqrt_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, ttmp15
+// GFX1250: v_sqrt_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, m0
+// GFX1250: v_sqrt_bf16_e32 v5, m0 ; encoding: [0x7d,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, exec_lo
+// GFX1250: v_sqrt_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, exec_hi
+// GFX1250: v_sqrt_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, null
+// GFX1250: v_sqrt_bf16_e32 v5, null ; encoding: [0x7c,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, -1
+// GFX1250: v_sqrt_bf16_e32 v5, -1 ; encoding: [0xc1,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, 0.5
+// GFX1250: v_sqrt_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v5, src_scc
+// GFX1250: v_sqrt_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf4,0x0a,0x7e]
+
+v_sqrt_bf16 v127, 0x8000
+// GFX1250: v_sqrt_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_sqrt_bf16 v5.h, v1.h
+// GFX1250: v_sqrt_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf5,0x0a,0x7f]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 3de8fc29bb01a..44859fcffe223 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -114,6 +114,62 @@ v_rcp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
// GFX1250: v_rcp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf2,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_mirror
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_half_mirror
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shl:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shl:15
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shr:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shr:15
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_ror:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_ror:15
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sqrt_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 4632b1574731b..8fef387700972 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -122,6 +122,66 @@ v_rcp_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_mirror
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_half_mirror
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shl:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shl:15
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shr:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_shr:15
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_ror:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_ror:15
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sqrt_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sqrt_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_sqrt_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index 1a6028ad32bcf..28368456a35df 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -26,6 +26,18 @@ v_rcp_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_rcp_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf2,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sqrt_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 13085da3a630d..1ed8f5faff3fc 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -34,6 +34,22 @@ v_rcp_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7f,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sqrt_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sqrt_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index a8bf9cb034694..4f7be4833681d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -172,6 +172,51 @@ v_rcp_bf16_e64 v5, src_scc mul:4
v_rcp_bf16_e64 v255, -|0x8000| clamp div:2
// GFX1250: v_rcp_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xf9,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+v_sqrt_bf16_e64 v5, v1
+// GFX1250: v_sqrt_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, v255
+// GFX1250: v_sqrt_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfa,0xd5,0xff,0x01,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, s1
+// GFX1250: v_sqrt_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, s105
+// GFX1250: v_sqrt_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfa,0xd5,0x69,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, vcc_lo
+// GFX1250: v_sqrt_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x6a,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, vcc_hi
+// GFX1250: v_sqrt_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x6b,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, ttmp15
+// GFX1250: v_sqrt_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfa,0xd5,0x7b,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, m0
+// GFX1250: v_sqrt_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfa,0xd5,0x7d,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, exec_lo
+// GFX1250: v_sqrt_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x7e,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, exec_hi
+// GFX1250: v_sqrt_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x7f,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, null
+// GFX1250: v_sqrt_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfa,0xd5,0x7c,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, -1
+// GFX1250: v_sqrt_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_sqrt_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
+
+v_sqrt_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_sqrt_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
+
+v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
index 09b776487ed56..8b16e42566fde 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
@@ -178,6 +178,54 @@ v_rcp_bf16_e64 v255, -|0x8000| clamp div:2
v_rcp_bf16 v5.h, v128.h
// GFX1250: v_rcp_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xf9,0xd5,0x80,0x01,0x00,0x00]
+v_sqrt_bf16_e64 v5, v1
+// GFX1250: v_sqrt_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, v255
+// GFX1250: v_sqrt_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfa,0xd5,0xff,0x01,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, s1
+// GFX1250: v_sqrt_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, s105
+// GFX1250: v_sqrt_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfa,0xd5,0x69,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, vcc_lo
+// GFX1250: v_sqrt_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x6a,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, vcc_hi
+// GFX1250: v_sqrt_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x6b,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, ttmp15
+// GFX1250: v_sqrt_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfa,0xd5,0x7b,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, m0
+// GFX1250: v_sqrt_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfa,0xd5,0x7d,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, exec_lo
+// GFX1250: v_sqrt_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x7e,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, exec_hi
+// GFX1250: v_sqrt_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x7f,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, null
+// GFX1250: v_sqrt_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfa,0xd5,0x7c,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, -1
+// GFX1250: v_sqrt_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
+
+v_sqrt_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_sqrt_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
+
+v_sqrt_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_sqrt_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
+
+v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+v_sqrt_bf16 v5.h, v128.h
+// GFX1250: v_sqrt_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfa,0xd5,0x80,0x01,0x00,0x00]
+
v_cvt_f32_bf16_e64 v5, v1
// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
index f11276114183a..2c2aef4940b57 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
@@ -114,6 +114,62 @@ v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask
// GFX1250: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
index b591fa19524e5..1588b6b391198 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
@@ -122,6 +122,66 @@ v_rcp_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
// GFX1250: v_rcp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xf9,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
index 1cbd20be79810..0402565695975 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
@@ -34,6 +34,22 @@ v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
index 4553476af3634..71cda1b36dd3c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
@@ -42,6 +42,26 @@ v_rcp_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_rcp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xf9,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_sqrt_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_sqrt_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_sqrt_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_sqrt_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index 7debbd4727110..739a2034a079e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -152,6 +152,69 @@
0x81,0xf3,0x0a,0x7f
# GFX1250-REAL16: v_rcp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf3,0x0a,0x7f]
+0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+0xc1,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, -1 ; encoding: [0xc1,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, -1 ; encoding: [0xc1,0xf4,0x0a,0x7e]
+
+0xf0,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, 0.5 ; encoding: [0xf0,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf4,0x0a,0x7e]
+
+0x7f,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, exec_hi ; encoding: [0x7f,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf4,0x0a,0x7e]
+
+0x7e,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, exec_lo ; encoding: [0x7e,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf4,0x0a,0x7e]
+
+0x7d,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, m0 ; encoding: [0x7d,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, m0 ; encoding: [0x7d,0xf4,0x0a,0x7e]
+
+0x7c,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, null ; encoding: [0x7c,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, null ; encoding: [0x7c,0xf4,0x0a,0x7e]
+
+0x01,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, s1 ; encoding: [0x01,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, s1 ; encoding: [0x01,0xf4,0x0a,0x7e]
+
+0x69,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, s105 ; encoding: [0x69,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, s105 ; encoding: [0x69,0xf4,0x0a,0x7e]
+
+0xfd,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, src_scc ; encoding: [0xfd,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf4,0x0a,0x7e]
+
+0x7b,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf4,0x0a,0x7e]
+
+0x01,0xf5,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, v1.l ; encoding: [0x01,0xf5,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, v1 ; encoding: [0x01,0xf5,0x0a,0x7e]
+
+0x7f,0xf5,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, v127.l ; encoding: [0x7f,0xf5,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, v127 ; encoding: [0x7f,0xf5,0x0a,0x7e]
+
+0x6b,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf4,0x0a,0x7e]
+
+0x6a,0xf4,0x0a,0x7e
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xf4,0x0a,0x7e]
+# GFX1250-FAKE16: v_sqrt_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf4,0x0a,0x7e]
+
+0x81,0xf5,0x0a,0x7f
+# GFX1250-REAL16: v_sqrt_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf5,0x0a,0x7f]
+
0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
# GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
index 098a0ba39a5ae..04b38093d30f6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
@@ -120,6 +120,65 @@
0xfa,0xf2,0x0a,0x7f,0x81,0x1b,0x00,0xff
# GFX1250-REAL16: v_rcp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+0xfa,0xf4,0xfe,0x7e,0x7f,0x6f,0x35,0x30
+# GFX1250-REAL16: v_sqrt_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+
+0xfa,0xf4,0x0a,0x7e,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+
+0xfa,0xf4,0x0a,0x7f,0x81,0x1b,0x00,0xff
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+
0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30
# GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
index b097a4ebe9758..58994519a5234 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
@@ -34,6 +34,22 @@
# GFX1250-REAL16: v_rcp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7f,0x81,0x77,0x39,0x05]
# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+
+0xe9,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xea,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_sqrt_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+
0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
index 3231b2fa55663..89d9b02cdbd52 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
@@ -130,6 +130,70 @@
# GFX1250-REAL16: v_rcp_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xf9,0xd5,0x80,0x01,0x00,0x00]
# GFX1250-FAKE16: v_rcp_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xf9,0xd5,0x80,0x01,0x00,0x00]
+0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfa,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfa,0xd5,0xc1,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfa,0xd5,0xf0,0x00,0x00,0x08]
+
+0x05,0x00,0xfa,0xd5,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x7f,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x7e,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x7e,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x7e,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x7d,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xfa,0xd5,0x7d,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfa,0xd5,0x7d,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x7c,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, null ; encoding: [0x05,0x00,0xfa,0xd5,0x7c,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfa,0xd5,0x7c,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x01,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x69,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xfa,0xd5,0x69,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfa,0xd5,0x69,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0xfd,0x00,0x00,0x10]
+
+0x05,0x00,0xfa,0xd5,0x7b,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xfa,0xd5,0x7b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfa,0xd5,0x7b,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0xff,0x01,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfa,0xd5,0xff,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfa,0xd5,0xff,0x01,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x6b,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x6b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x6b,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0x6a,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x6a,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfa,0xd5,0x6a,0x00,0x00,0x00]
+
+0x05,0x48,0xfa,0xd5,0x80,0x01,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfa,0xd5,0x80,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfa,0xd5,0x80,0x01,0x00,0x00]
+
0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
# GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
index ae51033314d0f..9e45aca0168d6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
@@ -62,6 +62,66 @@
# GFX1250-REAL16: v_rcp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xf9,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
# GFX1250-FAKE16: v_rcp_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf9,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]

+0xff,0x81,0xfa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
+0x05,0x48,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfa,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+
0xff,0x81,0xca,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
# GFX1250-REAL16: v_tanh_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xca,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
# GFX1250-FAKE16: v_tanh_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xca,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
index fe626884691fd..18959f8dec20a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
@@ -22,6 +22,26 @@
# GFX1250-REAL16: v_rcp_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xf9,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
# GFX1250-FAKE16: v_rcp_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf9,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]

+0xff,0x81,0xfa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+
+0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xfa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+
+0x05,0x48,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05
+# GFX1250-REAL16: v_sqrt_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_sqrt_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfa,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+
0xff,0x81,0xca,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX1250-REAL16: v_tanh_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xca,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
# GFX1250-FAKE16: v_tanh_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xca,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
More information about the llvm-branch-commits mailing list