[clang] [llvm] [AMDGPU] Add support for `v_rcp_bf16` on gfx1250 (PR #148916)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 11:28:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@<!-- -->amd.com>
---
Patch is 67.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148916.diff
23 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+1)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+19)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll (+36)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+48)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+56)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+60)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+16)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+45)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+48)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+56)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+60)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+16)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+20)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+63)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+59)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+16)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+64)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+60)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+20)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ab432b7a8ad58..71e4b3486167a 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -669,6 +669,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
+TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index bd44874eac470..0d8c2ed284994 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -411,6 +411,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_rcp:
case AMDGPU::BI__builtin_amdgcn_rcpf:
case AMDGPU::BI__builtin_amdgcn_rcph:
+ case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
case AMDGPU::BI__builtin_amdgcn_sqrt:
case AMDGPU::BI__builtin_amdgcn_sqrtf:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 830ceabb1cb29..e50f02ad27357 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -61,6 +61,25 @@ void test_tanh_bf16(global __bf16* out, __bf16 a)
*out = __builtin_amdgcn_tanh_bf16(a);
}
+// CHECK-LABEL: @test_rcp_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.rcp.bf16(bfloat [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT: ret void
+//
+void test_rcp_bf16(global __bf16* out, __bf16 a)
+{
+ *out = __builtin_amdgcn_rcp_bf16(a);
+}
+
// CHECK-LABEL: @test_cvt_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 57857c688283d..28f239ba8c396 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -529,6 +529,7 @@ defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
let SubtargetPredicate = HasBF16TransInsts in {
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
+defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
}
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1137,6 +1138,7 @@ defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>;
+defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
new file mode 100644
index 0000000000000..3c49d0b9c01b1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; xUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefix=SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefix=SDAG-FAKE16 %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefix=GI-TRUE16 %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefix=GI-FAKE16 %s
+
+; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select.
+; FIXME: GlobalISel does not work with bf16
+
+declare bfloat @llvm.amdgcn.rcp.bf16(bfloat) #0
+
+define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+; SDAG-TRUE16-LABEL: rcp_bf16:
+; SDAG-TRUE16: ; %bb.0:
+; SDAG-TRUE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
+; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-TRUE16-NEXT: v_rcp_bf16_e32 v0.l, s2
+; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: s_endpgm
+;
+; SDAG-FAKE16-LABEL: rcp_bf16:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
+; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-FAKE16-NEXT: v_rcp_bf16_e32 v0, s2
+; SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1]
+; SDAG-FAKE16-NEXT: s_endpgm
+ %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat %src) #0
+ store bfloat %rcp, ptr addrspace(1) %out, align 2
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index cf8ad0e8b7b65..ce8f54a7ef9fc 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -73,6 +73,51 @@ v_tanh_bf16 v5, src_scc
v_tanh_bf16 v127, 0x8000
// GFX1250: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_rcp_bf16 v5, v1
+// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
+
+v_rcp_bf16 v5, v127
+// GFX1250: v_rcp_bf16_e32 v5, v127 ; encoding: [0x7f,0xf3,0x0a,0x7e]
+
+v_rcp_bf16 v5, s1
+// GFX1250: v_rcp_bf16_e32 v5, s1 ; encoding: [0x01,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, s105
+// GFX1250: v_rcp_bf16_e32 v5, s105 ; encoding: [0x69,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, vcc_lo
+// GFX1250: v_rcp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, vcc_hi
+// GFX1250: v_rcp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, ttmp15
+// GFX1250: v_rcp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, m0
+// GFX1250: v_rcp_bf16_e32 v5, m0 ; encoding: [0x7d,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, exec_lo
+// GFX1250: v_rcp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, exec_hi
+// GFX1250: v_rcp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, null
+// GFX1250: v_rcp_bf16_e32 v5, null ; encoding: [0x7c,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, -1
+// GFX1250: v_rcp_bf16_e32 v5, -1 ; encoding: [0xc1,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, 0.5
+// GFX1250: v_rcp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, src_scc
+// GFX1250: v_rcp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v127, 0x8000
+// GFX1250: v_rcp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf2,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 055f540fc2203..7001a1f1c4622 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -76,6 +76,54 @@ v_tanh_bf16 v127, 0x8000
v_tanh_bf16 v5.h, v1.h
// GFX1250: v_tanh_bf16_e32 v5.h, v1.h ; encoding: [0x81,0x95,0x0a,0x7f]
+v_rcp_bf16 v5, v1
+// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
+
+v_rcp_bf16 v5, v127
+// GFX1250: v_rcp_bf16_e32 v5, v127 ; encoding: [0x7f,0xf3,0x0a,0x7e]
+
+v_rcp_bf16 v5, s1
+// GFX1250: v_rcp_bf16_e32 v5, s1 ; encoding: [0x01,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, s105
+// GFX1250: v_rcp_bf16_e32 v5, s105 ; encoding: [0x69,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, vcc_lo
+// GFX1250: v_rcp_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, vcc_hi
+// GFX1250: v_rcp_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, ttmp15
+// GFX1250: v_rcp_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, m0
+// GFX1250: v_rcp_bf16_e32 v5, m0 ; encoding: [0x7d,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, exec_lo
+// GFX1250: v_rcp_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, exec_hi
+// GFX1250: v_rcp_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, null
+// GFX1250: v_rcp_bf16_e32 v5, null ; encoding: [0x7c,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, -1
+// GFX1250: v_rcp_bf16_e32 v5, -1 ; encoding: [0xc1,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, 0.5
+// GFX1250: v_rcp_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v5, src_scc
+// GFX1250: v_rcp_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf2,0x0a,0x7e]
+
+v_rcp_bf16 v127, 0x8000
+// GFX1250: v_rcp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf2,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_rcp_bf16 v5.h, v1.h
+// GFX1250: v_rcp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf3,0x0a,0x7f]
+
v_cvt_f32_bf16 v5, v1
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 4e5754f3961c1..3de8fc29bb01a 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -58,6 +58,62 @@ v_tanh_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 f
// GFX1250: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_mirror
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_half_mirror
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shl:1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shl:15
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shr:1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shr:15
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_ror:1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_ror:15
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_rcp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf2,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index a6787254ae60f..4632b1574731b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -62,6 +62,66 @@ v_tanh_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_mirror
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_half_mirror
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shl:1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shl:15
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shr:1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_shr:15
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_ror:1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_ror:15
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_rcp_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_rcp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf2,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_rcp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index e6c35d5e3b863..1a6028ad32bcf 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -14,6 +14,18 @@ v_tanh_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_rc...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148916
More information about the llvm-commits
mailing list