[llvm-branch-commits] [llvm] AMDGPU: Add V_CVT_F32_BF16 for gfx950 (PR #116311)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Nov 14 17:44:22 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/116311
None
>From 0451d01ded8d5669bf1b46071aeb1703484acac1 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Wed, 31 Jan 2024 07:27:18 -0500
Subject: [PATCH] AMDGPU: Add V_CVT_F32_BF16 for gfx950
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 12 +-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 +
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 +
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6 +
llvm/test/MC/AMDGPU/gfx950_asm_vop1.s | 75 +++++++++++-
.../Disassembler/AMDGPU/gfx950_dasm_vop1.txt | 110 +++++++++++++++++-
6 files changed, 206 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 9ff51287eb19a4..d722cede255174 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -438,6 +438,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
"Use true 16-bit registers"
>;
+def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts",
+ "HasBF16ConversionInsts",
+ "true",
+ "Has bf16 conversion instructions"
+>;
+
def FeatureVOP3P : SubtargetFeature<"vop3p",
"HasVOP3PInsts",
"true",
@@ -1502,7 +1508,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeatureAddressableLocalMemorySize163840,
- FeaturePrngInst
+ FeaturePrngInst,
+ FeatureBF16ConversionInsts
])>;
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2138,6 +2145,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;
+def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
+ AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;
+
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 334322f533e54a..ece26a4adb3754 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -51,6 +51,7 @@ class AMDGPUSubtarget {
bool Has16BitInsts = false;
bool HasTrue16BitInsts = false;
bool EnableRealTrue16Insts = false;
+ bool HasBF16ConversionInsts = false;
bool HasMadMixInsts = false;
bool HasMadMacF32Insts = false;
bool HasDsSrc2Insts = false;
@@ -166,6 +167,10 @@ class AMDGPUSubtarget {
// supported and the support for fake True16 instructions is removed.
bool useRealTrue16Insts() const;
+ bool hasBF16ConversionInsts() const {
+ return HasBF16ConversionInsts;
+ }
+
bool hasMadMixInsts() const {
return HasMadMixInsts;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9f8e6a082d9652..71bd5ece32bc48 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2774,6 +2774,7 @@ def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
+def VOP_F32_BF16 : VOPProfile <[f32, bf16, untyped, untyped]>;
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index e99f562688926d..1ff065f0629161 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -311,6 +311,9 @@ let OtherPredicates = [UseRealTrue16Insts] in
let OtherPredicates = [UseFakeTrue16Insts] in
defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+let SubtargetPredicate = HasBF16ConversionInsts in
+defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
+
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
@@ -1514,6 +1517,9 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let AssemblerPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
+let AssemblerPredicate = HasGFX950Insts in
+defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>;
+
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s
index 0cb292ffe63dde..66dae85ee8e3e5 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s
v_prng_b32 v5, v1
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
@@ -55,3 +56,75 @@ v_prng_b32 v5, src_scc
v_prng_b32 v255, 0xaf123456
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1
+// GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v127
+// GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, s1
+// GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, vcc_lo
+// GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, vcc_hi
+// GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, ttmp15
+// GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, m0
+// GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, exec_lo
+// GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, exec_hi
+// GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, -1
+// GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, 0.5
+// GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, src_scc
+// GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v127, 0x8000
+// GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, -v1
+// GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, |v1|
+// GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, -|v1|
+// GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16 v5, v1 clamp mul:2
+// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+v_cvt_f32_bf16_e64 v5, v1 clamp div:2
+// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
+// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt
index 91ab05e99f1e7c..336a26907891a2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt
@@ -40,4 +40,112 @@
0xfd,0xb0,0x0a,0x7e
# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
-0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
\ No newline at end of file
+0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
+
+# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
+0x01,0xb7,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
+0x7f,0xb7,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
+0x01,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
+0x6a,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
+0x6b,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
+0x7b,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
+0x7c,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
+0x7e,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
+0x7f,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
+0xc1,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
+0xf0,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
+0xfd,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
+0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00
+
+# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
+0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20
+
+# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
+0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00
+
+# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
+0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20
+
+# GFX950: v_cvt_f32_bf16_e64 v5, 0.5 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08]
+0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08
+
+# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
+0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18
+
+# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18]
+0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18
+
+# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e]
+0x01,0xb7,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e]
+0x7f,0xb7,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e]
+0x01,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e]
+0x6a,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e]
+0x6b,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e]
+0x7b,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e]
+0x7c,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e]
+0x7e,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e]
+0x7f,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e]
+0xc1,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e]
+0xf0,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e]
+0xfd,0xb6,0x0a,0x7e
+
+# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00]
+0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00
+
+# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20]
+0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20
+
+# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00]
+0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00
+
+# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20]
+0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20
+
+# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08]
+0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08
More information about the llvm-branch-commits
mailing list