[clang] [llvm] [AMDGPU] Add fp8/bf8 conversion instructions for gfx1170 (PR #180191)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 6 05:41:46 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko BrkuĊĦanin (mbrkusanin)
<details>
<summary>Changes</summary>
---
Patch is 171.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180191.diff
38 Files Affected:
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+7-1)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+15-1)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+17-16)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+4-4)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+2-1)
- (modified) llvm/lib/TargetParser/TargetParser.cpp (+2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll (+59-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+80-78)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll (+235)
- (added) llvm/test/MC/AMDGPU/gfx1150_unsupported.s (+56)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1-fake16.s (+47)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1.s (+54)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp16-fake16.s (+14)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp16.s (+16)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp8-fake16.s (+14)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp8.s (+16)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3-fake16.s (+62)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3.s (+62)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp16-fake16.s (+134)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp16.s (+134)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp8-fake16.s (+74)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp8.s (+74)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_err.s (+5)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1-fake16.s (+140)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1.s (+152)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp16-fake16.s (+44)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp16.s (+44)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp8-fake16.s (+44)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp8.s (+44)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop1_dpp16.txt (+15)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop1_dpp8.txt (+15)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3.txt (+63)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_dpp16.txt (+147)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_dpp8.txt (+77)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_from_vop1.txt (+57)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_from_vop1_dpp16.txt (+45)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_from_vop1_dpp8.txt (+45)
``````````diff
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
index cdfe9fcd89091..83a44be930ae0 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
@@ -1,5 +1,6 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index accaeda1cb239..9d723c86031f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1884,7 +1884,8 @@ def FeatureISAVersion11_5_3 : FeatureSet<
def FeatureISAVersion11_7_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
- FeatureDPPSrc1SGPR])>;
+ FeatureDPPSrc1SGPR,
+ FeatureFP8ConversionInsts])>;
def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
@@ -2388,6 +2389,11 @@ def isGFX11Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
AssemblerPredicate<(all_of FeatureGFX11Insts)>;
+def isGFX11PlusNot12_50 :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11 &&"
+ "(Subtarget->getGeneration() >= AMDGPUSubtarget::GFX13 || !Subtarget->hasGFX1250Insts())">,
+ AssemblerPredicate<(all_of FeatureGFX11Insts, (any_of FeatureGFX13Insts, (not FeatureGFX1250Insts)))>;
+
def isGFX12Only :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">,
AssemblerPredicate<(all_of FeatureGFX12Insts, (not FeatureGFX13Insts))>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d2c707646699b..184485ebf17c6 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9533,6 +9533,8 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
@@ -9542,7 +9544,19 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
// Adding vdst_in operand is already covered for these DPP instructions in
// cvtVOP3DPP.
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
- !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
+ !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1f053201da226..56e7623496eea 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -760,9 +760,9 @@ def V_CVT_F16_F8_True16_Profile : VOP3_Profile_True16<V_CVT_F16_F8_Profile>;
def V_CVT_F16_F8_Fake16_Profile : VOP3_Profile_Fake16<V_CVT_F16_F8_Profile>;
}
-let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
+let SubtargetPredicate = isGFX11Plus, OtherPredicates = [HasFP8ConversionInsts],
mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
- let SubtargetPredicate = isGFX12PlusNot12_50 in
+ let SubtargetPredicate = isGFX11PlusNot12_50 in
defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel<f32>>;
let SubtargetPredicate = isGFX125xOnly in
defm V_CVT_F32_FP8_gfx1250 : VOP1Inst<"v_cvt_f32_fp8_gfx1250", VOPProfile_Base_CVT_F_F8_ByteSel<f32, 1>>;
@@ -786,7 +786,7 @@ class Cvt_F_F8_Pat_ByteSel<SDPatternOperator node, VOP3_Pseudo inst, bit HasOpSe
>;
let OtherPredicates = [HasFP8ConversionInsts] in {
- let SubtargetPredicate = isGFX12PlusNot12_50 in
+ let SubtargetPredicate = isGFX11PlusNot12_50 in
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_fp8, V_CVT_F32_FP8_OP_SEL_e64>;
let SubtargetPredicate = isGFX125xOnly in {
def : GCNPat<(int_amdgcn_cvt_f32_fp8 i32:$src0, timm:$byte_sel),
@@ -794,7 +794,7 @@ let OtherPredicates = [HasFP8ConversionInsts] in {
def : GCNPat<(int_amdgcn_cvt_f32_fp8_e5m3 i32:$src0, timm:$byte_sel),
(V_CVT_F32_FP8_gfx1250_e64 $src0, DSTCLAMP.ENABLE, (as_i32timm $byte_sel))>;
}
- let SubtargetPredicate = isGFX12Plus in
+ let SubtargetPredicate = isGFX11Plus in
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_bf8, V_CVT_F32_BF8_OP_SEL_e64>;
}
@@ -806,7 +806,7 @@ class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
+let SubtargetPredicate = isGFX11Plus, OtherPredicates = [HasFP8ConversionInsts] in {
foreach Index = [0, -1] in {
def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
V_CVT_PK_F32_FP8_fake16_e32, V_CVT_PK_F32_FP8_fake16_e64>;
@@ -1140,8 +1140,9 @@ multiclass VOP1Only_Real_gfx11_gfx12_gfx13<bits<9> op> :
multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;
-multiclass VOP1_Real_e32_with_name_gfx12_gfx13<bits<9> op, string opName,
- string asmName> :
+multiclass VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<bits<9> op, string opName,
+ string asmName> :
+ VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
VOP1_Real_e32_with_name<GFX13Gen, op, opName, asmName>;
@@ -1179,16 +1180,16 @@ multiclass VOP1_Real_OpSelIsDPP_gfx1250_gfx13<bits<9> op> :
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
-defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name_gfx12_gfx13<0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
+defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
-defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 56127c7e2f48f..60b06edc211b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -821,13 +821,13 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
VOP3_CVT_PK_F8_F32_Profile_t16<>,
VOP3_CVT_PK_F8_F32_Profile_fake16<>>;
- let SubtargetPredicate = isGFX12Plus in {
+ let SubtargetPredicate = isGFX11Plus in {
let OtherPredicates = [HasFP8ConversionInsts, NotHasFP8E5M3Insts] in
defm V_CVT_SR_FP8_F32_gfx12 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>;
let OtherPredicates = [HasFP8ConversionInsts, HasFP8E5M3Insts] in
defm V_CVT_SR_FP8_F32_gfx1250 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx1250", VOP3_CVT_SR_F8_ByteSel_Profile<f32, true>>;
defm V_CVT_SR_BF8_F32_gfx12 : VOP3Inst<"v_cvt_sr_bf8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>;
- }
+ } // End SubtargetPredicate = isGFX11Plus
}
// These instructions have non-standard use of op_sel. In particular they are
@@ -931,7 +931,7 @@ let SubtargetPredicate = isGFX940Plus in {
}
}
-let SubtargetPredicate = isGFX12Plus in {
+let SubtargetPredicate = isGFX11Plus in {
let OtherPredicates = [HasFP8ConversionInsts, NotHasFP8E5M3Insts] in
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f32, V_CVT_SR_FP8_F32_gfx12_e64, f32>;
let OtherPredicates = [HasFP8ConversionInsts, HasFP8E5M3Insts] in {
@@ -939,7 +939,7 @@ let SubtargetPredicate = isGFX12Plus in {
def : Cvt_SR_F8_ByteSel_E5M3_Pat<int_amdgcn_cvt_sr_fp8_f32_e5m3, V_CVT_SR_FP8_F32_gfx1250_e64, f32, DSTCLAMP.ENABLE>;
}
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f32, V_CVT_SR_BF8_F32_gfx12_e64, f32>;
-}
+} // End SubtargetPredicate = isGFX11Plus
}
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 4694658952c6a..09fdb004c1363 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1873,8 +1873,9 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
}
}
-multiclass VOP3_Real_with_name_gfx12_gfx13<
+multiclass VOP3_Real_with_name_gfx11_gfx12_gfx13<
bits<10> op, string opName, string asmName, string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Real_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
VOP3_Real_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
VOP3_Real_with_name<GFX13Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 671fc79149d18..d317ca4e1194a 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -519,6 +519,8 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
break;
case GK_GFX1170:
// TODO-GFX1170: Update features map for gfx1170
+ Features["fp8-conversion-insts"] = true;
+ [[fallthrough]];
case GK_GFX1153:
case GK_GFX1152:
case GK_GFX1151:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
index 3372868455d65..4280d10fc2b33 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
@@ -1,8 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170PLUS,GFX1170 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1170PLUS,GFX12 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1170PLUS,GFX1250 %s
define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) {
+; GFX1170-LABEL: test_cvt_f32_bf8_byte0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_cvt_f32_bf8_byte0:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -19,6 +25,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) {
}
define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
+; GFX1170-LABEL: test_cvt_f32_bf8_byte1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_cvt_f32_bf8_byte1:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -35,6 +46,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
}
define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
+; GFX1170-LABEL: test_cvt_f32_bf8_byte2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_cvt_f32_bf8_byte2:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -51,6 +67,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
}
define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) {
+; GFX1170-LABEL: test_cvt_f32_fp8_byte3:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_cvt_f32_fp8_byte3:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -67,6 +88,14 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) {
}
define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_cvt_pk_bf8_f32_word0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_cvt_pk_bf8_f32 v2.l, v0, v1
+; GFX1170-NEXT: global_store_b32 v[3:4], v2, off
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_cvt_pk_bf8_f32_word0:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -88,6 +117,14 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr
}
define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_cvt_pk_fp8_f32_word1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_cvt_pk_fp8_f32 v2.h, v0, v1 op_sel:[0,0,1]
+; GFX1170-NEXT: global_store_b32 v[3:4], v2, off
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_cvt_pk_fp8_f32_word1:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -113,6 +150,12 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr
}
define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_cvt_sr_bf8_f32_byte0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: global_store_b32 v[3:4], v2, off
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -134,6 +177,12 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a
}
define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_cvt_sr_fp8_f32_byte1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: global_store_b32 v[3:4], v2, off
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -155,6 +204,12 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a
}
define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_cvt_sr_fp8_f32_byte2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1170-NEXT: global_store_b32 v[3:4], v2, off
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
@@ -187,3 +242,5 @@ declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
attributes #0 = { nounwind convergent }
attributes #1 = { nounwind readnone convergent }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170PLUS: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index b1e23808e91a9..ce36dd0aa26f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-p...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/180191
More information about the cfe-commits
mailing list