[llvm-branch-commits] [llvm] [clang] PR for llvm/llvm-project#79277 (PR #79340)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 26 19:57:00 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-clang
Author: None (github-actions[bot])
<details>
<summary>Changes</summary>
resolves llvm/llvm-project#<!-- -->79277
---
Patch is 116.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79340.diff
34 Files Affected:
- (modified) clang/test/CodeGenOpenCL/amdgpu-features.cl (+2-2)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl (+18-17)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+1)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+29-2)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+26)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+10)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+5-2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+11)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+88-5)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+49-4)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+20-9)
- (modified) llvm/lib/TargetParser/TargetParser.cpp (+1)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll (+142)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+197)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll (+405-61)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+45)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+36)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+108)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+48)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s (+138)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s (+12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+108)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+48)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+12)
``````````diff
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 1ba2b129f6895ae..9c8ca0bb96f6125 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -100,8 +100,8 @@
// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
index 56d757012a5e780..4e3a56b4201bb6d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
@@ -1,59 +1,60 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -emit-llvm -o - %s | FileCheck %s
typedef float v2f __attribute__((ext_vector_type(2)));
-// CHECK-GFX940-LABEL: @test_cvt_f32_bf8
-// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
+// CHECK-LABEL: @test_cvt_f32_bf8
+// CHECK: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
void test_cvt_f32_bf8(global int* out, int a)
{
*out = __builtin_amdgcn_cvt_f32_bf8(a, 0);
}
-// CHECK-GFX940-LABEL: @test_cvt_f32_fp8
-// CHECK-GFX940: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
+// CHECK-LABEL: @test_cvt_f32_fp8
+// CHECK: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
void test_cvt_f32_fp8(global int* out, int a)
{
*out = __builtin_amdgcn_cvt_f32_fp8(a, 1);
}
-// CHECK-GFX940-LABEL: @test_cvt_pk_f32_bf8
-// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
+// CHECK-LABEL: @test_cvt_pk_f32_bf8
+// CHECK: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
void test_cvt_pk_f32_bf8(global v2f* out, int a)
{
*out = __builtin_amdgcn_cvt_pk_f32_bf8(a, false);
}
-// CHECK-GFX940-LABEL: @test_cvt_pk_f32_fp8
-// CHECK-GFX940: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
+// CHECK-LABEL: @test_cvt_pk_f32_fp8
+// CHECK: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
void test_cvt_pk_f32_fp8(global v2f* out, int a)
{
*out = __builtin_amdgcn_cvt_pk_f32_fp8(a, true);
}
-// CHECK-GFX940-LABEL: @test_cvt_pk_bf8_f32
-// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false)
+// CHECK-LABEL: @test_cvt_pk_bf8_f32
+// CHECK: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false)
void test_cvt_pk_bf8_f32(global int* out, int old, float a, float b)
{
*out = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false);
}
-// CHECK-GFX940-LABEL: @test_cvt_pk_fp8_f32
-// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true)
+// CHECK-LABEL: @test_cvt_pk_fp8_f32
+// CHECK: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true)
void test_cvt_pk_fp8_f32(global int* out, int old, float a, float b)
{
*out = __builtin_amdgcn_cvt_pk_fp8_f32(a, b, old, true);
}
-// CHECK-GFX940-LABEL: @test_cvt_sr_bf8_f32
-// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2)
+// CHECK-LABEL: @test_cvt_sr_bf8_f32
+// CHECK: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2)
void test_cvt_sr_bf8_f32(global int* out, int old, float a, int b)
{
*out = __builtin_amdgcn_cvt_sr_bf8_f32(a, b, old, 2);
}
-// CHECK-GFX940-LABEL: @test_cvt_sr_fp8_f32
-// CHECK-GFX940: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3)
+// CHECK-LABEL: @test_cvt_sr_fp8_f32
+// CHECK: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3)
void test_cvt_sr_fp8_f32(global int* out, int old, float a, int b)
{
*out = __builtin_amdgcn_cvt_sr_fp8_f32(a, b, old, 3);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index cb29d5d94759812..250e3e350c02e9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1506,6 +1506,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeatureExtendedImageInsts,
+ FeatureFP8ConversionInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 489cf85693edb2c..6b1d04b04066fe7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3500,6 +3500,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
return !isInlineConstant(Inst, OpIdx);
} else if (MO.isReg()) {
auto Reg = MO.getReg();
+ if (!Reg) {
+ return false;
+ }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
auto PReg = mc2PseudoReg(Reg);
return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
@@ -8303,12 +8306,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
- Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
Inst.addOperand(Inst.getOperand(0));
}
- if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
+ // Adding vdst_in operand is already covered for these DPP instructions in
+ // cvtVOP3DPP.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
+ !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
}
@@ -8770,6 +8781,22 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
}
}
+ int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
+ Inst.addOperand(Inst.getOperand(0));
+ }
+
+ bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
+ if (IsVOP3CvtSrDpp) {
+ if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+ }
+
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
if (TiedTo != -1) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 86096b0d80b4246..2c6aa2ee348a1f2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -712,6 +712,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
+ // Insert dummy unused src2_modifiers.
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src2_modifiers);
+ }
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
!AMDGPU::hasGDS(STI)) {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
@@ -942,6 +949,7 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
// first add optional MI operands to check FI
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
+
if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
} else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
@@ -951,6 +959,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
@@ -977,6 +994,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index e73e53aa270f911..9e64e3fd79576db 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1305,6 +1305,16 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
+ if (isCvt_F32_Fp8_Bf8_e64(Opc)) {
+ auto SrcMod =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ unsigned Mod = MI->getOperand(SrcMod).getImm();
+ unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0);
+ unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1);
+ if (Index0 || Index1)
+ O << " op_sel:[" << Index0 << ',' << Index1 << ']';
+ return;
+ }
if (isPermlane16(Opc)) {
auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a6820544f4b4d22..4ab840a4c848523 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1684,8 +1684,9 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod0:$clamp, omod0:$omod),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- clampmod0:$clamp))
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
/* else */,
// VOP1 without modifiers
!if (HasClamp,
@@ -2279,6 +2280,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit IsSingle = 0;
field bit IsWMMA = 0;
+ field bit IsFP8 = 0;
+
field bit HasDst = !ne(DstVT.Value, untyped.Value);
field bit HasDst32 = HasDst;
field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0bf9452d822e970..106fdb19f27895b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -529,6 +529,17 @@ bool isPermlane16(unsigned Opc) {
Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}
+bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
+ return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
+}
+
bool isGenericAtomic(unsigned Opc) {
return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d3f55c79201747f..11b0bc5c81711e0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -535,6 +535,9 @@ bool isPermlane16(unsigned Opc);
LLVM_READNONE
bool isGenericAtomic(unsigned Opc);
+LLVM_READNONE
+bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
+
namespace VOPD {
enum Component : unsigned {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 95a1d86963473a8..ef652fce65482c4 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in {
} // End SubtargetPredicate = isGFX9Only
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
+ let HasExtDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
let HasExt = 1;
@@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
(inst_sdwa 0, $src, 0, 0, index)
>;
+let SubtargetPredicate = isGFX9Only in {
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
@@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
+} // End SubtargetPredicate = isGFX9Only
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -626,11 +629,77 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-foreach Index = [0, -1] in {
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
- V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
- V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+let SubtargetPredicate = isGFX9Only in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+ }
+}
+
+
+// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
+def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
+ let HasOpSel = 1;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> {
+ let HasOpSel = 1;
+ let HasExtDPP = 1;
+ let HasExtVOP3DPP = 1;
+ let IsFP8 = 1;
+ let HasClamp = 0;
+ let HasOMod = 0;
+ let HasModifiers = 1;
+ let Src1VOP3DPP = Src1RC64;
+}
+
+let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+}
+
+class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 !if(index{0},
+ !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1),
+ SRCMODS.OP_SEL_0),
+ !if(index{1}, SRCMODS.OP_SEL_1, 0)),
+ $src, 0),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
+ V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index,
+ V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>;
+ }
+}
+
+class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (v2f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/79340
More information about the llvm-branch-commits
mailing list