[llvm] [llvm][AArch64][Assembly]: Add FP8FMA assembly and disassembly. (PR #70134)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 15:16:32 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (hassnaaHamdi)
<details>
<summary>Changes</summary>
This patch adds the FP8FMA feature flag and the assembly/disassembly support for the following NEON and SVE2 instructions:
* NEON:
- FMLALBlane
- FMLALTlane
- FMLALLBBlane
- FMLALLBTlane
- FMLALLTBlane
- FMLALLTTlane
- FMLALB
- FMLALT
- FMLALLB
- FMLALLBT
- FMLALLTB
- FMLALLTT
* SVE2:
- FMLALB_ZZZI
- FMLALT_ZZZI
- FMLALB_ZZZ
- FMLALT_ZZZ
- FMLALLBB_ZZZI
- FMLALLBT_ZZZI
- FMLALLTB_ZZZI
- FMLALLTT_ZZZI
- FMLALLBB_ZZZ
- FMLALLBT_ZZZ
- FMLALLTB_ZZZ
- FMLALLTT_ZZZ
This follows the Arm A-profile architecture documentation (2023-09 ISA release):
https://developer.arm.com/documentation/ddi0602/2023-09
---
Patch is 49.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70134.diff
21 Files Affected:
- (modified) llvm/include/llvm/TargetParser/AArch64TargetParser.h (+4)
- (modified) llvm/lib/Target/AArch64/AArch64.td (+8-2)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+34)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+22)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+2)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.td (+16)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+20)
- (modified) llvm/lib/Target/AArch64/AArch64SchedA64FX.td (+1-1)
- (modified) llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (+14-5)
- (modified) llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp (+11)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+2-1)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+90)
- (added) llvm/test/MC/AArch64/FP8/directive-arch-negative.s (+13)
- (added) llvm/test/MC/AArch64/FP8/directive-arch.s (+13)
- (added) llvm/test/MC/AArch64/FP8/mla-diagnostic.s (+85)
- (added) llvm/test/MC/AArch64/FP8/mla.s (+98)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fmlal-diagnostics.s (+64)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fmlal.s (+114)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fmlall-diagnostics.s (+85)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fmlall.s (+222)
- (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+5-1)
``````````diff
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index bf14473f133fab7..9a3185d3ed38e56 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -160,6 +160,8 @@ enum ArchExtKind : unsigned {
AEK_ITE = 56, // FEAT_ITE
AEK_GCS = 57, // FEAT_GCS
AEK_FPMR = 58, // FEAT_FPMR
+ AEK_FP8FMA = 59, // FEAT_FP8FMA
+ AEK_SSVE_FP8FMA = 60, // FEAT_SSVE_FP8FMA
AEK_NUM_EXTENSIONS
};
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -269,6 +271,8 @@ inline constexpr ExtensionInfo Extensions[] = {
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
{"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
+ {"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fpmr", 0},
+ {"ssve-fp8fma", AArch64::AEK_SSVE_FP8FMA, "+ssve-fp8fma", "-ssve-fp8fma", FEAT_INIT, "+sme2", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
};
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index ced1d4389203653..64f619de4fe9b1a 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -511,6 +511,12 @@ def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
"Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>;
+def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
+ "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;
+
+def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
+ "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;
+
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;
@@ -741,7 +747,7 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
def SVE2p1Unsupported : AArch64Unsupported;
def SVE2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSVE2, HasSVE2orSME,
+ let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA,
HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
SVE2p1Unsupported.F);
}
@@ -755,7 +761,7 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
def SME2p1Unsupported : AArch64Unsupported;
def SME2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],
+ let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA],
SME2p1Unsupported.F);
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e5dbfa404b3c6bf..25f6f04acd475a8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6055,6 +6055,15 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
v4f32, v8f16, OpNode>;
}
+multiclass SIMDThreeSameVectorMLA<bit Q, string asm>{
+ def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ V128, v8f16, v16i8, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm>{
+ def v4f32 : BaseSIMDThreeSameVectorDot<Q, 0b0, sz, 0b1000, asm, ".4s", ".16b",
+ V128, v4f32, v16i8, null_frag>;
+}
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
@@ -8468,6 +8477,31 @@ class BF16ToSinglePrecision<string asm>
}
} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
+//----------------------------------------------------------------------------
+class BaseSIMDThreeSameVectorIndexB<bit Q, bit U, bits<2> sz, bits<4> opc,
+ string asm, string dst_kind,
+ RegisterOperand RegType,
+ RegisterOperand RegType_lo>
+ : BaseSIMDIndexedTied<Q, U, 0b0, sz, opc,
+ RegType, RegType, RegType_lo, VectorIndexB,
+ asm, "", dst_kind, ".16b", ".b", []> {
+
+ // idx = H:L:M
+ bits<4> idx;
+ let Inst{11} = idx{3};
+ let Inst{21-19} = idx{2-0};
+}
+
+multiclass SIMDThreeSameVectorMLAIndex<bit Q, string asm> {
+ def v8f16 : BaseSIMDThreeSameVectorIndexB<Q, 0b0, 0b11, 0b0000, asm, ".8h",
+ V128, V128_0to7>;
+}
+
+multiclass SIMDThreeSameVectorMLALIndex<bit Q, bits<2> sz, string asm> {
+ def v4f32 : BaseSIMDThreeSameVectorIndexB<Q, 0b1, sz, 0b1000, asm, ".4s",
+ V128, V128_0to7>;
+}
+
//----------------------------------------------------------------------------
// Armv8.6 Matrix Multiply Extension
//----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 382d3956f105f8c..664fea4d73f4631 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -162,6 +162,13 @@ def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
+def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
+def HasSSVE_FP8FMA : Predicate<"Subtarget->SSVE_FP8FMA() || "
+ "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
+ AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
+ (all_of FeatureSVE2, FeatureFP8FMA)),
+ "ssve-fp8fma or (sve2 and fp8fma)">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -9249,6 +9256,21 @@ let Predicates = [HasD128] in {
}
}
+let Predicates = [HasFP8FMA] in {
+ defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
+ defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
+ defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
+ defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
+ defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
+ defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
+
+ defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
+ defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
+ defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
+ defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
+ defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
+ defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
+} // End let Predicates = [HasFP8FMA]
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index cc5384613f1ab38..e6129c764186e93 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -981,6 +981,8 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::FPR64_loRegClassID:
case AArch64::FPR16_loRegClassID:
return 16;
+ case AArch64::FPR128_0to7RegClassID:
+ return 8;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index eb26591908fd79c..497c143584d48d4 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -467,6 +467,13 @@ def FPR128_lo : RegisterClass<"AArch64",
v8bf16],
128, (trunc FPR128, 16)>;
+// The lower 8 vector registers. Some instructions can only take registers
+// in this range.
+def FPR128_0to7 : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
+ v8bf16],
+ 128, (trunc FPR128, 8)>;
+
// Pairs, triples, and quads of 64-bit vector registers.
def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
@@ -534,6 +541,15 @@ def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
let ParserMatchClass = VectorRegLoAsmOperand;
}
+def VectorReg0to7AsmOperand : AsmOperandClass {
+ let Name = "VectorReg0to7";
+ let PredicateMethod = "isNeonVectorReg0to7";
+}
+
+def V128_0to7 : RegisterOperand<FPR128_0to7, "printVRegOperand"> {
+ let ParserMatchClass = VectorReg0to7AsmOperand;
+}
+
class TypedVecListAsmOperand<int count, string vecty, int lanes, int eltsize>
: AsmOperandClass {
let Name = "TypedVectorList" # count # "_" # lanes # eltsize;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d599ac4689e5cb3..4fbbd33a3cdfdb0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4002,3 +4002,23 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
} // End HasSVE2p1_or_HasSME2p1
+
+let Predicates = [HasSSVE_FP8FMA] in {
+// FP8 Widening Multiply-Add Long - Indexed Group
+def FMLALB_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b0, "fmlalb">;
+def FMLALT_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b1, "fmlalt">;
+// FP8 Widening Multiply-Add Long Group
+def FMLALB_ZZZ : sve2_fp8_mla_long<0b0, "fmlalb">;
+def FMLALT_ZZZ : sve2_fp8_mla_long<0b1, "fmlalt">;
+
+// FP8 Widening Multiply-Add Long Long - Indexed Group
+def FMLALLBB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b00, "fmlallbb">;
+def FMLALLBT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b01, "fmlallbt">;
+def FMLALLTB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b10, "fmlalltb">;
+def FMLALLTT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b11, "fmlalltt">;
+// FP8 Widening Multiply-Add Long Long Group
+def FMLALLBB_ZZZ : sve2_fp8_mla_long_long<0b00, "fmlallbb">;
+def FMLALLBT_ZZZ : sve2_fp8_mla_long_long<0b01, "fmlallbt">;
+def FMLALLTB_ZZZ : sve2_fp8_mla_long_long<0b10, "fmlalltb">;
+def FMLALLTT_ZZZ : sve2_fp8_mla_long_long<0b11, "fmlalltt">;
+} // End HasSSVE_FP8FMA
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index f3e844dc5b270d1..1de0e8291ef12d9 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures =
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
- HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
+ HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA];
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 6e70deec3f89092..62c6ecf196e598f 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1223,6 +1223,12 @@ class AArch64Operand : public MCParsedAsmOperand {
Reg.RegNum));
}
+ bool isNeonVectorReg0to7() const {
+ return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
+ (AArch64MCRegisterClasses[AArch64::FPR128_0to7RegClassID].contains(
+ Reg.RegNum));
+ }
+
bool isMatrix() const { return Kind == k_MatrixRegister; }
bool isMatrixTileList() const { return Kind == k_MatrixTileList; }
@@ -1766,6 +1772,11 @@ class AArch64Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createReg(getReg()));
}
+ void addVectorReg0to7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
enum VecListIndexType {
VecListIdx_DReg = 0,
VecListIdx_QReg = 1,
@@ -2603,13 +2614,9 @@ static std::optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
.Case("", {0, 0})
.Case(".1d", {1, 64})
.Case(".1q", {1, 128})
- // '.2h' needed for fp16 scalar pairwise reductions
- .Case(".2h", {2, 16})
+ .Case(".2b", {2, 8})
.Case(".2s", {2, 32})
.Case(".2d", {2, 64})
- // '.4b' is another special case for the ARMv8.2a dot product
- // operand
- .Case(".4b", {4, 8})
.Case(".4h", {4, 16})
.Case(".4s", {4, 32})
.Case(".8b", {8, 8})
@@ -3639,6 +3646,8 @@ static const struct Extension {
{"ssbs", {AArch64::FeatureSSBS}},
{"tme", {AArch64::FeatureTME}},
{"fpmr", {AArch64::FeatureFPMR}},
+ {"fp8fma", {AArch64::FeatureFP8FMA}},
+ {"ssve-fp8fma", {AArch64::FeatureFP8FMA}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index df817f62f99fbf7..d943b8fe88f49af 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -44,6 +44,9 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus DecodeFPR128_0to7RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
@@ -437,6 +440,14 @@ DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
}
+static DecodeStatus
+DecodeFPR128_0to7RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ if (RegNo > 7)
+ return Fail;
+ return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
+}
+
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 26954c62e03f1fc..6266152bca48f1f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -245,9 +245,10 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
case AArch64::FPR32RegClassID:
case AArch64::FPR64RegClassID:
- case AArch64::FPR64_loRegClassID:
case AArch64::FPR128RegClassID:
+ case AArch64::FPR64_loRegClassID:
case AArch64::FPR128_loRegClassID:
+ case AArch64::FPR128_0to7RegClassID:
case AArch64::DDRegClassID:
case AArch64::DDDRegClassID:
case AArch64::DDDDRegClassID:
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7bb457d9188210c..284cf6f7ef22ebc 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10078,3 +10078,93 @@ multiclass sve2p1_tblq<string mnemonic> {
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
}
+
+// FP8 Widening Multiply-Add Long - Indexed Group
+class sve2_fp8_mla_long_by_indexed_elem<bit T, string mnemonic>
+ : I<(outs ZPR16:$Zda),
+ (ins ZPR16:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexB:$imm4),
+ mnemonic, "\t$Zda, $Zn, $Zm$imm4",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<3> Zm;
+ bits<4> imm4;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23} = T;
+ let Inst{22-21} = 0b01;
+ let Inst{20-19} = imm4{3-2};
+ let Inst{18-16} = Zm;
+ let Inst{15-12} = 0b0101;
+ let Inst{11-10} = imm4{1-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR16.ElementSize;
+}
+
+// FP8 Widening Multiply-Add Long Group
+class sve2_fp8_mla_long<bit T, string mnemonic>
+ : I<(outs ZPR16:$Zda),
+ (ins ZPR16:$_Zda, ZPR8:$Zn, ZPR8:$Zm),
+ mnemonic, "\t$Zda, $Zn, $Zm",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01100100101;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b100;
+ let Inst{12} = T;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR16.ElementSize;
+}
+
+// FP8 Widening Multiply-Add Long Long - Indexed Group
+class sve2_fp8_mla_long_long_by_indexed_elem<bits<2> TT, string mnemonic>
+ : I<(outs ZPR32:$Zda),
+ (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexB:$imm4),
+ mnemonic, "\t$Zda, $Zn, $Zm$imm4",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<3> Zm;
+ bits<4> imm4;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = TT;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = imm4{3-2};
+ let Inst{18-16} = Zm;
+ let Inst{15-12} = 0b1100;
+ let Inst{11-10} = imm4{1-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR32.ElementSize;
+}
+
+// FP8 Widening Multiply-Add Long Long Group
+class sve2_fp8_mla_long_long<bits<2> TT, string mnemonic>
+ : I<(outs ZPR32:$Zda),
+ (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm),
+ mnemonic, "\t$Zda, $Zn, $Zm",
+ "", []>, Sched<[]>{
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01100100001;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b10;
+ let Inst{13-12} = TT;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR32.ElementSize;
+}
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
new file mode 100644
index 000000000000000..3a1a5837a9eea3c
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8fma
+.arch armv9-a+nofp8fma
+fmlalb v0.8h, v0.16b, v0.16b
+// CHECK: error: instruction requires: fp8fma
+// CHECK: fmlalb v0.8h, v0.16b, v0.16b
+
+.arch armv9-a+ssve-fp8fma
+.arch armv9-a+nossve-fp8fma
+fmlalb v0.8h, v0.16b, v0.16b
+// CHECK: error: instruction requires: ssve-fp8fma
+// CHECK: fmlalb v0.8h, v0.16b, v0.16b
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
new file mode 100644
index 000000000000000..f8bf1c42d6448b2
--- /dev/null
+++ b/llvm/test/MC/A...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70134
More information about the llvm-commits
mailing list