[llvm] [llvm][AArch64][Assembly]: Add FP8FMA assembly and disassembly. (PR #70237)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 25 10:56:30 PDT 2023
https://github.com/hassnaaHamdi created https://github.com/llvm/llvm-project/pull/70237
This patch adds the feature flags fp8dot2, ssve-fp8dot2, fp8dot4 and ssve-fp8dot4, and the assembly/disassembly for the following instructions of NEON and SVE2:
* NEON:
- FDOTlane
- FDOT
* SVE2:
- FDOT_ZZZI_BtoH
- FDOT_ZZZ_BtoH
- FDOT_ZZZI_BtoS
- FDOT_ZZZ_BtoS
From 92dd4217280833703abc12d7f8c52f697db45629 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 25 Oct 2023 15:50:49 +0000
Subject: [PATCH] [llvm][AArch64][Assembly]: Add FP8FMA assembly and
disassembly.
This patch adds the feature flags fp8dot2, ssve-fp8dot2, fp8dot4 and
ssve-fp8dot4, and the assembly/disassembly for the following
instructions of NEON and SVE2:
* NEON:
- FDOTlane
- FDOT
* SVE2:
- FDOT_ZZZI_BtoH
- FDOT_ZZZ_BtoH
- FDOT_ZZZI_BtoS
- FDOT_ZZZ_BtoS
Change-Id: I24b73f91e8df9dcd710c514045980b6f1d16e963
---
.../llvm/TargetParser/AArch64TargetParser.h | 8 ++
.../llvm/TargetParser/SubtargetFeature.h | 2 +-
llvm/lib/Target/AArch64/AArch64.td | 12 ++
.../lib/Target/AArch64/AArch64InstrFormats.td | 77 +++++++++++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 23 ++++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 17 +++
.../AArch64/AsmParser/AArch64AsmParser.cpp | 5 +
llvm/lib/Target/AArch64/SVEInstrFormats.td | 10 ++
.../MC/AArch64/FP8/directive-arch-negative.s | 13 ++
llvm/test/MC/AArch64/FP8/directive-arch.s | 13 ++
llvm/test/MC/AArch64/FP8/dot-diagnostic.s | 59 +++++++++
llvm/test/MC/AArch64/FP8/dot.s | 63 ++++++++++
.../MC/AArch64/FP8_SVE2/fdot-diagnostics.s | 65 ++++++++++
llvm/test/MC/AArch64/FP8_SVE2/fdot.s | 113 ++++++++++++++++++
.../test/MC/AArch64/SVE2p1/fdot-diagnostics.s | 2 +-
.../TargetParser/TargetParserTest.cpp | 11 +-
16 files changed, 486 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/MC/AArch64/FP8/directive-arch-negative.s
create mode 100644 llvm/test/MC/AArch64/FP8/directive-arch.s
create mode 100644 llvm/test/MC/AArch64/FP8/dot-diagnostic.s
create mode 100644 llvm/test/MC/AArch64/FP8/dot.s
create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/fdot.s
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index bf14473f133fab7..6c54f908644de6c 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -160,6 +160,10 @@ enum ArchExtKind : unsigned {
AEK_ITE = 56, // FEAT_ITE
AEK_GCS = 57, // FEAT_GCS
AEK_FPMR = 58, // FEAT_FPMR
+ AEK_FP8DOT2 = 59, // FEAT_FP8DOT2
+ AEK_SSVE_FP8DOT2 = 60, // FEAT_SSVE_FP8DOT2
+ AEK_FP8DOT4 = 61, // FEAT_FP8DOT4
+ AEK_SSVE_FP8DOT4 = 62, // FEAT_SSVE_FP8DOT4
AEK_NUM_EXTENSIONS
};
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -269,6 +273,10 @@ inline constexpr ExtensionInfo Extensions[] = {
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
{"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
+ {"fp8dot2", AArch64::AEK_FP8DOT2, "+fp8dot2", "-fp8dot2", FEAT_INIT, "fpmr", 0},
+ {"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+sme2", 0},
+ {"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "fpmr", 0},
+ {"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
};
diff --git a/llvm/include/llvm/TargetParser/SubtargetFeature.h b/llvm/include/llvm/TargetParser/SubtargetFeature.h
index e4dddfb78effbcd..2e1f00dad2df365 100644
--- a/llvm/include/llvm/TargetParser/SubtargetFeature.h
+++ b/llvm/include/llvm/TargetParser/SubtargetFeature.h
@@ -31,7 +31,7 @@ namespace llvm {
class raw_ostream;
class Triple;
-const unsigned MAX_SUBTARGET_WORDS = 4;
+const unsigned MAX_SUBTARGET_WORDS = 5;
const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
/// Container class for subtarget features.
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index ced1d4389203653..af10f3329c575a5 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -511,6 +511,18 @@ def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
"Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>;
+def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true",
+ "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">;
+
+def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true",
+ "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>;
+
+def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",
+ "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">;
+
+def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",
+ "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;
+
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e5dbfa404b3c6bf..8a7e358cbf59e9b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6056,6 +6056,57 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
+// FP8 assembly/disassembly classes
+
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD three-register extension
+//----------------------------------------------------------------------------
+class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
+ RegisterOperand regtype1,
+ RegisterOperand regtype2, string asm,
+ string kind1, string kind2>
+ : I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
+ "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0b1;
+ let Inst{14-11} = op;
+ let Inst{10} = 0b1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// TODO : Create v16f8 value type
+// FCVTN, FCVTN2 (FP32 to FP8)
+multiclass SIMDThreeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
+ def 2v16f8 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1110, asm#2, ".16b", ".4s",
+ V128, v16i8, v4f32, null_frag>;
+}
+
+// TODO: Create a new Value Type v8f8 and v16f8
+multiclass SIMDThreeSameVectorDOT2<string asm> {
+ def v4f16 : BaseSIMDThreeSameVectorDot<0b0, 0b0, 0b01, 0b1111, asm, ".4h", ".8b",
+ V64, v4f16, v8i8, null_frag>;
+ def v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1111, asm, ".8h", ".16b",
+ V128, v8f16, v16i8, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorDOT4<string asm> {
+ def v2f32 : BaseSIMDThreeSameVectorDot<0b0, 0b0, 0b00, 0b1111, asm, ".2s", ".8b",
+ V64, v2f32, v8i8, null_frag>;
+ def v4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1111, asm, ".4s", ".16b",
+ V128, v4f32, v16i8, null_frag>;
+}
+
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
//----------------------------------------------------------------------------
@@ -8483,10 +8534,10 @@ class SIMDThreeSameVectorMatMul<bit B, bit U, string asm, SDPatternOperator OpNo
//----------------------------------------------------------------------------
// ARMv8.2-A Dot Product Instructions (Indexed)
class BaseSIMDThreeSameVectorIndexS<bit Q, bit U, bits<2> size, bits<4> opc, string asm,
- string dst_kind, string lhs_kind, string rhs_kind,
- RegisterOperand RegType,
- ValueType AccumType, ValueType InputType,
- SDPatternOperator OpNode> :
+ string dst_kind, string lhs_kind, string rhs_kind,
+ RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
BaseSIMDIndexedTied<Q, U, 0b0, size, opc, RegType, RegType, V128,
VectorIndexS, asm, "", dst_kind, lhs_kind, rhs_kind,
[(set (AccumType RegType:$dst),
@@ -8508,6 +8559,14 @@ multiclass SIMDThreeSameVectorDotIndex<bit U, bit Mixed, bits<2> size, string as
V128, v4i32, v16i8, OpNode>;
}
+// TODO: The vectors v8i8 and v16i8 should be v8f8 and v16f8
+multiclass SIMDThreeSameVectorFP8DOT4Index<string asm> {
+ def v8f8 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b00, 0b0000, asm, ".2s", ".8b", ".4b",
+ V64, v2f32, v8i8, null_frag>;
+ def v16f8 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b00, 0b0000, asm, ".4s", ".16b",".4b",
+ V128, v4f32, v16i8, null_frag>;
+}
+
// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed)
let mayRaiseFPException = 1, Uses = [FPCR] in
class BaseSIMDThreeSameVectorIndexH<bit Q, bit U, bits<2> sz, bits<4> opc, string asm,
@@ -8537,6 +8596,16 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, OpNode>;
}
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD vector x indexed element
+// TODO: Replace value types v8i8 and v16i8 by v8f8 and v16f8
+multiclass SIMDThreeSameVectorFP8DOT2Index<string asm> {
+ def v4f16 : BaseSIMDThreeSameVectorIndexH<0b0, 0b0, 0b01, 0b0000, asm, ".4h", ".8b", ".2b",
+ V64, V128_lo, v4f16, v8i8, null_frag>;
+ def v8f16 : BaseSIMDThreeSameVectorIndexH<0b1, 0b0, 0b01, 0b0000, asm, ".8h", ".16b", ".2b",
+ V128, V128_lo, v8f16, v8i16, null_frag>;
+}
+
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode> {
let mayRaiseFPException = 1, Uses = [FPCR] in {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 382d3956f105f8c..6f3acbdf536a0b1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -162,6 +162,20 @@ def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
+def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
+def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
+ "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
+ AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
+ (all_of FeatureSVE2, FeatureFP8DOT2)),
+ "ssve-fp8dot2 or (sve2 and fp8dot2)">;
+def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
+def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
+ "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
+ AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
+ (all_of FeatureSVE2, FeatureFP8DOT4)),
+ "ssve-fp8dot4 or (sve2 and fp8dot4)">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -9249,6 +9263,15 @@ let Predicates = [HasD128] in {
}
}
+let Predicates = [HasFP8DOT2] in {
+ defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
+ defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
+} // End let Predicates = [HasFP8DOT2]
+
+let Predicates = [HasFP8DOT4] in {
+ defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
+ defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
+} // End let Predicates = [HasFP8DOT4]
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d599ac4689e5cb3..2178f93b8142014 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4002,3 +4002,20 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
} // End HasSVE2p1_or_HasSME2p1
+
+let Predicates = [HasSSVE_FP8DOT2] in {
+// FP8 Widening Dot-Product - Indexed Group
+defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed<"fdot">;
+// FP8 Widening Dot-Product - Group
+// TODO: Replace nxv16i8 by nxv16f8
+defm FDOT_ZZZ_BtoH : sve_float_dot<0b0, 0b1, ZPR16, ZPR8, "fdot", nxv16i8, null_frag>;
+}
+
+// TODO: Replace nxv16i8 by nxv16f8
+let Predicates = [HasSSVE_FP8DOT4] in {
+// FP8 Widening Dot-Product - Indexed Group
+defm FDOT_ZZZI_BtoS : sve_float_dot_indexed<0b1, 0b01, ZPR8, ZPR3b8, "fdot",
+ nxv16i8, null_frag>;
+// FP8 Widening Dot-Product - Group
+defm FDOT_ZZZ_BtoS : sve_float_dot<0b1, 0b1, ZPR32, ZPR8, "fdot", nxv16i8, null_frag>;
+}
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 6e70deec3f89092..c6bc6f8dc7d4925 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2603,6 +2603,7 @@ static std::optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
.Case("", {0, 0})
.Case(".1d", {1, 64})
.Case(".1q", {1, 128})
+ .Case(".2b", {2, 8})
// '.2h' needed for fp16 scalar pairwise reductions
.Case(".2h", {2, 16})
.Case(".2s", {2, 32})
@@ -3639,6 +3640,10 @@ static const struct Extension {
{"ssbs", {AArch64::FeatureSSBS}},
{"tme", {AArch64::FeatureTME}},
{"fpmr", {AArch64::FeatureFPMR}},
+ {"fp8dot2", {AArch64::FeatureFP8DOT2}},
+ {"ssve-fp8dot2", {AArch64::FeatureSSVE_FP8DOT2}},
+ {"fp8dot4", {AArch64::FeatureFP8DOT4}},
+ {"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7bb457d9188210c..2f9d5333179ee8e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10078,3 +10078,13 @@ multiclass sve2p1_tblq<string mnemonic> {
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
}
+
+// FP8 Widening Dot-Product - Indexed Group
+multiclass sve2_fp8_dot_indexed<string mnemonic>{
+ def NAME : sve_float_dot_indexed<0b0, ZPR16, ZPR8, ZPR3b8, VectorIndexH, mnemonic> {
+ bits<3> iop;
+ let Inst{20-19} = iop{2-1};
+ let Inst{11} = iop{0};
+ let Inst{10} = 0b1;
+ }
+}
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
new file mode 100644
index 000000000000000..0ae6f7744d74bfd
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8dot2
+.arch armv9-a+nofp8dot2
+fdot v31.4h, v0.8b, v0.8b
+// CHECK: error: instruction requires: fp8dot2
+// CHECK: fdot v31.4h, v0.8b, v0.8b
+
+.arch armv9-a+fp8dot4
+.arch armv9-a+nofp8dot4
+fdot v0.2s, v0.8b, v31.8b
+// CHECK: error: instruction requires: fp8dot4
+// CHECK: fdot v0.2s, v0.8b, v31.8b
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
new file mode 100644
index 000000000000000..8f47cef83a9a38e
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8dot2
+fdot v31.4h, v0.8b, v0.8b
+// CHECK: fdot v31.4h, v0.8b, v0.8b
+
+.arch armv9-a+nofp8dot2
+
+.arch armv9-a+fp8dot4
+fdot v0.2s, v0.8b, v31.8b
+// CHECK: fdot v0.2s, v0.8b, v31.8b
+
+.arch armv9-a+nofp8dot4
diff --git a/llvm/test/MC/AArch64/FP8/dot-diagnostic.s b/llvm/test/MC/AArch64/FP8/dot-diagnostic.s
new file mode 100644
index 000000000000000..a73310280cec638
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/dot-diagnostic.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8dot2,+fp8dot4 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Element size extension incorrect
+
+fdot v31.4h, v0.8h, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v31.4h, v0.8h, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v31.8h, v0.16b, v31.16h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fdot v31.8h, v0.16b, v31.16h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.8s, v31.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fdot v0.2s, v0.8s, v31.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v31.4s, v0, v31.16b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v31.4s, v0, v31.16b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+//--------------------------------------------------------------------------//
+// The indexed (last) register must be in the range v0-v15
+
+fdot v31.4h, v31.8b, v16.2b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v31.4h, v31.8b, v16.2b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.8h, v0.16b, v16.2b[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.8h, v0.16b, v16.2b[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Out of range index
+fdot v31.4h, v31.8b, v15.2b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fdot v31.4h, v31.8b, v15.2b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.8h, v0.16b, v15.2b[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fdot v0.8h, v0.16b, v15.2b[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.8b, v31.4b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.8b, v31.4b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.4s, v31.16b, v0.4b[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.4s, v31.16b, v0.4b[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8/dot.s b/llvm/test/MC/AArch64/FP8/dot.s
new file mode 100644
index 000000000000000..e755430745c34e7
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/dot.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8dot2,+fp8dot4 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fp8dot2,+fp8dot4 < %s \
+// RUN: | llvm-objdump -d --mattr=+fp8dot2,+fp8dot4 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fp8dot2,+fp8dot4 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fp8dot2,+fp8dot4 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+fp8dot2,+fp8dot4 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+/// VECTOR
+fdot v31.4h, v0.8b, v0.8b
+// CHECK-INST: fdot v31.4h, v0.8b, v0.8b
+// CHECK-ENCODING: [0x1f,0xfc,0x40,0x0e]
+// CHECK-ERROR: instruction requires: fp8dot2
+// CHECK-UNKNOWN: 0e40fc1f <unknown>
+
+fdot v31.8h, v0.16b, v31.16b
+// CHECK-INST: fdot v31.8h, v0.16b, v31.16b
+// CHECK-ENCODING: [0x1f,0xfc,0x5f,0x4e]
+// CHECK-ERROR: instruction requires: fp8dot2
+// CHECK-UNKNOWN: 4e5ffc1f <unknown>
+
+fdot v0.2s, v0.8b, v31.8b
+// CHECK-INST: fdot v0.2s, v0.8b, v31.8b
+// CHECK-ENCODING: [0x00,0xfc,0x1f,0x0e]
+// CHECK-ERROR: instruction requires: fp8dot4
+// CHECK-UNKNOWN: 0e1ffc00 <unknown>
+
+fdot v31.4s, v0.16b, v31.16b
+// CHECK-INST: fdot v31.4s, v0.16b, v31.16b
+// CHECK-ENCODING: [0x1f,0xfc,0x1f,0x4e]
+// CHECK-ERROR: instruction requires: fp8dot4
+// CHECK-UNKNOWN: 4e1ffc1f <unknown>
+
+//INDEXED
+fdot v31.4h, v31.8b, v15.2b[0]
+// CHECK-INST: fdot v31.4h, v31.8b, v15.2b[0]
+// CHECK-ENCODING: [0xff,0x03,0x4f,0x0f]
+// CHECK-ERROR: instruction requires: fp8dot2
+// CHECK-UNKNOWN: 0f4f03ff <unknown>
+
+fdot v0.8h, v0.16b, v15.2b[7]
+// CHECK-INST: fdot v0.8h, v0.16b, v15.2b[7]
+// CHECK-ENCODING: [0x00,0x08,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: fp8dot2
+// CHECK-UNKNOWN: 4f7f0800 <unknown>
+
+fdot v0.2s, v0.8b, v31.4b[0]
+// CHECK-INST: fdot v0.2s, v0.8b, v31.4b[0]
+// CHECK-ENCODING: [0x00,0x00,0x1f,0x0f]
+// CHECK-ERROR: instruction requires: fp8dot4
+// CHECK-UNKNOWN: 0f1f0000 <unknown>
+
+fdot v0.4s, v31.16b, v0.4b[3]
+// CHECK-INST: fdot v0.4s, v31.16b, v0.4b[3]
+// CHECK-ENCODING: [0xe0,0x0b,0x20,0x4f]
+// CHECK-ERROR: instruction requires: fp8dot4
+// CHECK-UNKNOWN: 4f200be0 <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s b/llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s
new file mode 100644
index 000000000000000..b80e527ec295347
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SVE2/fdot-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+ssve-fp8dot2,+ssve-fp8dot4 \
+// RUN: 2>&1 < %s | FileCheck %s
+
+// FDOT2
+// --------------------------------------------------------------------------//
+
+// z register out of range for index
+
+fdot z0.h, z0.b, z8.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot z0.h, z0.b, z8.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// Invalid vector lane index
+
+fdot z0.h, z0.b, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fdot z0.h, z0.b, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot z0.h, z0.b, z0.b[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fdot z0.h, z0.b, z0.b[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// Invalid vector suffix
+
+fdot z0.d, z0.b, z0.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fdot z0.d, z0.b, z0.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fdot z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+
+// FDOT4
+// --------------------------------------------------------------------------//
+// Invalid vector lane index
+
+fdot z0.s, z0.b, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot z0.s, z0.b, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot z0.s, z0.b, z0.b[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot z0.s, z0.b, z0.b[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// Invalid vector suffix
+
+fdot z0.s, z0.s, z0.s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fdot z0.s, z0.s, z0.s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fdot z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SVE2/fdot.s b/llvm/test/MC/AArch64/FP8_SVE2/fdot.s
new file mode 100644
index 000000000000000..eb16b59de7afda0
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SVE2/fdot.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+ssve-fp8dot2,+ssve-fp8dot4 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8dot2,+fp8dot4 < %s \
+// RUN: | llvm-objdump -d --mattr=+sve2,+fp8dot2,+fp8dot4 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+fp8dot2,+fp8dot4 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+fp8dot2,fp8dot4 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2,+fp8dot2,fp8dot4 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+//
+// FDOT2 instructions
+//
+// fdot2 - indexed
+
+fdot z0.h, z0.b, z0.b[0] // 01100100-00100000-01000100-00000000
+// CHECK-INST: fdot z0.h, z0.b, z0.b[0]
+// CHECK-ENCODING: [0x00,0x44,0x20,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2)
+// CHECK-UNKNOWN: 64204400 <unknown>
+
+movprfx z23, z31
+fdot z23.h, z13.b, z0.b[3] // 01100100-00101000-01001101-10110111
+// CHECK-INST: movprfx z23, z31
+// CHECK-INST: fdot z23.h, z13.b, z0.b[3]
+// CHECK-ENCODING: [0xb7,0x4d,0x28,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2)
+// CHECK-UNKNOWN: 64284db7 <unknown>
+
+fdot z31.h, z31.b, z7.b[7] // 01100100-00111111-01001111-11111111
+// CHECK-INST: fdot z31.h, z31.b, z7.b[7]
+// CHECK-ENCODING: [0xff,0x4f,0x3f,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2)
+// CHECK-UNKNOWN: 643f4fff <unknown>
+
+
+// fdot2 - group
+
+fdot z0.h, z0.b, z0.b // 01100100-00100000-10000100-00000000
+// CHECK-INST: fdot z0.h, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0x84,0x20,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2)
+// CHECK-UNKNOWN: 64208400 <unknown>
+
+movprfx z23, z31
+fdot z23.h, z13.b, z8.b // 01100100-00101000-10000101-10110111
+// CHECK-INST: movprfx z23, z31
+// CHECK-INST: fdot z23.h, z13.b, z8.b
+// CHECK-ENCODING: [0xb7,0x85,0x28,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2)
+// CHECK-UNKNOWN: 642885b7 <unknown>
+
+fdot z31.h, z31.b, z31.b // 01100100-00111111-10000111-11111111
+// CHECK-INST: fdot z31.h, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0x87,0x3f,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot2 or (sve2 and fp8dot2)
+// CHECK-UNKNOWN: 643f87ff <unknown>
+
+
+//
+// FDOT4 instructions
+//
+// fdot4 - indexed
+
+fdot z0.s, z0.b, z0.b[0] // 01100100-01100000-01000100-00000000
+// CHECK-INST: fdot z0.s, z0.b, z0.b[0]
+// CHECK-ENCODING: [0x00,0x44,0x60,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4)
+// CHECK-UNKNOWN: 64604400 <unknown>
+
+movprfx z23, z31
+fdot z23.s, z13.b, z0.b[1] // 01100100-01101000-01000101-10110111
+// CHECK-INST: movprfx z23, z31
+// CHECK-INST: fdot z23.s, z13.b, z0.b[1]
+// CHECK-ENCODING: [0xb7,0x45,0x68,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4)
+// CHECK-UNKNOWN: 646845b7 <unknown>
+
+fdot z31.s, z31.b, z7.b[3] // 01100100-01111111-01000111-11111111
+// CHECK-INST: fdot z31.s, z31.b, z7.b[3]
+// CHECK-ENCODING: [0xff,0x47,0x7f,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4)
+// CHECK-UNKNOWN: 647f47ff <unknown>
+
+// fdot4 - group
+
+fdot z0.s, z0.b, z0.b // 01100100-01100000-10000100-00000000
+// CHECK-INST: fdot z0.s, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0x84,0x60,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4)
+// CHECK-UNKNOWN: 64608400 <unknown>
+
+movprfx z23, z31
+fdot z23.s, z13.b, z8.b // 01100100-01101000-10000101-10110111
+// CHECK-INST: movprfx z23, z31
+// CHECK-INST: fdot z23.s, z13.b, z8.b
+// CHECK-ENCODING: [0xb7,0x85,0x68,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4)
+// CHECK-UNKNOWN: 646885b7 <unknown>
+
+fdot z31.s, z31.b, z31.b // 01100100-01111111-10000111-11111111
+// CHECK-INST: fdot z31.s, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0x87,0x7f,0x64]
+// CHECK-ERROR: instruction requires: ssve-fp8dot4 or (sve2 and fp8dot4)
+// CHECK-UNKNOWN: 647f87ff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s
index bfcbbc4c89e6fb3..7a68b92ba30e174 100644
--- a/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2p1/fdot-diagnostics.s
@@ -17,7 +17,7 @@ fdot z0.s, z0.h, z0.h[-1]
// Invalid vector suffix
fdot z0.h, z0.s, z0.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot z0.h, z0.s, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index b432e7ac2d86871..4e7b4b4ef997475 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1732,7 +1732,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
AArch64::AEK_RCPC3, AArch64::AEK_THE, AArch64::AEK_D128,
AArch64::AEK_LSE128, AArch64::AEK_SPECRES2, AArch64::AEK_RASv2,
AArch64::AEK_ITE, AArch64::AEK_GCS, AArch64::AEK_FPMR,
- };
+ AArch64::AEK_FP8DOT2, AArch64::AEK_SSVE_FP8DOT2,
+ AArch64::AEK_FP8DOT4, AArch64::AEK_SSVE_FP8DOT4};
std::vector<StringRef> Features;
@@ -1805,6 +1806,10 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+ite"));
EXPECT_TRUE(llvm::is_contained(Features, "+gcs"));
EXPECT_TRUE(llvm::is_contained(Features, "+fpmr"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot2"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot2"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4"));
// Assuming we listed every extension above, this should produce the same
// result. (note that AEK_NONE doesn't have a name so it won't be in the
@@ -1929,6 +1934,10 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"rasv2", "norasv2", "+rasv2", "-rasv2"},
{"gcs", "nogcs", "+gcs", "-gcs"},
{"fpmr", "nofpmr", "+fpmr", "-fpmr"},
+ {"fp8dot2", "nofp8dot2", "+fp8dot2", "-fp8dot2"},
+ {"ssve-fp8dot2", "nossve-fp8dot2", "+ssve-fp8dot2", "-ssve-fp8dot2"},
+ {"fp8dot4", "nofp8dot4", "+fp8dot4", "-fp8dot4"},
+ {"ssve-fp8dot4", "nossve-fp8dot4", "+ssve-fp8dot4", "-ssve-fp8dot4"},
};
for (unsigned i = 0; i < std::size(ArchExt); i++) {
More information about the llvm-commits
mailing list