[llvm] [AArch64][2023][FP8]: Add FP8 instructions assembly and disassembly. (PR #69632)

Thu Oct 19 11:57:07 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-mc

Author: None (hassnaaHamdi)

<details>
<summary>Changes</summary>

This patch adds the FP8 feature flag and assembly/disassembly support for the following NEON, SVE2 and SME2 instructions:

  * NEON Instructions:
   + Advanced SIMD two-register miscellaneous:
    - BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 — BF1CVTL
    - BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 — BF2CVTL
    - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 — F1CVTL
    - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 — F2CVTL
   + Advanced SIMD three-register extension:
    - FCVTN, FCVTN2 (FP32 to FP8)
    - FCVTN (FP16 to FP8)
   + Advanced SIMD three same
    - FSCALE

  * SVE2 Instructions:
   + Downconvert instructions:
    - FCVTN_Z2Z_HtoB
    - FCVTNB_Z2Z_StoB
    - BFCVTN_Z2Z_HtoB
    - FCVTNT_Z2Z_StoB
   + Upconvert instructions:
    - F1CVT_ZZ, F2CVT_ZZ
    - BF1CVT_ZZ, BF2CVT_ZZ
    - F1CVTLT_ZZ, F2CVTLT_ZZ
    - BF1CVTLT_ZZ, BF2CVTLT_ZZ

  * SME2 Instructions:
    - F1CVT_2ZZ, F2CVT_2ZZ
    - BF1CVT_2ZZ, BF2CVT_2ZZ
    - F1CVTL_2ZZ, F2CVTL_2ZZ
    - BF1CVTL_2ZZ, BF2CVTL_2ZZ
    - FCVT_Z2Z_HtoB, BFCVT_Z2Z_HtoB
    - FCVT_Z4Z
    - FCVTN_Z4Z
    - FSCALE_2ZZ, FSCALE_4ZZ
    - FSCALE_2Z2Z, FSCALE_4Z4Z

The instruction encodings and syntax follow this Arm documentation:
https://developer.arm.com/documentation/ddi0602/2023-09

Change-Id: I56008a1b74c21ad30f36d18c4895c4dd1ba48920

---

Patch is 80.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69632.diff


23 Files Affected:

- (modified) llvm/include/llvm/TargetParser/AArch64TargetParser.h (+3-1) 
- (modified) llvm/lib/Target/AArch64/AArch64.td (+3) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+57) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+15) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+31-8) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+21) 
- (modified) llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (+1) 
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+30-11) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+43) 
- (added) llvm/test/MC/AArch64/FP8/directive-arch-negative.s (+7) 
- (added) llvm/test/MC/AArch64/FP8/directive-arch.s (+7) 
- (added) llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnostics.s (+84) 
- (added) llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s (+355) 
- (added) llvm/test/MC/AArch64/FP8_SME2/cvt-diagnostics.s (+87) 
- (added) llvm/test/MC/AArch64/FP8_SME2/cvt.s (+157) 
- (added) llvm/test/MC/AArch64/FP8_SME2/fscale-diagnostics.c (+62) 
- (added) llvm/test/MC/AArch64/FP8_SME2/fscale.s (+160) 
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvt-diagnostics.s (+131) 
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvt.s (+237) 
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s (+70) 
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s (+125) 
- (modified) llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s (+2-2) 
- (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+3-1) 


``````````diff

diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 616f2d79028615d..e22ab824e51e2eb 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -159,7 +159,8 @@ enum ArchExtKind : unsigned {
   AEK_RASv2 =         55, // FEAT_RASv2
   AEK_ITE =           56, // FEAT_ITE
   AEK_GCS =           57, // FEAT_GCS
-  AEK_NUM_EXTENSIONS =  AEK_GCS + 1
+  AEK_FP8 =           58,  // FEAT_FP8
+  AEK_NUM_EXTENSIONS
 };
 using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
 // clang-format on
@@ -267,6 +268,7 @@ inline constexpr ExtensionInfo Extensions[] = {
     {"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_INIT, "", 0},
     {"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
     {"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
+    {"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "", 0},
     // Special cases
     {"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
 };
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 70973c92305aa62..9254676458fb831 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -127,6 +127,9 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
 def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
   "Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;
 
+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
+  "Enable FP8 Instructions (FEAT_FP8)">;
+
 // This flag is currently still labeled as Experimental, but when fully
 // implemented this should tell the compiler to use the zeroing pseudos to
 // benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e5dbfa404b3c6bf..129866f39edc7ce 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6056,6 +6056,53 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
 }
 
 
+// FP8 assembly/disassembly classes
+
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD three-register extension
+//----------------------------------------------------------------------------
+class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
+                           RegisterOperand regtype1,
+                           RegisterOperand regtype2, string asm,
+                           string kind1, string kind2>
+  : I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
+      "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  let Inst{31}    = 0;
+  let Inst{30}    = Q;
+  let Inst{29}    = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Rm;
+  let Inst{15}    = 0b1;
+  let Inst{14-11} = op;
+  let Inst{10}    = 0b1;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
+}
+
+
+// FCVTN (FP16 to FP8)
+multiclass SIMDThreeSameSizeVectorCvt<string asm> {
+   def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
+   def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110,  V128, V128, asm, ".16b", ".8h">;
+}
+
+class BaseSIMDThreeSameSizeVectorFP8Tied<bit Q, bits<2> sz, string asm, string kind1,
+                                 string kind2, RegisterOperand RegType> :
+        BaseSIMDThreeSameVectorTied<Q, 0b0, {sz,0b0}, 0b11110, RegType, asm, kind1, []> {
+  let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+// FCVTN, FCVTN2 (FP32 to FP8)
+multiclass SIMDThreeVectorCvt<string asm> {
+   def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
+   def 2v16f8 : BaseSIMDThreeSameSizeVectorFP8Tied<0b1, 0b00, asm#2, ".16b", ".4s", V128>;
+}
+
 //----------------------------------------------------------------------------
 // AdvSIMD two register vector instructions.
 //----------------------------------------------------------------------------
@@ -6479,6 +6526,16 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
 }
 
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD two-register miscellaneous
+//----------------------------------------------------------------------------
+multiclass SIMDMixedTwoVectorFP8<bits<2>sz, string asm> {
+  def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
+                                     asm, ".8h", ".8b", []>;
+  def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
+                                     asm#2, ".8h", ".16b", []>;
+}
+
 class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
                            bits<5> opcode, RegisterOperand regtype, string asm,
                            string kind, string zero, ValueType dty,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index df59dc4ad27fadb..aec57737ab49636 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -160,6 +160,8 @@ def HasSME2          : Predicate<"Subtarget->hasSME2()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
 def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
+def HasFP8           : Predicate<"Subtarget->hasFP8()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
 
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
@@ -171,6 +173,10 @@ def HasSVE2orSME
     : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                 "sve2 or sme">;
+def HasSVE2orSME2
+    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
+                "sve2 or sme2">;
 def HasSVE2p1_or_HasSME
     : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
                  AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -9247,6 +9253,15 @@ let Predicates = [HasD128] in {
   }
 }
 
+let Predicates = [HasFP8] in {
+  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
+  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
+  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
+  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
+  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
+  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
+  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
+} // End let Predicates = [HasFP8]
 
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 2685f2e3c8108e5..cbdc38965fc5c7e 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -330,14 +330,14 @@ defm UMLSL_VG4_M4ZZ  : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
 defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl",  0b11, int_aarch64_sme_umlsl_vg2x2>;
 defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl",  0b11, int_aarch64_sme_umlsl_vg2x4>;
 
-defm FCVT_Z2Z_StoH   : sme2_cvt_vg2_single<"fcvt",   0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
-defm FCVTN_Z2Z_StoH  : sme2_cvt_vg2_single<"fcvtn",  0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
-defm BFCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"bfcvt",  0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
-defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
-
-defm SQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"sqcvt",  0b0110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
-defm UQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"uqcvt",  0b0111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
-defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
+defm FCVT_Z2Z_StoH   : sme2_cvt_vg2_single<"fcvt",   0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
+defm FCVTN_Z2Z_StoH  : sme2_cvt_vg2_single<"fcvtn",  0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
+defm BFCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"bfcvt",  0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
+defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
+
+defm SQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"sqcvt",  0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
+defm UQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"uqcvt",  0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
+defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
 defm SQCVT_Z4Z      : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>;
 defm UQCVT_Z4Z      : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>;
 defm SQCVTU_Z4Z     : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
@@ -855,3 +855,26 @@ defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">;
 defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>;
 defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>;
 }
+
+let Predicates = [HasSME2, HasFP8] in {
+defm F1CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>;
+defm F1CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>;
+defm BF1CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>;
+defm BF1CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>;
+defm F2CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>;
+defm F2CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>;
+defm BF2CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>;
+defm BF2CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>;
+
+defm FCVT_Z2Z_HtoB  : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>;
+defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>;
+defm FCVT_Z4Z : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>;
+defm FCVTN_Z4Z : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>;
+
+defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>;
+defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
+defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
+defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
+
+} // [HasSME2, HasFP8]
+
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d599ac4689e5cb3..002d5d28fcf8d53 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4002,3 +4002,24 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
 defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
 defm TBLQ_ZZZ  : sve2p1_tblq<"tblq">;
 } // End HasSVE2p1_or_HasSME2p1
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2orSME2, HasFP8] in {
+// FP8 upconvert
+defm F1CVT_ZZ     : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
+defm F2CVT_ZZ     : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
+defm BF1CVT_ZZ    : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
+defm BF2CVT_ZZ    : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
+defm F1CVTLT_ZZ   : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
+defm F2CVTLT_ZZ   : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
+defm BF1CVTLT_ZZ  : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
+defm BF2CVTLT_ZZ  : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
+
+// FP8 downconvert
+defm FCVTN_Z2Z_HtoB  : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
+defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>;
+defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>;
+defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
+} // End HasSVE2orSME2, HasFP8
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index cbcb49c3e2d74dc..5b00615b05c654a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3638,6 +3638,7 @@ static const struct Extension {
     {"sb", {AArch64::FeatureSB}},
     {"ssbs", {AArch64::FeatureSSBS}},
     {"tme", {AArch64::FeatureTME}},
+    {"fp8", {AArch64::FeatureFP8}},
 };
 
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 823115c7d025005..b85b5ed367ca376 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -2161,15 +2161,16 @@ multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
                                          mnemonic>;
 }
 
-class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
-    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn),
+class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
+                           RegisterOperand first_ty, RegisterOperand second_ty>
+    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
         mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
   bits<4> Zn;
   bits<5> Zd;
   let Inst{31-23} = 0b110000010;
-  let Inst{22}    = op{3};
-  let Inst{21-18} = 0b1000;
-  let Inst{17-16} = op{2-1};
+  let Inst{22}    = op{4};
+  let Inst{21-19} = 0b100;
+  let Inst{18-16} = op{3-1};
   let Inst{15-10} = 0b111000;
   let Inst{9-6}   = Zn;
   let Inst{5}     = op{0};
@@ -2178,12 +2179,17 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
 
 // SME2 multi-vec FP down convert two registers
 // SME2 multi-vec int down convert two registers
-multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
+multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
                                ValueType in_vt, SDPatternOperator intrinsic> {
-  def NAME :  sme2_cvt_vg2_single<mnemonic, op>;
+  def NAME :  sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
   def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
 }
 
+// SME2 multi-vec FP8 down convert two registers
+multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
+  def NAME :  sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
+}
+
 class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
                            RegisterOperand second_ty, string mnemonic>
     : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
@@ -2212,7 +2218,13 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
   def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
 }
 
-class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
+// SME2 multi-vec FP8 up convert two registers
+multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
+  def _BtoH : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
+}
+
+
+class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2,  RegisterOperand first_ty,
                           RegisterOperand second_ty, string mnemonic>
     : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
         mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
@@ -2221,7 +2233,9 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
   let Inst{31-24} = 0b11000001;
   let Inst{23}    = sz;
   let Inst{22}    = op{2};
-  let Inst{21-10} = 0b110011111000;
+  let Inst{21-20} = 0b11;
+  let Inst{19-16} = op2;
+  let Inst{15-10} = 0b111000;
   let Inst{9-7}   = Zn;
   let Inst{6-5}   = op{1-0};
   let Inst{4-0}   = Zd;
@@ -2229,13 +2243,18 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
 
 // SME2 multi-vec int down convert four registers
 multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
-  def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
-  def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;
+  def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+  def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
 
   def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
   def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
 }
 
+//SME2 multi-vec FP8 down convert four registers
+multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
+ def _StoB : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+}
+
 class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
                            RegisterOperand second_ty, string mnemonic>
     : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7bb457d9188210c..28e25f346cbdbc4 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10078,3 +10078,46 @@ multiclass sve2p1_tblq<string mnemonic> {
   def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
   def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
 }
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 Instructions
+//===----------------------------------------------------------------------===//
+
+// FP8 upconvert
+class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
+                          ZPRRegOp dst_ty, ZPRRegOp src_ty>
+    : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+      mnemonic, "\t$Zd, $Zn",
+      "", []>, Sched<[]>{
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-17} = 0b011001010000100;
+  let Inst{16} = L;
+  let Inst{15-12} = 0b0011;
+  let Inst{11-10} = opc;
+  let Inst{9-5} = Zn;
+  let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
+  def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
+}
+
+// FP8 downconvert
+class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
+                              ZPRRegOp dst_ty, RegisterOperand src_ty>
+    : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+      mnemonic, "\t$Zd, $Zn",
+      "", []>, Sched<[]>{
+  bits<5> Zd;
+  bits<4> Zn;
+  let Inst{31-12} = 0b01100101000010100011;
+  let Inst{11-10} = opc;
+  let Inst{9-6} = Zn;
+  let Inst{5} = 0b0;
+  let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
+  def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;
+}
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
new file mode 100644
index 000000000000000..cf48416d29d8a28
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8
+.arch armv9-a+nofp8
+bf1cvtl v0.8h, v0.8b
+// CHECK: error: instruction requires: fp8
+// CHECK: bf1cvtl v0.8h, v0.8b
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
new file mode 100644
index 000000000000000..8857d4f0bfbe422
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8
+bf1cvtl v0.8h, v0.8b
+// CHECK: bf1cvtl v0.8h, v0.8b
+
+.arch armv9-a+nofp8
diff --git a/llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnosti...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/69632