[llvm] [AArch64][2023][FP8]: Add FP8 instructions assembly and disassembly. (PR #69632)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 11:57:07 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: None (hassnaaHamdi)
<details>
<summary>Changes</summary>
This patch adds the feature flag FP8 and the assembly/disassembly for the following instructions of NEON, SVE2 and SME2:
* NEON Instructions:
+ Advanced SIMD two-register miscellaneous:
- BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 — BF1CVTL
- BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 — BF2CVTL - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 — F1CVTL - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 — F2CVTL
+ Advanced SIMD three-register extension: - FCVTN, FCVTN2 (FP32 to FP8) - FCVTN (FP16 to FP8)
+ Advanced SIMD three same
- FSCALE
* SVE2 Instructions:
+ Downconvert instructions:
- FCVTN_Z2Z_HtoB
- FCVTNB_Z2Z_StoB - BFCVTN_Z2Z_HtoB - FCVTNT_Z2Z_StoB
+ Upconvert instructions: - F1CVT_ZZ, F2CVT_ZZ - BF1CVT_ZZ, BF2CVT_ZZ - F1CVTLT_ZZ, F2CVTLT_ZZ - BF1CVTLT_ZZ, BF2CVTLT_ZZ
* SME2 Instructions:
- F1CVT_2ZZ, F2CVT_2ZZ
- BF1CVT_2ZZ, BF2CVT_2ZZ - F1CVTL_2ZZ, F2CVTL_2ZZ - BF1CVTL_2ZZ, BF2CVTL_2ZZ - FCVT_Z2Z_HtoB, BFCVT_Z2Z_HtoB - FCVT_Z4Z - FCVTN_Z4Z - FSCALE_2ZZ, FSCALE_4ZZ - FSCALE_2Z2Z, FSCALE_4Z4Z
That is according to this documentation:
https://developer.arm.com/documentation/ddi0602/2023-09
Change-Id: I56008a1b74c21ad30f36d18c4895c4dd1ba48920
---
Patch is 80.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69632.diff
23 Files Affected:
- (modified) llvm/include/llvm/TargetParser/AArch64TargetParser.h (+3-1)
- (modified) llvm/lib/Target/AArch64/AArch64.td (+3)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+57)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+15)
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+31-8)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+21)
- (modified) llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (+1)
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+30-11)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+43)
- (added) llvm/test/MC/AArch64/FP8/directive-arch-negative.s (+7)
- (added) llvm/test/MC/AArch64/FP8/directive-arch.s (+7)
- (added) llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnostics.s (+84)
- (added) llvm/test/MC/AArch64/FP8/miscellaneous-fp8.s (+355)
- (added) llvm/test/MC/AArch64/FP8_SME2/cvt-diagnostics.s (+87)
- (added) llvm/test/MC/AArch64/FP8_SME2/cvt.s (+157)
- (added) llvm/test/MC/AArch64/FP8_SME2/fscale-diagnostics.c (+62)
- (added) llvm/test/MC/AArch64/FP8_SME2/fscale.s (+160)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvt-diagnostics.s (+131)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvt.s (+237)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvtn-diagnostics.s (+70)
- (added) llvm/test/MC/AArch64/FP8_SVE2/fcvtn.s (+125)
- (modified) llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s (+2-2)
- (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+3-1)
``````````diff
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 616f2d79028615d..e22ab824e51e2eb 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -159,7 +159,8 @@ enum ArchExtKind : unsigned {
AEK_RASv2 = 55, // FEAT_RASv2
AEK_ITE = 56, // FEAT_ITE
AEK_GCS = 57, // FEAT_GCS
- AEK_NUM_EXTENSIONS = AEK_GCS + 1
+ AEK_FP8 = 58, // FEAT_FP8
+ AEK_NUM_EXTENSIONS
};
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
// clang-format on
@@ -267,6 +268,7 @@ inline constexpr ExtensionInfo Extensions[] = {
{"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_INIT, "", 0},
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
+ {"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
};
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 70973c92305aa62..9254676458fb831 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -127,6 +127,9 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;
+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
+ "Enable FP8 Instructions (FEAT_FP8)">;
+
// This flag is currently still labeled as Experimental, but when fully
// implemented this should tell the compiler to use the zeroing pseudos to
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e5dbfa404b3c6bf..129866f39edc7ce 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6056,6 +6056,53 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
+// FP8 assembly/disassembly classes
+
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD three-register extension
+//----------------------------------------------------------------------------
+class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
+ RegisterOperand regtype1,
+ RegisterOperand regtype2, string asm,
+ string kind1, string kind2>
+ : I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
+ "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0b1;
+ let Inst{14-11} = op;
+ let Inst{10} = 0b1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+
+// FCVTN (FP16 to FP8)
+multiclass SIMDThreeSameSizeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
+ def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">;
+}
+
+class BaseSIMDThreeSameSizeVectorFP8Tied<bit Q, bits<2> sz, string asm, string kind1,
+ string kind2, RegisterOperand RegType> :
+ BaseSIMDThreeSameVectorTied<Q, 0b0, {sz,0b0}, 0b11110, RegType, asm, kind1, []> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+// FCVTN, FCVTN2 (FP32 to FP8)
+multiclass SIMDThreeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
+ def 2v16f8 : BaseSIMDThreeSameSizeVectorFP8Tied<0b1, 0b00, asm#2, ".16b", ".4s", V128>;
+}
+
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
//----------------------------------------------------------------------------
@@ -6479,6 +6526,16 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
}
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD two-register miscellaneous
+//----------------------------------------------------------------------------
+multiclass SIMDMixedTwoVectorFP8<bits<2>sz, string asm> {
+ def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
+ asm, ".8h", ".8b", []>;
+ def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
+ asm#2, ".8h", ".16b", []>;
+}
+
class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
bits<5> opcode, RegisterOperand regtype, string asm,
string kind, string zero, ValueType dty,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index df59dc4ad27fadb..aec57737ab49636 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -160,6 +160,8 @@ def HasSME2 : Predicate<"Subtarget->hasSME2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
+def HasFP8 : Predicate<"Subtarget->hasFP8()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -171,6 +173,10 @@ def HasSVE2orSME
: Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
"sve2 or sme">;
+def HasSVE2orSME2
+ : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
+ "sve2 or sme2">;
def HasSVE2p1_or_HasSME
: Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -9247,6 +9253,15 @@ let Predicates = [HasD128] in {
}
}
+let Predicates = [HasFP8] in {
+ defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
+ defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
+ defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
+ defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
+ defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
+ defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
+ defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
+} // End let Predicates = [HasFP8]
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 2685f2e3c8108e5..cbdc38965fc5c7e 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -330,14 +330,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>;
defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>;
-defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
-defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
-defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
-defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
-
-defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
-defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
-defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
+defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
+defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
+defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
+defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
+
+defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
+defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
+defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>;
defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>;
defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
@@ -855,3 +855,26 @@ defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">;
defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>;
defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>;
}
+
+let Predicates = [HasSME2, HasFP8] in {
+defm F1CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>;
+defm F1CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>;
+defm BF1CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>;
+defm BF1CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>;
+defm F2CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>;
+defm F2CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>;
+defm BF2CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>;
+defm BF2CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>;
+
+defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>;
+defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>;
+defm FCVT_Z4Z : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>;
+defm FCVTN_Z4Z : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>;
+
+defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>;
+defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
+defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
+defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
+
+} // [HasSME2, HasFP8]
+
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d599ac4689e5cb3..002d5d28fcf8d53 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4002,3 +4002,24 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
} // End HasSVE2p1_or_HasSME2p1
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2orSME2, HasFP8] in {
+// FP8 upconvert
+defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
+defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
+defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
+defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
+defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
+defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
+defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
+defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
+
+// FP8 downconvert
+defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
+defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>;
+defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>;
+defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
+} // End HasSVE2orSME2, HasFP8
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index cbcb49c3e2d74dc..5b00615b05c654a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3638,6 +3638,7 @@ static const struct Extension {
{"sb", {AArch64::FeatureSB}},
{"ssbs", {AArch64::FeatureSSBS}},
{"tme", {AArch64::FeatureTME}},
+ {"fp8", {AArch64::FeatureFP8}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 823115c7d025005..b85b5ed367ca376 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -2161,15 +2161,16 @@ multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
mnemonic>;
}
-class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn),
+class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
+ RegisterOperand first_ty, RegisterOperand second_ty>
+ : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<4> Zn;
bits<5> Zd;
let Inst{31-23} = 0b110000010;
- let Inst{22} = op{3};
- let Inst{21-18} = 0b1000;
- let Inst{17-16} = op{2-1};
+ let Inst{22} = op{4};
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = op{3-1};
let Inst{15-10} = 0b111000;
let Inst{9-6} = Zn;
let Inst{5} = op{0};
@@ -2178,12 +2179,17 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
// SME2 multi-vec FP down convert two registers
// SME2 multi-vec int down convert two registers
-multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
+multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
ValueType in_vt, SDPatternOperator intrinsic> {
- def NAME : sme2_cvt_vg2_single<mnemonic, op>;
+ def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}
+// SME2 multi-vec FP8 down convert two registers
+multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
+ def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
+}
+
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
@@ -2212,7 +2218,13 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
}
-class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
+// SME2 multi-vec FP8 up convert two registers
+multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
+ def _BtoH : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
+}
+
+
+class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
@@ -2221,7 +2233,9 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
let Inst{31-24} = 0b11000001;
let Inst{23} = sz;
let Inst{22} = op{2};
- let Inst{21-10} = 0b110011111000;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op2;
+ let Inst{15-10} = 0b111000;
let Inst{9-7} = Zn;
let Inst{6-5} = op{1-0};
let Inst{4-0} = Zd;
@@ -2229,13 +2243,18 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
// SME2 multi-vec int down convert four registers
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
- def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;
+ def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+ def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}
+//SME2 multi-vec FP8 down convert four registers
+multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
+ def _StoB : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+}
+
class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7bb457d9188210c..28e25f346cbdbc4 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10078,3 +10078,46 @@ multiclass sve2p1_tblq<string mnemonic> {
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
}
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 Instructions
+//===----------------------------------------------------------------------===//
+
+// FP8 upconvert
+class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
+ ZPRRegOp dst_ty, ZPRRegOp src_ty>
+ : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+ mnemonic, "\t$Zd, $Zn",
+ "", []>, Sched<[]>{
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-17} = 0b011001010000100;
+ let Inst{16} = L;
+ let Inst{15-12} = 0b0011;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
+ def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
+}
+
+// FP8 downconvert
+class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
+ ZPRRegOp dst_ty, RegisterOperand src_ty>
+ : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+ mnemonic, "\t$Zd, $Zn",
+ "", []>, Sched<[]>{
+ bits<5> Zd;
+ bits<4> Zn;
+ let Inst{31-12} = 0b01100101000010100011;
+ let Inst{11-10} = opc;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
+ def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;
+}
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
new file mode 100644
index 000000000000000..cf48416d29d8a28
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8
+.arch armv9-a+nofp8
+bf1cvtl v0.8h, v0.8b
+// CHECK: error: instruction requires: fp8
+// CHECK: bf1cvtl v0.8h, v0.8b
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
new file mode 100644
index 000000000000000..8857d4f0bfbe422
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8
+bf1cvtl v0.8h, v0.8b
+// CHECK: bf1cvtl v0.8h, v0.8b
+
+.arch armv9-a+nofp8
diff --git a/llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnosti...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/69632
More information about the llvm-commits
mailing list