[clang] [llvm] [AArch64] Add intrinsics for SME FP8 FDOT LANE instructions (PR #118492)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 03:56:49 PST 2024
================
@@ -5737,3 +5768,113 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> {
// Multiple vectors
def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
}
+
+// FP8 SME FDOT instructions
+
+// Selection DAG patterns - map to first level of pseudo-instructions (xxx_PSEUDO)
+
+// Selection pattern for an indexed FP8 FDOT intrinsic that takes two source vectors.
+// Matches `intrinsic` applied to a tile-slice address (`tileslice` of a base register
+// and an `offset_ty` offset), vectors $Zn1/$Zn2 and $Zm of type `vt`, an i32 immediate
+// $i, and an i64 $fpmr value; emits the instruction named `name # _PSEUDO`.
+// NOTE(review): review feedback on this patch says this pseudo-based FPMR path is dead
+// now that FPMR is set by lowering llvm.aarch64.set.fpmr - confirm before keeping.
+class SME2_FP8_FDOT_Index_VG1x2_Pat<string name, SDPatternOperator intrinsic,
+ ComplexPattern tileslice, Operand offset_ty, Operand imm_ty,
+ ValueType vt = nxv16i8>
+ : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)),
+ vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i), i64:$fpmr),
+ (!cast<Instruction>(name # _PSEUDO) $base, $offset,
+ // Pack the two source vectors into one ZPR2Mul2 tuple (zsub0, zsub1).
+ (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
+ ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>;
+
+// Selection pattern for an indexed FP8 FDOT intrinsic that takes four source vectors.
+// Matches `intrinsic` applied to a tile-slice address (`tileslice` of a base register
+// and an `offset_ty` offset), vectors $Zn1..$Zn4 and $Zm of type `vt`, an i32 immediate
+// $i, and an i64 $fpmr value; emits the instruction named `name # _PSEUDO`.
+// NOTE(review): review feedback on this patch says this pseudo-based FPMR path is dead
+// now that FPMR is set by lowering llvm.aarch64.set.fpmr - confirm before keeping.
+class SME2_FP8_FDOT_Index_VG1x4_Pat<string name, SDPatternOperator intrinsic,
+ ComplexPattern tileslice, Operand offset_ty, Operand imm_ty,
+ ValueType vt = nxv16i8>
+ : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)),
+ vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4,
+ vt:$Zm, (i32 imm_ty:$i), i64:$fpmr),
+ (!cast<Instruction>(name # _PSEUDO) $base, $offset,
+ // Pack the four source vectors into one ZPR4Mul4 tuple (zsub0..zsub3).
+ (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
+ ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>;
+
+// First level pseudo-instructions (xxx_PSEUDO) - transformed to second level pseudo-instructions (xxx_FPMR_PSEUDO)
+// during instruction selection.
+// Indexed variant of the first-level FP8 FDOT pseudo.
+// Declares no outputs. Inputs are the slice-index register $Rv, the offset $offs,
+// the sources $Zn and $Zm, the immediate index $i, and a GPR64 $fpmr operand.
+// NOTE(review): the `0` passed to SMEPseudo2Instr presumably marks this as the pseudo
+// side of the `name` mapping - confirm against the SMEPseudo2Instr definition.
+// NOTE(review): review feedback on this patch says this pseudo is dead now that FPMR
+// is set by lowering llvm.aarch64.set.fpmr - confirm before keeping.
+class sme2_fp8_fdot_index_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty, Operand imm_ty>
+ : SMEPseudo2Instr<name, 0>,
+ Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []> {
+ let SMEMatrixType = SMEMatrixArray;
+ // Ask for custom-inserter handling of this pseudo after instruction selection.
+ let usesCustomInserter = 1;
+}
+
+// Non-indexed variant of the first-level FP8 FDOT pseudo.
+// Declares no outputs. Inputs are the slice-index register $Rv, the offset $offs,
+// the sources $Zn and $Zm, and a GPR64 $fpmr operand (there is no immediate index).
+// NOTE(review): the `0` passed to SMEPseudo2Instr presumably marks this as the pseudo
+// side of the `name` mapping - confirm against the SMEPseudo2Instr definition.
+// NOTE(review): review feedback on this patch says this pseudo is dead now that FPMR
+// is set by lowering llvm.aarch64.set.fpmr - confirm before keeping.
+class sme2_fp8_fdot_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty>
+ : SMEPseudo2Instr<name, 0>,
+ Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []> {
+ let SMEMatrixType = SMEMatrixArray;
+ // Ask for custom-inserter handling of this pseudo after instruction selection.
+ let usesCustomInserter = 1;
+}
+
+// Second level pseudo-instruction - expanded to real instruction by the AArch64 pseudo instruction expansion pass
+// Indexed variant of the second-level FP8 FDOT pseudo.
+// Produces the matrix operand $ZAda and takes the incoming matrix value $_ZAda, the
+// slice-index register $Rv, the offset $offs, the sources $Zn and $Zm, the immediate
+// index $i, and a GPR64 $fpmr operand.
+// NOTE(review): review feedback on this patch says this pseudo is dead now that FPMR
+// is set by lowering llvm.aarch64.set.fpmr - confirm before keeping.
+class sme2_fp8_fdot_index_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty,
+ RegisterOperand src1_ty, RegisterOperand src2_ty,
+ Operand imm_ty>
+ : Pseudo<(outs matrix_ty:$ZAda),
+ (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs,
+ src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []>,
+ SMEPseudo2Instr<name, 1> {
+ // This pseudo carries no scheduling information.
+ let hasNoSchedulingInfo = 1;
+ // Tie the matrix output to the matrix input so both use the same operand.
+ let Constraints = "$ZAda = $_ZAda";
+}
+
+// Non-indexed variant of the second-level FP8 FDOT pseudo.
+// Produces the matrix operand $ZAda and takes the incoming matrix value $_ZAda, the
+// slice-index register $Rv, the offset $offs, the sources $Zn and $Zm, and a GPR64
+// $fpmr operand (there is no immediate index).
+// NOTE(review): review feedback on this patch says this pseudo is dead now that FPMR
+// is set by lowering llvm.aarch64.set.fpmr - confirm before keeping.
+class sme2_fp8_fdot_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty,
+ RegisterOperand src1_ty, RegisterOperand src2_ty>
+ : Pseudo<(outs matrix_ty:$ZAda),
+ (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs,
+ src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []>,
+ SMEPseudo2Instr<name, 1> {
+ // This pseudo carries no scheduling information.
+ let hasNoSchedulingInfo = 1;
+ // Tie the matrix output to the matrix input so both use the same operand.
+ let Constraints = "$ZAda = $_ZAda";
+}
----------------
SpencerAbson wrote:
This entire section is dead and can be removed; it relates to how we originally tied the emission of `msr FPMR, Xn` to the target instruction.
That is now handled by lowering `llvm.aarch64.set.fpmr`, which I can see is already in this patch, so no other change is needed.
https://github.com/llvm/llvm-project/pull/118492
More information about the llvm-commits
mailing list