[clang] [llvm] [AArch64] Add FP8 Neon intrinsics for dot-product (PR #119911)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 10:11:23 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Momchil Velikov (momchil-velikov)
<details>
<summary>Changes</summary>
This patch adds the following intrinsics:
float16x4_t vdot_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpm)
float16x8_t vdotq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm)
float16x4_t vdot_lane_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x4_t vdot_laneq_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
---
Patch is 107.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119911.diff
25 Files Affected:
- (modified) clang/include/clang/AST/Type.h (+9)
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+18-6)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+5)
- (modified) clang/include/clang/Basic/arm_neon.td (+46-1)
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2)
- (modified) clang/lib/AST/ASTContext.cpp (+30-7)
- (modified) clang/lib/AST/ItaniumMangle.cpp (+5)
- (modified) clang/lib/AST/Type.cpp (+14-3)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+166-1)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+8)
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+10-3)
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+5-2)
- (modified) clang/lib/Sema/SemaChecking.cpp (+33-6)
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+308)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+143)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8.c (+57)
- (added) clang/test/Sema/builtin-shufflevector.c (+30)
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+22-2)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+43)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+83-45)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+14-14)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74)
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112)
``````````diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index f0eee77c73ef06d..cacd3d75ffcd8de 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2404,6 +2404,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
/// SVE vector or predicate, excluding tuple types such as svint32x4_t.
bool isSveVLSBuiltinType() const;
+ /// Determines if this is a *builtin* NEON vector type, a type not built with
+ /// `neon_vector_type`
+ bool isNeonVectorBuiltinType() const;
+
/// Returns the representative type for the element of an SVE builtin type.
/// This is used to represent fixed-length SVE vectors created with the
/// 'arm_sve_vector_bits' type attribute as VectorType.
@@ -2518,6 +2522,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
bool isFloat32Type() const;
bool isDoubleType() const;
bool isBFloat16Type() const;
+ bool isMFloat8Type() const;
bool isFloat128Type() const;
bool isIbm128Type() const;
bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8532,6 +8537,10 @@ inline bool Type::isBFloat16Type() const {
return isSpecificBuiltinType(BuiltinType::BFloat16);
}
+inline bool Type::isMFloat8Type() const {
+ return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
inline bool Type::isFloat128Type() const {
return isSpecificBuiltinType(BuiltinType::Float128);
}
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee7..6b704b386536c9b 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
// - IsBF true for vector of brain float elements.
//===----------------------------------------------------------------------===//
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ SVE_TYPE(Name, Id, SingletonId)
+#endif
+
#ifndef SVE_VECTOR_TYPE
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, true)
#endif
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
+ SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, false, false)
+#endif
+
#ifndef SVE_VECTOR_TYPE_FLOAT
#define SVE_VECTOR_TYPE_FLOAT(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF) \
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits, NF, false, true, false)
@@ -125,8 +135,7 @@ SVE_VECTOR_TYPE_FLOAT("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty
SVE_VECTOR_TYPE_BFLOAT("__SVBfloat16_t", "__SVBfloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, 1)
-// This is a 8 bits opaque type.
-SVE_VECTOR_TYPE_INT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1, false)
+SVE_VECTOR_TYPE_MFLOAT("__SVMfloat8_t", "__SVMfloat8_t", SveMFloat8, SveMFloat8Ty, 16, 8, 1)
//
// x2
@@ -148,7 +157,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, Sv
SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 8, 16, 2)
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2)
//
// x3
@@ -170,7 +179,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, Sv
SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 8, 16, 3)
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3)
//
// x4
@@ -192,7 +201,7 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, Sv
SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 8, 16, 4)
-SVE_VECTOR_TYPE_INT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4, false)
+SVE_VECTOR_TYPE_MFLOAT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4)
SVE_PREDICATE_TYPE_ALL("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16, 1)
SVE_PREDICATE_TYPE_ALL("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 16, 2)
@@ -200,11 +209,13 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T
SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
-AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1)
+SVE_SCALAR_TYPE("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 8)
+
AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1)
AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1)
#undef SVE_VECTOR_TYPE
+#undef SVE_VECTOR_TYPE_MFLOAT
#undef SVE_VECTOR_TYPE_BFLOAT
#undef SVE_VECTOR_TYPE_FLOAT
#undef SVE_VECTOR_TYPE_INT
@@ -213,4 +224,5 @@ AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloa
#undef SVE_OPAQUE_TYPE
#undef AARCH64_VECTOR_TYPE_MFLOAT
#undef AARCH64_VECTOR_TYPE
+#undef SVE_SCALAR_TYPE
#undef SVE_TYPE
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 0a245e2077f68ff..a7440a6d7826c90 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10545,6 +10545,9 @@ def err_vec_builtin_incompatible_vector : Error<
def err_vsx_builtin_nonconstant_argument : Error<
"argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
+def err_shufflevector_incompatible_index_vector : Error<
+ "second argument for __builtin_shufflevector must be integer vector "
+ "with length equal to the length of the first argument">;
def err_shufflevector_nonconstant_argument : Error<
"index for __builtin_shufflevector must be a constant integer">;
def err_shufflevector_argument_too_large : Error<
@@ -10552,6 +10555,8 @@ def err_shufflevector_argument_too_large : Error<
"of vector elements">;
def err_shufflevector_minus_one_is_undefined_behavior_constexpr : Error<
"index for __builtin_shufflevector not within the bounds of the input vectors; index of -1 found at position %0 is not permitted in a constexpr context">;
+def err_shufflevector_unsupported_result_vector_type : Error<
+ "unsupported vector type for the result">;
def err_convertvector_non_vector : Error<
"first argument to __builtin_convertvector must be a vector">;
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index ef89fa4358dfeb0..7c2e68b3f7a195c 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2125,6 +2125,51 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
}
}
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,bf16,neon" in {
+ def VBF1CVT_BF16_MF8 : VInst<"vcvt1_bf16_mf8_fpm", "(QB).V", "m">;
+ def VBF1CVT_LOW_BF16_MF8 : VInst<"vcvt1_low_bf16_mf8_fpm", "B.V", "Qm">;
+ def VBF2CVTL_BF16_MF8 : VInst<"vcvt2_bf16_mf8_fpm", "(QB).V", "m">;
+ def VBF2CVTL_LOW_BF16_MF8 : VInst<"vcvt2_low_bf16_mf8_fpm", "B.V", "Qm">;
+ def VBF1CVTL2_HIGH_BF16_MF8 : VInst<"vcvt1_high_bf16_mf8_fpm", "B.V", "Qm">;
+ def VBF2CVTL2_HIGH_BF16_MF8 : VInst<"vcvt2_high_bf16_mf8_fpm", "B.V", "Qm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
+ def VF1CVT_F16_MF8 : VInst<"vcvt1_f16_mf8_fpm", "(>QF).V", "m">;
+ def VF1CVT_LOW_F16_MF8 : VInst<"vcvt1_low_f16_mf8_fpm", "(>F).V", "Qm">;
+ def VF2CVTL_F16_MF8 : VInst<"vcvt2_f16_mf8_fpm", "(>QF).V", "m">;
+ def VF2CVTL_LOW_F16_MF8 : VInst<"vcvt2_low_f16_mf8_fpm", "(>F).V", "Qm">;
+ def VF1CVTL2_HIGH_F16_MF8 : VInst<"vcvt1_high_f16_mf8_fpm", "(>F).V", "Qm">;
+ def VF2CVTL2_HIGH_F16_MF8 : VInst<"vcvt2_high_f16_mf8_fpm", "(>F).V", "Qm">;
+
+ def VCVTN_LOW_F8_F32 : VInst<"vcvt_mf8_f32_fpm", ".(>>QF)(>>QF)V", "m">;
+ def VCVTN_HIGH_F8_F32 : VInst<"vcvt_high_mf8_f32_fpm", ".(q)(>>F)(>>F)V", "Qm">;
+ def VCVTN_F8_F16 : VInst<"vcvt_mf8_f16_fpm", ".(>F)(>F)V", "m">;
+ def VCVTNQ_F8_F16 : VInst<"vcvtq_mf8_f16_fpm", ".(>F)(>F)V", "Qm">;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot2,neon" in {
+ def VDOT_F16_MF8 : VInst<"vdot_f16_mf8_fpm", "(>F)(>F)..V", "m">;
+ def VDOTQ_F16_MF8 : VInst<"vdotq_f16_mf8_fpm", "(>F)(>F)..V", "Qm">;
+
+ def VDOT_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F)..IV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+ def VDOT_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F).QIV", "m", [ImmCheck<3, ImmCheck0_7, 0>]>;
+
+ def VDOTQ_LANE_F16_MF8 : VInst<"vdotq_lane_f16_mf8_fpm", "(>F)(>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+ def VDOTQ_LANEQ_F16_MF8 : VInst<"vdotq_laneq_f16_mf8_fpm", "(>F)(>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_7, 0>]>;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot4,neon" in {
+ def VDOT_F32_MF8 : VInst<"vdot_f32_mf8_fpm", "(>>F)(>>F)..V", "m">;
+ def VDOTQ_F32_MF8 : VInst<"vdotq_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">;
+
+ def VDOT_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F)..IV", "m", [ImmCheck<3, ImmCheck0_1, 0>]>;
+ def VDOT_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F).QIV", "m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+
+ def VDOTQ_LANE_F32_MF8 : VInst<"vdotq_lane_f32_mf8_fpm", "(>>F)(>>F).qIV", "Qm", [ImmCheck<3, ImmCheck0_1, 0>]>;
+ def VDOTQ_LANEQ_F32_MF8 : VInst<"vdotq_laneq_f32_mf8_fpm", "(>>F)(>>F)..IV", "Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+}
+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
@@ -2134,4 +2179,4 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
// fscale
def FSCALE_V128 : WInst<"vscale", "..(.S)", "QdQfQh">;
def FSCALE_V64 : WInst<"vscale", "(.q)(.q)(.qS)", "fh">;
-}
\ No newline at end of file
+}
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index fd800e5a6278e4a..b9b9d509c22512b 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -243,6 +243,7 @@ def OP_UNAVAILABLE : Operation {
// B: change to BFloat16
// P: change to polynomial category.
// p: change polynomial to equivalent integer category. Otherwise nop.
+// V: change to fpm_t
//
// >: double element width (vector size unchanged).
// <: half element width (vector size unchanged).
@@ -301,6 +302,7 @@ class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class VInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 6ec927e13a7552e..904df6f6163bc03 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2275,6 +2275,11 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
Width = NumEls * ElBits * NF; \
Align = NumEls * ElBits; \
break;
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ case BuiltinType::Id: \
+ Width = Bits; \
+ Align = Bits; \
+ break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
@@ -4395,15 +4400,18 @@ ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
ElBits, NF) \
case BuiltinType::Id: \
return {BFloat16Ty, llvm::ElementCount::getScalable(NumEls), NF};
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
+ ElBits, NF) \
+ case BuiltinType::Id: \
+ return {MFloat8Ty, llvm::ElementCount::getScalable(NumEls), NF};
#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
case BuiltinType::Id: \
return {BoolTy, llvm::ElementCount::getScalable(NumEls), NF};
#define AARCH64_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
ElBits, NF) \
case BuiltinType::Id: \
- return {getIntTypeForBitwidth(ElBits, false), \
- llvm::ElementCount::getFixed(NumEls), NF};
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
+ return {MFloat8Ty, llvm::ElementCount::getFixed(NumEls), NF};
+#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \
@@ -4465,11 +4473,16 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
EltTySize == ElBits && NumElts == (NumEls * NF) && NumFields == 1) { \
return SingletonId; \
}
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls, \
+ ElBits, NF) \
+ if (EltTy->isMFloat8Type() && EltTySize == ElBits && \
+ NumElts == (NumEls * NF) && NumFields == 1) { \
+ return SingletonId; \
+ }
#define SVE_PREDICATE_TYPE_ALL(Name, MangledName, Id, SingletonId, NumEls, NF) \
if (EltTy->isBooleanType() && NumElts == (NumEls * NF) && NumFields == 1) \
return SingletonId;
-#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId)
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId)
+#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
} else if (Target->hasRISCVVTypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
@@ -12177,8 +12190,15 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
- // TODO: No way to make AltiVec vectors in builtins yet.
- Type = Context.getVectorType(ElementType, NumElements, VectorKind::Generic);
+ if (ElementType == Context.MFloat8Ty) {
+ assert((NumElements == 8 || NumElements == 16) &&
+ "Invalid number of elements");
+ Type = NumElements == 8 ? Context.MFloat8x8Ty : Context.MFloat8x16Ty;
+ } else {
+ // TODO: No way to make AltiVec vectors in builtins yet.
+ Type =
+ Context.getVectorType(ElementType, NumElements, VectorKind::Generic);
+ }
break;
}
case 'E': {
@@ -12234,6 +12254,9 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
case 'p':
Type = Context.getProcessIDType();
break;
+ case 'm':
+ Type = Context.MFloat8Ty;
+ break;
}
// If there are modifiers and if we're allowed to parse them, go for it.
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 47aa9b40dab845b..9404f9fd9b151d5 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3438,6 +3438,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
type_name = MangledName; \
Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \
break;
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ case BuiltinType::Id: \
+ type_name = MangledName; \
+ Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \
+ break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 976361d07b68bf5..1c21fad75253d81 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2527,9 +2527,7 @@ bool Type::isSVESizelessBuiltinType() const {
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
case BuiltinType::Id: \
return true;
-#define AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
- case BuiltinType::Id: \
- return false;
+#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
default:
return false;
@@ -2578,6 +2576,19 @@ bool Type::isSveVLSBuiltinType() const {
return false;
}
+bool Type::isNeonVectorBuiltinType() const {
+ if (const BuiltinType *BT = getAs<BuiltinType>()) {
+ switch (BT->getKind()) {
+ case BuiltinType::MFloat8x8:
+ case BuiltinType::MFloat8x16:
+ return true;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
QualType Type::getSizelessVectorEltType(const ASTContext &Ctx) const {
assert(isSizelessVectorType() && "Must be sizeless vector type");
// Currently supports SVE and RVV
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 49a4c1ecc825e74..1ab04663eaaeca9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6868,6 +6868,32 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
return Builder.CreateCall(F, Ops, name);
}
+Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
+ SmallVectorImpl<Value *> &Ops,
+ Value *FPM, const char *name) {
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
+ return EmitNeonCall(F, Ops, name);
+}
+
+llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
+ unsigned IID, bool ExtendLane, llvm::Type *RetTy,
+ SmallVectorImpl<llvm::Value *> &Ops, unsigned ICEArguments,
+ const CallExpr *E, const char *name) {
+
+ const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
+ RetTy->getPrimitiveSizeInBits();
+ llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
+ Ops[1]->getType()};
+ if (ExtendLane) {
+ auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
+ Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
+ Builder.getInt64(0));
+ }
+ llvm::Value *FPM =
+ EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
+ return EmitFP8NeonCall(CGM.getIntrinsic(IID, Tys), Ops, FPM, name);
+}
+
Value *CodeGenFunction::EmitNeonShiftVector(Val...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/119911
More information about the llvm-commits
mailing list