[all-commits] [llvm/llvm-project] f75860: [AArch64] Implement NEON FP8 intrinsics for fused multiply-add
Momchil Velikov via All-commits
all-commits at lists.llvm.org
Mon Jan 27 16:39:06 PST 2025
Branch: refs/heads/main
Home: https://github.com/llvm/llvm-project
Commit: f75860f89522453f361e1ef54d7a33be2a2d75b1
https://github.com/llvm/llvm-project/commit/f75860f89522453f361e1ef54d7a33be2a2d75b1
Author: Momchil Velikov <momchil.velikov at arm.com>
Date: 2025-01-28 (Tue, 28 Jan 2025)
Changed paths:
M clang/include/clang/Basic/arm_neon.td
M clang/lib/CodeGen/CGBuiltin.cpp
M clang/lib/CodeGen/CodeGenFunction.h
A clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
A clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c
M llvm/include/llvm/IR/IntrinsicsAArch64.td
M llvm/lib/Target/AArch64/AArch64InstrFormats.td
M llvm/lib/Target/AArch64/AArch64InstrInfo.td
A llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll
Log Message:
-----------
[AArch64] Implement NEON FP8 intrinsics for fused multiply-add (#123615)
This patch adds the following intrinsics:
* Fused multiply-add non-indexed
float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t)
* Floating-point multiply-add long to half-precision (vector, by element)
float16x8_t vmlalbq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vmlalbq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vmlaltq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vmlaltq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
* Floating-point multiply-add long-long to single-precision (vector, by element)
float32x4_t vmlallbbq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallbbq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallbtq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallbtq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlalltbq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlalltbq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallttq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallttq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
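A rough usage sketch (editor's illustration, not part of the commit message),
showing one non-indexed and one by-element call. The fpm_t value carries the
FP8 format/scaling state defined by the ACLE FP8 extension and is simply taken
as a parameter here; the "bottom"/even-element semantics in the comments, the
wrapper names mla_bottom/mla_bb_lane0, and the __ARM_FEATURE_FP8FMA guard are
assumptions based on the ACLE and architecture documents, not on this patch.

    #include <arm_neon.h>

    #ifdef __ARM_FEATURE_FP8FMA  /* assumed feature macro for FEAT_FP8FMA */
    /* Non-indexed form: multiply the "bottom" (even-numbered) FP8 elements
       of a and b, widened to half precision, accumulating into acc. */
    float16x8_t mla_bottom(float16x8_t acc, mfloat8x16_t a, mfloat8x16_t b,
                           fpm_t fpm) {
      return vmlalbq_f16_mf8_fpm(acc, a, b, fpm);
    }

    /* By-element form: the second multiplicand is broadcast from a single
       lane of b; the lane index must be a compile-time constant (0 here). */
    float32x4_t mla_bb_lane0(float32x4_t acc, mfloat8x16_t a, mfloat8x8_t b,
                             fpm_t fpm) {
      return vmlallbbq_lane_f32_mf8_fpm(acc, a, b, 0, fpm);
    }
    #endif

Building this would need a compiler and target with FP8 FMA support enabled
(e.g. a sufficiently recent Clang with an appropriate -march setting, again an
assumption).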