[clang] [AArch64] Implement FP8 floating-point mode helper intrinsics (PR #100608)

Momchil Velikov via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 3 09:44:36 PDT 2024


https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/100608

>From 17964507593a4ae3d2b13c4fe84500472705485f Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 25 Jul 2024 18:25:40 +0100
Subject: [PATCH 1/2] [AArch64] Implement FP8 floating-point mode helper
 intrinsics

---
 clang/test/CodeGen/aarch64-fpm-helpers.c | 162 +++++++++++++++++++++++
 clang/utils/TableGen/NeonEmitter.cpp     |  54 ++++++++
 2 files changed, 216 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-fpm-helpers.c

diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c
new file mode 100644
index 00000000000000..dba79cebae5478
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-fpm-helpers.c
@@ -0,0 +1,162 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c   -DUSE_NEON_H  %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c   -DUSE_SVE_H   %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c   -DUSE_SME_H   %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_NEON_H  %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SVE_H   %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SME_H   %s -o - | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef USE_NEON_H
+#include "arm_neon.h"
+#endif
+
+#ifdef USE_SVE_H
+#include "arm_sve.h"
+#endif
+
+#ifdef USE_SME_H
+#include "arm_sme.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// CHECK-LABEL: define dso_local noundef i64 @test_init(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_init() { return __arm_fpm_init(); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_src1_1() {
+  return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E5M2);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 1
+//
+fpm_t test_src1_2() {
+  return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E4M3);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_src2_1() {
+  return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E5M2);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 8
+//
+fpm_t test_src2_2() {
+  return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E4M3);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst1_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_dst1_1() {
+  return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E5M2);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst2_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 64
+//
+fpm_t test_dst2_2() {
+  return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E4M3);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_of_mul_1() {
+  return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_INFNAN);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 16384
+//
+fpm_t test_of_mul_2() {
+  return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_SATURATE);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_of_cvt_1() {
+  return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_INFNAN);
+}
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 32768
+//
+fpm_t test_of_cvt_2() {
+  return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_SATURATE);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_lscale(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 8323072
+//
+fpm_t test_lscale() { return __arm_set_fpm_lscale(__arm_fpm_init(), 127); }
+
+// CHECK-LABEL: define dso_local noundef i64 @test_lscale2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 270582939648
+//
+fpm_t test_lscale2() { return __arm_set_fpm_lscale2(__arm_fpm_init(), 63); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 2147483648
+//
+fpm_t test_nscale_1() { return __arm_set_fpm_nscale(__arm_fpm_init(), -128); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 2130706432
+//
+fpm_t test_nscale_2() { return __arm_set_fpm_nscale(__arm_fpm_init(), 127); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 4278190080
+//
+fpm_t test_nscale_3() { return __arm_set_fpm_nscale(__arm_fpm_init(), -1); }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 8ec8e67388bbd2..58b36a14ef9b8e 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2582,6 +2582,60 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) {
   OS << "typedef double float64_t;\n";
   OS << "#endif\n\n";
 
+  OS << R"(
+typedef uint64_t fpm_t;
+
+enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
+
+enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_fpm_init(void) {
+  return 0;
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
+  return (__fpm & ~7ull) | (fpm_t)__format;
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
+  return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
+  return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
+  return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
+  return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) { /* sets bits [22:16] */
+  return (__fpm & ~0x7f0000ull) | ((__scale & 0x7full) << 16u); /* 7-bit mask keeps excess bits out of the fields above */
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
+  return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) { /* sets bits [37:32]; keeps only the low 32 bits of __fpm */
+  return (uint32_t)__fpm | ((__scale & 0x3full) << 32u); /* 6-bit mask keeps excess bits out of bits 38 and up */
+}
+
+)";
+
   emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS);
 
   emitNeonTypeDefs("bQb", OS);

>From 01548e238ccd0f5a3ed1da318929ca73f9181102 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 3 Sep 2024 17:41:43 +0100
Subject: [PATCH 2/2] [fixup] Update the test

---
 clang/test/CodeGen/aarch64-fpm-helpers.c | 63 +++++++++++++-----------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c
index dba79cebae5478..4bced01d5c71fa 100644
--- a/clang/test/CodeGen/aarch64-fpm-helpers.c
+++ b/clang/test/CodeGen/aarch64-fpm-helpers.c
@@ -25,6 +25,9 @@
 extern "C" {
 #endif
 
+#define INIT_ZERO 0
+#define INIT_ONES 0xffffffffffffffffU
+
 // CHECK-LABEL: define dso_local noundef i64 @test_init(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -32,94 +35,94 @@ extern "C" {
 //
 fpm_t test_init() { return __arm_fpm_init(); }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_1(
+// CHECK-LABEL: define dso_local noundef i64 @test_src1_1(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i64 0
+// CHECK-NEXT:    ret i64 -8
 //
 fpm_t test_src1_1() {
-  return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E5M2);
+  return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_2(
+// CHECK-LABEL: define dso_local noundef i64 @test_src1_2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 1
 //
 fpm_t test_src1_2() {
-  return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E4M3);
+  return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_1(
+// CHECK-LABEL: define dso_local noundef i64 @test_src2_1(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i64 0
+// CHECK-NEXT:    ret i64 -57
 //
 fpm_t test_src2_1() {
-  return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E5M2);
+  return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_2(
+// CHECK-LABEL: define dso_local noundef i64 @test_src2_2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 8
 //
 fpm_t test_src2_2() {
-  return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E4M3);
+  return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst1_1(
+// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i64 0
+// CHECK-NEXT:    ret i64 -449
 //
 fpm_t test_dst1_1() {
-  return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E5M2);
+  return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst2_2(
+// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 64
 //
 fpm_t test_dst2_2() {
-  return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E4M3);
+  return __arm_set_fpm_dst_format(INIT_ZERO, __ARM_FPM_E4M3);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_1(
+// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_1(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i64 0
+// CHECK-NEXT:    ret i64 -16385
 //
 fpm_t test_of_mul_1() {
-  return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_INFNAN);
+  return __arm_set_fpm_overflow_mul(INIT_ONES, __ARM_FPM_INFNAN);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_2(
+// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 16384
 //
 fpm_t test_of_mul_2() {
-  return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_SATURATE);
+  return __arm_set_fpm_overflow_mul(INIT_ZERO, __ARM_FPM_SATURATE);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_1(
+// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_1(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i64 0
+// CHECK-NEXT:    ret i64 -32769
 //
 fpm_t test_of_cvt_1() {
-  return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_INFNAN);
+  return __arm_set_fpm_overflow_cvt(INIT_ONES, __ARM_FPM_INFNAN);
 }
 
-// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_2(
+// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 32768
 //
 fpm_t test_of_cvt_2() {
-  return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_SATURATE);
+  return __arm_set_fpm_overflow_cvt(INIT_ZERO, __ARM_FPM_SATURATE);
 }
 
 // CHECK-LABEL: define dso_local noundef i64 @test_lscale(
@@ -127,35 +130,35 @@ fpm_t test_of_cvt_2() {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 8323072
 //
-fpm_t test_lscale() { return __arm_set_fpm_lscale(__arm_fpm_init(), 127); }
+fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); }
 
 // CHECK-LABEL: define dso_local noundef i64 @test_lscale2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 270582939648
 //
-fpm_t test_lscale2() { return __arm_set_fpm_lscale2(__arm_fpm_init(), 63); }
+fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); }
 
 // CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 2147483648
 //
-fpm_t test_nscale_1() { return __arm_set_fpm_nscale(__arm_fpm_init(), -128); }
+fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); }
 
 // CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 2130706432
 //
-fpm_t test_nscale_2() { return __arm_set_fpm_nscale(__arm_fpm_init(), 127); }
+fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); }
 
 // CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3(
 // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    ret i64 4278190080
 //
-fpm_t test_nscale_3() { return __arm_set_fpm_nscale(__arm_fpm_init(), -1); }
+fpm_t test_nscale_3() { return __arm_set_fpm_nscale(INIT_ZERO, -1); }
 
 #ifdef __cplusplus
 }



More information about the cfe-commits mailing list