[clang] 30b67c6 - [AArch64] Make ACLE intrinsics always available part1

Daniel Kiss via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 14 08:23:22 PDT 2022


Author: Daniel Kiss
Date: 2022-10-14T17:23:11+02:00
New Revision: 30b67c677c6baf0d6ef6c3051cf270133c43e4d2

URL: https://github.com/llvm/llvm-project/commit/30b67c677c6baf0d6ef6c3051cf270133c43e4d2
DIFF: https://github.com/llvm/llvm-project/commit/30b67c677c6baf0d6ef6c3051cf270133c43e4d2.diff

LOG: [AArch64] Make ACLE intrinsics always available part1

A given architecture feature might be enabled by a pragma or a function attribute, and in those cases it would be nice to be able to use the corresponding intrinsics (see the sketch below).
Today GCC already offers these intrinsics without the -march flag [1].
A PR [2] has been opened against the ACLE to clarify the intent and remove the need for the -march flag for a given intrinsic.

This is going to be more useful when D127812 lands.

[1] https://godbolt.org/z/bxcMhav3z
[2] https://github.com/ARM-software/acle/pull/214
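
For illustration (not part of this commit), a minimal sketch of the usage pattern this change enables, assuming an AArch64 target whose baseline lacks the CRC and RNG features; the function names are purely illustrative:

  #include <arm_acle.h>
  #include <stdint.h>

  /* arm_acle.h now declares the intrinsic unconditionally and gates it on
     the "crc" target feature, so enabling the feature per function via a
     target attribute is enough; no global -march=...+crc is required. */
  __attribute__((target("crc")))
  uint32_t checksum_byte(uint32_t acc, uint8_t b) {
    return __crc32b(acc, b);
  }

  /* Same idea for the Armv8.5-A random number intrinsics, which are now
     gated on the "rand" target feature. */
  __attribute__((target("rand")))
  int read_random(uint64_t *out) {
    return __rndr(out);
  }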

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D133359

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsAArch64.def
    clang/lib/Headers/arm_acle.h
    clang/test/CodeGen/arm_acle.c
    clang/test/CodeGen/builtins-arm64.c

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 036df7435bfb..e6e375bc2b83 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -74,7 +74,7 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc")
 BUILTIN(__builtin_arm_dsb, "vUi", "nc")
 BUILTIN(__builtin_arm_isb, "vUi", "nc")
 
-BUILTIN(__builtin_arm_jcvt, "Zid", "nc")
+TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
 
 // Prefetch
 BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
@@ -107,24 +107,24 @@ BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
 BUILTIN(__builtin_arm_ttest, "WUi", "nc")
 
 // Armv8.5-A FP rounding intrinsics
-BUILTIN(__builtin_arm_rint32zf, "ff", "")
-BUILTIN(__builtin_arm_rint32z, "dd", "")
-BUILTIN(__builtin_arm_rint64zf, "ff", "")
-BUILTIN(__builtin_arm_rint64z, "dd", "")
-BUILTIN(__builtin_arm_rint32xf, "ff", "")
-BUILTIN(__builtin_arm_rint32x, "dd", "")
-BUILTIN(__builtin_arm_rint64xf, "ff", "")
-BUILTIN(__builtin_arm_rint64x, "dd", "")
+TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64zf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64z, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32xf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32x, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64xf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64x, "dd", "", "v8.5a")
 
 // Armv8.5-A Random number generation intrinsics
-BUILTIN(__builtin_arm_rndr,   "iWUi*", "n")
-BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n")
+TARGET_BUILTIN(__builtin_arm_rndr,   "iWUi*", "n", "rand")
+TARGET_BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n", "rand")
 
 // Armv8.7-A load/store 64-byte intrinsics
-BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n")
-BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n")
-BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n")
-BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n")
+TARGET_BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
 TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index ed3fc1de1fd4..d73b6bf82d69 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -589,122 +589,123 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
 #endif
 
 /* 9.7 CRC32 intrinsics */
-#if defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) ||                   \
+    (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32b(uint32_t __a, uint8_t __b) {
   return __builtin_arm_crc32b(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32h(uint32_t __a, uint16_t __b) {
   return __builtin_arm_crc32h(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32w(uint32_t __a, uint32_t __b) {
   return __builtin_arm_crc32w(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32d(uint32_t __a, uint64_t __b) {
   return __builtin_arm_crc32d(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32cb(uint32_t __a, uint8_t __b) {
   return __builtin_arm_crc32cb(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32ch(uint32_t __a, uint16_t __b) {
   return __builtin_arm_crc32ch(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32cw(uint32_t __a, uint32_t __b) {
   return __builtin_arm_crc32cw(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32cd(uint32_t __a, uint64_t __b) {
   return __builtin_arm_crc32cd(__a, __b);
 }
 #endif
 
 /* Armv8.3-A Javascript conversion intrinsic */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
 __jcvt(double __a) {
   return __builtin_arm_jcvt(__a);
 }
 #endif
 
 /* Armv8.5-A FP rounding intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_FRINT)
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32zf(float __a) {
   return __builtin_arm_rint32zf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32z(double __a) {
   return __builtin_arm_rint32z(__a);
 }
 
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64zf(float __a) {
   return __builtin_arm_rint64zf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64z(double __a) {
   return __builtin_arm_rint64z(__a);
 }
 
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32xf(float __a) {
   return __builtin_arm_rint32xf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32x(double __a) {
   return __builtin_arm_rint32x(__a);
 }
 
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64xf(float __a) {
   return __builtin_arm_rint64xf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64x(double __a) {
   return __builtin_arm_rint64x(__a);
 }
 #endif
 
 /* Armv8.7-A load/store 64-byte intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_LS64)
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 typedef struct {
     uint64_t val[8];
 } data512_t;
 
-static __inline__ data512_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_ld64b(const void *__addr) {
-    data512_t __value;
-    __builtin_arm_ld64b(__addr, __value.val);
-    return __value;
+  data512_t __value;
+  __builtin_arm_ld64b(__addr, __value.val);
+  return __value;
 }
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_st64b(void *__addr, data512_t __value) {
-    __builtin_arm_st64b(__addr, __value.val);
+  __builtin_arm_st64b(__addr, __value.val);
 }
-static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_st64bv(void *__addr, data512_t __value) {
-    return __builtin_arm_st64bv(__addr, __value.val);
+  return __builtin_arm_st64bv(__addr, __value.val);
 }
-static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_st64bv0(void *__addr, data512_t __value) {
-    return __builtin_arm_st64bv0(__addr, __value.val);
+  return __builtin_arm_st64bv0(__addr, __value.val);
 }
 #endif
 
@@ -759,12 +760,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 #endif /* __ARM_FEATURE_TME */
 
 /* Armv8.5-A Random number generation intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
 __rndr(uint64_t *__p) {
   return __builtin_arm_rndr(__p);
 }
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
 __rndrrs(uint64_t *__p) {
   return __builtin_arm_rndrrs(__p);
 }

diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 60c4d9a5855b..6003dd2c7ba2 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -124,7 +124,7 @@ void test_sevl(void) {
   __sevl();
 }
 
-#if __ARM_32BIT_STATE
+#ifdef __ARM_32BIT_STATE
 // AArch32-LABEL: @test_dbg(
 // AArch32-NEXT:  entry:
 // AArch32-NEXT:    call void @llvm.arm.dbg(i32 0)
@@ -1646,7 +1646,7 @@ void test_wsrf64(double v) {
 #endif
 }
 
-#ifdef __ARM_64BIT_STATE
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_JCVT)
 // AArch6483-LABEL: @test_jcvt(
 // AArch6483-NEXT:  entry:
 // AArch6483-NEXT:    [[TMP0:%.*]] = call i32 @llvm.aarch64.fjcvtzs(double [[V:%.*]])
@@ -1658,7 +1658,7 @@ int32_t test_jcvt(double v) {
 #endif
 
 
-#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)
 
 // AArch6485-LABEL: @test_rndr(
 // AArch6485-NEXT:  entry:

diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index daa0b08a9e57..4619b6ba610f 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -59,6 +59,7 @@ void prefetch(void) {
   // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
 }
 
+__attribute__((target("v8.5a")))
 int32_t jcvt(double v) {
   //CHECK-LABEL: @jcvt(
   //CHECK: call i32 @llvm.aarch64.fjcvtzs
@@ -133,6 +134,7 @@ unsigned int clsll(uint64_t v) {
 // CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 // CHECK-NEXT:    ret i32 [[TMP3]]
 //
+__attribute__((target("rand")))
 int rndr(uint64_t *__addr) {
   return __builtin_arm_rndr(__addr);
 }
@@ -146,6 +148,7 @@ int rndr(uint64_t *__addr) {
 // CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 // CHECK-NEXT:    ret i32 [[TMP3]]
 //
+__attribute__((target("rand")))
 int rndrrs(uint64_t *__addr) {
   return __builtin_arm_rndrrs(__addr);
 }


        

