[clang] 30b67c6 - [AArch64] Make ACLE intrinsics always available part1
Daniel Kiss via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 14 08:23:22 PDT 2022
Author: Daniel Kiss
Date: 2022-10-14T17:23:11+02:00
New Revision: 30b67c677c6baf0d6ef6c3051cf270133c43e4d2
URL: https://github.com/llvm/llvm-project/commit/30b67c677c6baf0d6ef6c3051cf270133c43e4d2
DIFF: https://github.com/llvm/llvm-project/commit/30b67c677c6baf0d6ef6c3051cf270133c43e4d2.diff
LOG: [AArch64] Make ACLE intrinsics always available part1
A given arch feature might be enabled by a pragma or a function attribute, so in these cases it would be nice to be able to use the intrinsics.
Today GCC offers the intrinsics without the -march flag [1].
The ACLE PR [2] clarifies the intention and removes the need for the -march flag for a given intrinsic.
This is going to be more useful when D127812 lands.
[1] https://godbolt.org/z/bxcMhav3z
[2] https://github.com/ARM-software/acle/pull/214
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D133359
Added:
Modified:
clang/include/clang/Basic/BuiltinsAArch64.def
clang/lib/Headers/arm_acle.h
clang/test/CodeGen/arm_acle.c
clang/test/CodeGen/builtins-arm64.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 036df7435bfb..e6e375bc2b83 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -74,7 +74,7 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc")
BUILTIN(__builtin_arm_dsb, "vUi", "nc")
BUILTIN(__builtin_arm_isb, "vUi", "nc")
-BUILTIN(__builtin_arm_jcvt, "Zid", "nc")
+TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
@@ -107,24 +107,24 @@ BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
BUILTIN(__builtin_arm_ttest, "WUi", "nc")
// Armv8.5-A FP rounding intrinsics
-BUILTIN(__builtin_arm_rint32zf, "ff", "")
-BUILTIN(__builtin_arm_rint32z, "dd", "")
-BUILTIN(__builtin_arm_rint64zf, "ff", "")
-BUILTIN(__builtin_arm_rint64z, "dd", "")
-BUILTIN(__builtin_arm_rint32xf, "ff", "")
-BUILTIN(__builtin_arm_rint32x, "dd", "")
-BUILTIN(__builtin_arm_rint64xf, "ff", "")
-BUILTIN(__builtin_arm_rint64x, "dd", "")
+TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64zf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64z, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32xf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32x, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64xf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64x, "dd", "", "v8.5a")
// Armv8.5-A Random number generation intrinsics
-BUILTIN(__builtin_arm_rndr, "iWUi*", "n")
-BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n")
+TARGET_BUILTIN(__builtin_arm_rndr, "iWUi*", "n", "rand")
+TARGET_BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n", "rand")
// Armv8.7-A load/store 64-byte intrinsics
-BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n")
-BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n")
-BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n")
-BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n")
+TARGET_BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index ed3fc1de1fd4..d73b6bf82d69 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -589,122 +589,123 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
#endif
/* 9.7 CRC32 intrinsics */
-#if defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
+ (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
return __builtin_arm_crc32b(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32h(uint32_t __a, uint16_t __b) {
return __builtin_arm_crc32h(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32w(uint32_t __a, uint32_t __b) {
return __builtin_arm_crc32w(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32d(uint32_t __a, uint64_t __b) {
return __builtin_arm_crc32d(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cb(uint32_t __a, uint8_t __b) {
return __builtin_arm_crc32cb(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32ch(uint32_t __a, uint16_t __b) {
return __builtin_arm_crc32ch(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cw(uint32_t __a, uint32_t __b) {
return __builtin_arm_crc32cw(__a, __b);
}
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cd(uint32_t __a, uint64_t __b) {
return __builtin_arm_crc32cd(__a, __b);
}
#endif
/* Armv8.3-A Javascript conversion intrinsic */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
__jcvt(double __a) {
return __builtin_arm_jcvt(__a);
}
#endif
/* Armv8.5-A FP rounding intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_FRINT)
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32zf(float __a) {
return __builtin_arm_rint32zf(__a);
}
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32z(double __a) {
return __builtin_arm_rint32z(__a);
}
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64zf(float __a) {
return __builtin_arm_rint64zf(__a);
}
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64z(double __a) {
return __builtin_arm_rint64z(__a);
}
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32xf(float __a) {
return __builtin_arm_rint32xf(__a);
}
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32x(double __a) {
return __builtin_arm_rint32x(__a);
}
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64xf(float __a) {
return __builtin_arm_rint64xf(__a);
}
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64x(double __a) {
return __builtin_arm_rint64x(__a);
}
#endif
/* Armv8.7-A load/store 64-byte intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_LS64)
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
uint64_t val[8];
} data512_t;
-static __inline__ data512_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_ld64b(const void *__addr) {
- data512_t __value;
- __builtin_arm_ld64b(__addr, __value.val);
- return __value;
+ data512_t __value;
+ __builtin_arm_ld64b(__addr, __value.val);
+ return __value;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64b(void *__addr, data512_t __value) {
- __builtin_arm_st64b(__addr, __value.val);
+ __builtin_arm_st64b(__addr, __value.val);
}
-static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv(void *__addr, data512_t __value) {
- return __builtin_arm_st64bv(__addr, __value.val);
+ return __builtin_arm_st64bv(__addr, __value.val);
}
-static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv0(void *__addr, data512_t __value) {
- return __builtin_arm_st64bv0(__addr, __value.val);
+ return __builtin_arm_st64bv0(__addr, __value.val);
}
#endif
@@ -759,12 +760,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#endif /* __ARM_FEATURE_TME */
/* Armv8.5-A Random number generation intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {
return __builtin_arm_rndr(__p);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndrrs(uint64_t *__p) {
return __builtin_arm_rndrrs(__p);
}
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 60c4d9a5855b..6003dd2c7ba2 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -124,7 +124,7 @@ void test_sevl(void) {
__sevl();
}
-#if __ARM_32BIT_STATE
+#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_dbg(
// AArch32-NEXT: entry:
// AArch32-NEXT: call void @llvm.arm.dbg(i32 0)
@@ -1646,7 +1646,7 @@ void test_wsrf64(double v) {
#endif
}
-#ifdef __ARM_64BIT_STATE
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_JCVT)
// AArch6483-LABEL: @test_jcvt(
// AArch6483-NEXT: entry:
// AArch6483-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.fjcvtzs(double [[V:%.*]])
@@ -1658,7 +1658,7 @@ int32_t test_jcvt(double v) {
#endif
-#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)
// AArch6485-LABEL: @test_rndr(
// AArch6485-NEXT: entry:
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index daa0b08a9e57..4619b6ba610f 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -59,6 +59,7 @@ void prefetch(void) {
// CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
}
+__attribute__((target("v8.5a")))
int32_t jcvt(double v) {
//CHECK-LABEL: @jcvt(
//CHECK: call i32 @llvm.aarch64.fjcvtzs
@@ -133,6 +134,7 @@ unsigned int clsll(uint64_t v) {
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
// CHECK-NEXT: ret i32 [[TMP3]]
//
+__attribute__((target("rand")))
int rndr(uint64_t *__addr) {
return __builtin_arm_rndr(__addr);
}
@@ -146,6 +148,7 @@ int rndr(uint64_t *__addr) {
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
// CHECK-NEXT: ret i32 [[TMP3]]
//
+__attribute__((target("rand")))
int rndrrs(uint64_t *__addr) {
return __builtin_arm_rndrrs(__addr);
}
More information about the cfe-commits
mailing list