[clang] [Clang][ARM] Make CRC and DSP intrinsics always available. (PR #107417)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 5 08:55:53 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Daniel Kiss (DanielKristofKiss)
<details>
<summary>Changes</summary>
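Both features have a corresponding target feature (`dsp` and `crc`), so the intrinsics can be declared unconditionally with a `target` attribute and the compiler can still check whether each use is valid.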
---
Full diff: https://github.com/llvm/llvm-project/pull/107417.diff
2 Files Affected:
- (modified) clang/lib/Headers/arm_acle.h (+18-21)
- (modified) clang/test/CodeGen/arm_acle.c (+73-3)
``````````diff
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 1518b0c4c8428f..b1dc90f84ad36f 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -264,28 +264,28 @@ __rbitl(unsigned long __t) {
}
/* 8.3 16-bit multiplications */
-#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
-static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
+#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
+static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulbb(int32_t __a, int32_t __b) {
return __builtin_arm_smulbb(__a, __b);
}
-static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulbt(int32_t __a, int32_t __b) {
return __builtin_arm_smulbt(__a, __b);
}
-static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smultb(int32_t __a, int32_t __b) {
return __builtin_arm_smultb(__a, __b);
}
-static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smultt(int32_t __a, int32_t __b) {
return __builtin_arm_smultt(__a, __b);
}
-static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulwb(int32_t __a, int32_t __b) {
return __builtin_arm_smulwb(__a, __b);
}
-static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulwt(int32_t __a, int32_t __b) {
return __builtin_arm_smulwt(__a, __b);
}
@@ -304,46 +304,46 @@ __smulwt(int32_t __a, int32_t __b) {
#endif
/* 8.4.2 Saturating addition and subtraction intrinsics */
-#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qadd(int32_t __t, int32_t __v) {
return __builtin_arm_qadd(__t, __v);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qsub(int32_t __t, int32_t __v) {
return __builtin_arm_qsub(__t, __v);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qdbl(int32_t __t) {
return __builtin_arm_qadd(__t, __t);
}
#endif
/* 8.4.3 Accumulating multiplications */
-#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
return __builtin_arm_smlabb(__a, __b, __c);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
return __builtin_arm_smlabt(__a, __b, __c);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
return __builtin_arm_smlatb(__a, __b, __c);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
return __builtin_arm_smlatt(__a, __b, __c);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
return __builtin_arm_smlawb(__a, __b, __c);
}
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
return __builtin_arm_smlawt(__a, __b, __c);
}
@@ -621,8 +621,6 @@ __rintnf(float __a) {
#endif
/* 8.8 CRC32 intrinsics */
-#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
- (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
return __builtin_arm_crc32b(__a, __b);
@@ -662,7 +660,6 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target
__crc32cd(uint32_t __a, uint64_t __b) {
return __builtin_arm_crc32cd(__a, __b);
}
-#endif
/* 8.6 Floating-point data-processing intrinsics */
/* Armv8.3-A Javascript conversion intrinsic */
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 1c41f1b5d23f0c..74de8246d7de6e 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1,4 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch32
// RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -target-feature +crc -target-feature +dsp -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch32
// RUN: %clang_cc1 -ffreestanding -Wno-error=implicit-function-declaration -triple aarch64-none-elf -target-feature +neon -target-feature +crc -target-feature +crypto -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch64
// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-elf -target-feature +v8.3a -target-feature +crc -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483
@@ -638,12 +639,15 @@ uint32_t test_usat(int32_t t) {
#endif
/* 9.4.2 Saturating addition and subtraction intrinsics */
-#ifdef __ARM_FEATURE_DSP
+#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_qadd(
// AArch32-NEXT: entry:
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qadd(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_qadd(int32_t a, int32_t b) {
return __qadd(a, b);
}
@@ -653,6 +657,9 @@ int32_t test_qadd(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qsub(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_qsub(int32_t a, int32_t b) {
return __qsub(a, b);
}
@@ -664,6 +671,9 @@ extern int32_t f();
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qadd(i32 [[CALL]], i32 [[CALL]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_qdbl() {
return __qdbl(f());
}
@@ -672,12 +682,15 @@ int32_t test_qdbl() {
/*
* 9.3 16-bit multiplications
*/
-#if __ARM_FEATURE_DSP
+#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_smulbb(
// AArch32-NEXT: entry:
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulbb(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smulbb(int32_t a, int32_t b) {
return __smulbb(a, b);
}
@@ -687,6 +700,9 @@ int32_t test_smulbb(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulbt(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smulbt(int32_t a, int32_t b) {
return __smulbt(a, b);
}
@@ -696,6 +712,9 @@ int32_t test_smulbt(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smultb(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smultb(int32_t a, int32_t b) {
return __smultb(a, b);
}
@@ -705,6 +724,9 @@ int32_t test_smultb(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smultt(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smultt(int32_t a, int32_t b) {
return __smultt(a, b);
}
@@ -714,6 +736,9 @@ int32_t test_smultt(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulwb(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smulwb(int32_t a, int32_t b) {
return __smulwb(a, b);
}
@@ -723,18 +748,24 @@ int32_t test_smulwb(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulwt(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smulwt(int32_t a, int32_t b) {
return __smulwt(a, b);
}
#endif
/* 9.4.3 Accumultating multiplications */
-#if __ARM_FEATURE_DSP
+#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_smlabb(
// AArch32-NEXT: entry:
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlabb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smlabb(int32_t a, int32_t b, int32_t c) {
return __smlabb(a, b, c);
}
@@ -744,6 +775,9 @@ int32_t test_smlabb(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlabt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smlabt(int32_t a, int32_t b, int32_t c) {
return __smlabt(a, b, c);
}
@@ -753,6 +787,9 @@ int32_t test_smlabt(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlatb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smlatb(int32_t a, int32_t b, int32_t c) {
return __smlatb(a, b, c);
}
@@ -762,6 +799,9 @@ int32_t test_smlatb(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlatt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smlatt(int32_t a, int32_t b, int32_t c) {
return __smlatt(a, b, c);
}
@@ -771,6 +811,9 @@ int32_t test_smlatt(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlawb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smlawb(int32_t a, int32_t b, int32_t c) {
return __smlawb(a, b, c);
}
@@ -780,6 +823,9 @@ int32_t test_smlawb(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlawt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_DSP
+__attribute__((target("dsp")))
+#endif
int32_t test_smlawt(int32_t a, int32_t b, int32_t c) {
return __smlawt(a, b, c);
}
@@ -1335,6 +1381,9 @@ int32_t test_smusdx(int16x2_t a, int16x2_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32b(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32b(uint32_t a, uint8_t b) {
return __crc32b(a, b);
}
@@ -1351,6 +1400,9 @@ uint32_t test_crc32b(uint32_t a, uint8_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32h(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32h(uint32_t a, uint16_t b) {
return __crc32h(a, b);
}
@@ -1365,6 +1417,9 @@ uint32_t test_crc32h(uint32_t a, uint16_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32w(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32w(uint32_t a, uint32_t b) {
return __crc32w(a, b);
}
@@ -1383,6 +1438,9 @@ uint32_t test_crc32w(uint32_t a, uint32_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32x(i32 [[A:%.*]], i64 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32d(uint32_t a, uint64_t b) {
return __crc32d(a, b);
}
@@ -1399,6 +1457,9 @@ uint32_t test_crc32d(uint32_t a, uint64_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32cb(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32cb(uint32_t a, uint8_t b) {
return __crc32cb(a, b);
}
@@ -1415,6 +1476,9 @@ uint32_t test_crc32cb(uint32_t a, uint8_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32ch(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32ch(uint32_t a, uint16_t b) {
return __crc32ch(a, b);
}
@@ -1429,6 +1493,9 @@ uint32_t test_crc32ch(uint32_t a, uint16_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32cw(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32cw(uint32_t a, uint32_t b) {
return __crc32cw(a, b);
}
@@ -1447,6 +1514,9 @@ uint32_t test_crc32cw(uint32_t a, uint32_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32cx(i32 [[A:%.*]], i64 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
+#ifndef __ARM_FEATURE_CRC32
+__attribute__((target("crc")))
+#endif
uint32_t test_crc32cd(uint32_t a, uint64_t b) {
return __crc32cd(a, b);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/107417
More information about the cfe-commits
mailing list