[clang] 51e188d - [AArch64] Support for memset tagged intrinsic
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 31 12:49:46 PST 2022
Author: tyb0807
Date: 2022-01-31T20:49:34Z
New Revision: 51e188d079f6ee9d8bc640351f2f772234d809dd
URL: https://github.com/llvm/llvm-project/commit/51e188d079f6ee9d8bc640351f2f772234d809dd
DIFF: https://github.com/llvm/llvm-project/commit/51e188d079f6ee9d8bc640351f2f772234d809dd.diff
LOG: [AArch64] Support for memset tagged intrinsic
This introduces a new ACLE intrinsic for memset tagged
(https://github.com/ARM-software/acle/blob/next-release/main/acle.md#memcpy-family-of-operations-intrinsics---mops).
void *__builtin_arm_mops_memset_tag(void *, int, size_t)
A corresponding LLVM intrinsic is introduced:
i8* llvm.aarch64.mops.memset.tag(i8*, i8, i64)
The types match llvm.memset but the return type is not void.
This is part 1/4 of a series of patches split from
https://reviews.llvm.org/D117405 to facilitate reviewing.
Patch by Tomas Matheson
Differential Revision: https://reviews.llvm.org/D117753
Added:
clang/test/CodeGen/aarch64-mops.c
Modified:
clang/include/clang/Basic/BuiltinsAArch64.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/arm_acle.h
llvm/include/llvm/IR/IntrinsicsAArch64.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 634bcaed20a6..0869b87e32fb 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -62,6 +62,9 @@ BUILTIN(__builtin_arm_ldg, "v*v*", "t")
BUILTIN(__builtin_arm_stg, "vv*", "t")
BUILTIN(__builtin_arm_subp, "Uiv*v*", "t")
+// Memory Operations
+BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "")
+
// Memory barrier
BUILTIN(__builtin_arm_dmb, "vUi", "nc")
BUILTIN(__builtin_arm_dsb, "vUi", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b7862e618bd..d071c7a5b4a4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9777,6 +9777,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Arg0, Arg1});
}
+ // Memory Operations (MOPS)
+ if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
+ Value *Dst = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ Value *Size = EmitScalarExpr(E->getArg(2));
+ Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
+ Val = Builder.CreateTrunc(Val, Int8Ty);
+ Size = Builder.CreateIntCast(Size, Int64Ty, false);
+ return Builder.CreateCall(
+ CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
+ }
+
// Memory Tagging Extensions (MTE) Intrinsics
Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 45fac248dadb..1cfc1403276d 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -730,6 +730,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
#endif
+/* Memory Operations Intrinsics */
+#if __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING
+#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
+ __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
+#endif
+
/* Transactional Memory Extension (TME) Intrinsics */
#if __ARM_FEATURE_TME
diff --git a/clang/test/CodeGen/aarch64-mops.c b/clang/test/CodeGen/aarch64-mops.c
new file mode 100644
index 000000000000..0b6a2c26c1e8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-mops.c
@@ -0,0 +1,153 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s | FileCheck %s
+
+#define __ARM_FEATURE_MOPS 1
+#include <arm_acle.h>
+#include <stddef.h>
+
+// CHECK-LABEL: @bzero_0(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0)
+// CHECK-NEXT: ret i8* [[TMP1]]
+//
+void *bzero_0(void *dst) {
+ return __arm_mops_memset_tag(dst, 0, 0);
+}
+
+// CHECK-LABEL: @bzero_1(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1)
+// CHECK-NEXT: ret i8* [[TMP1]]
+//
+void *bzero_1(void *dst) {
+ return __arm_mops_memset_tag(dst, 0, 1);
+}
+
+// CHECK-LABEL: @bzero_10(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10)
+// CHECK-NEXT: ret i8* [[TMP1]]
+//
+void *bzero_10(void *dst) {
+ return __arm_mops_memset_tag(dst, 0, 10);
+}
+
+// CHECK-LABEL: @bzero_10000(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000)
+// CHECK-NEXT: ret i8* [[TMP1]]
+//
+void *bzero_10000(void *dst) {
+ return __arm_mops_memset_tag(dst, 0, 10000);
+}
+
+// CHECK-LABEL: @bzero_n(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]])
+// CHECK-NEXT: ret i8* [[TMP2]]
+//
+void *bzero_n(void *dst, size_t size) {
+ return __arm_mops_memset_tag(dst, 0, size);
+}
+
+// CHECK-LABEL: @memset_0(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0)
+// CHECK-NEXT: ret i8* [[TMP3]]
+//
+void *memset_0(void *dst, int value) {
+ return __arm_mops_memset_tag(dst, value, 0);
+}
+
+// CHECK-LABEL: @memset_1(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1)
+// CHECK-NEXT: ret i8* [[TMP3]]
+//
+void *memset_1(void *dst, int value) {
+ return __arm_mops_memset_tag(dst, value, 1);
+}
+
+// CHECK-LABEL: @memset_10(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10)
+// CHECK-NEXT: ret i8* [[TMP3]]
+//
+void *memset_10(void *dst, int value) {
+ return __arm_mops_memset_tag(dst, value, 10);
+}
+
+// CHECK-LABEL: @memset_10000(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000)
+// CHECK-NEXT: ret i8* [[TMP3]]
+//
+void *memset_10000(void *dst, int value) {
+ return __arm_mops_memset_tag(dst, value, 10000);
+}
+
+// CHECK-LABEL: @memset_n(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]])
+// CHECK-NEXT: ret i8* [[TMP4]]
+//
+void *memset_n(void *dst, int value, size_t size) {
+ return __arm_mops_memset_tag(dst, value, size);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index e610c28a5923..a65ddff07a29 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -897,6 +897,14 @@ def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llv
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
}
+//===----------------------------------------------------------------------===//
+// Memory Operations (MOPS) Intrinsics
+let TargetPrefix = "aarch64" in {
+ // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64
+ def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
+}
+
// Transactional Memory Extension (TME) Intrinsics
let TargetPrefix = "aarch64" in {
def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">,
More information about the cfe-commits
mailing list