[clang] 51e188d - [AArch64] Support for memset tagged intrinsic

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 31 12:49:46 PST 2022


Author: tyb0807
Date: 2022-01-31T20:49:34Z
New Revision: 51e188d079f6ee9d8bc640351f2f772234d809dd

URL: https://github.com/llvm/llvm-project/commit/51e188d079f6ee9d8bc640351f2f772234d809dd
DIFF: https://github.com/llvm/llvm-project/commit/51e188d079f6ee9d8bc640351f2f772234d809dd.diff

LOG: [AArch64] Support for memset tagged intrinsic

This introduces a new ACLE intrinsic for memset tagged
(https://github.com/ARM-software/acle/blob/next-release/main/acle.md#memcpy-family-of-operations-intrinsics---mops).

  void *__builtin_arm_mops_memset_tag(void *, int, size_t)

A corresponding LLVM intrinsic is introduced:

  i8* llvm.aarch64.mops.memset.tag(i8*, i8, i64)

The types match llvm.memset but the return type is not void.

This is part 1/4 of a series of patches split from
https://reviews.llvm.org/D117405 to facilitate reviewing.

Patch by Tomas Matheson

Differential Revision: https://reviews.llvm.org/D117753

Added: 
    clang/test/CodeGen/aarch64-mops.c

Modified: 
    clang/include/clang/Basic/BuiltinsAArch64.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/arm_acle.h
    llvm/include/llvm/IR/IntrinsicsAArch64.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 634bcaed20a6..0869b87e32fb 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -62,6 +62,9 @@ BUILTIN(__builtin_arm_ldg, "v*v*", "t")
 BUILTIN(__builtin_arm_stg, "vv*", "t")
 BUILTIN(__builtin_arm_subp, "Uiv*v*", "t")
 
+// Memory Operations
+BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "")
+
 // Memory barrier
 BUILTIN(__builtin_arm_dmb, "vUi", "nc")
 BUILTIN(__builtin_arm_dsb, "vUi", "nc")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b7862e618bd..d071c7a5b4a4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9777,6 +9777,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {Arg0, Arg1});
   }
 
+  // Memory Operations (MOPS)
+  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
+    Value *Dst = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    Value *Size = EmitScalarExpr(E->getArg(2));
+    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
+    Val = Builder.CreateTrunc(Val, Int8Ty);
+    Size = Builder.CreateIntCast(Size, Int64Ty, false);
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
+  }
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {

diff  --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 45fac248dadb..1cfc1403276d 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -730,6 +730,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 #endif
 
+/* Memory Operations Intrinsics */
+#if __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING
+#define __arm_mops_memset_tag(__tagged_address, __value, __size)    \
+  __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
+#endif
+
 /* Transactional Memory Extension (TME) Intrinsics */
 #if __ARM_FEATURE_TME
 

diff  --git a/clang/test/CodeGen/aarch64-mops.c b/clang/test/CodeGen/aarch64-mops.c
new file mode 100644
index 000000000000..0b6a2c26c1e8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-mops.c
@@ -0,0 +1,153 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s  | FileCheck %s
+
+#define __ARM_FEATURE_MOPS 1
+#include <arm_acle.h>
+#include <stddef.h>
+
+// CHECK-LABEL: @bzero_0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_0(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 0);
+}
+
+// CHECK-LABEL: @bzero_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_1(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 1);
+}
+
+// CHECK-LABEL: @bzero_10(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_10(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 10);
+}
+
+// CHECK-LABEL: @bzero_10000(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000)
+// CHECK-NEXT:    ret i8* [[TMP1]]
+//
+void *bzero_10000(void *dst) {
+  return __arm_mops_memset_tag(dst, 0, 10000);
+}
+
+// CHECK-LABEL: @bzero_n(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]])
+// CHECK-NEXT:    ret i8* [[TMP2]]
+//
+void *bzero_n(void *dst, size_t size) {
+  return __arm_mops_memset_tag(dst, 0, size);
+}
+
+// CHECK-LABEL: @memset_0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_0(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 0);
+}
+
+// CHECK-LABEL: @memset_1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_1(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 1);
+}
+
+// CHECK-LABEL: @memset_10(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_10(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 10);
+}
+
+// CHECK-LABEL: @memset_10000(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000)
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *memset_10000(void *dst, int value) {
+  return __arm_mops_memset_tag(dst, value, 10000);
+}
+
+// CHECK-LABEL: @memset_n(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-NEXT:    [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]])
+// CHECK-NEXT:    ret i8* [[TMP4]]
+//
+void *memset_n(void *dst, int value, size_t size) {
+  return __arm_mops_memset_tag(dst, value, size);
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index e610c28a5923..a65ddff07a29 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -897,6 +897,14 @@ def int_aarch64_stgp  : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llv
     [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
 }
 
+//===----------------------------------------------------------------------===//
+// Memory Operations (MOPS) Intrinsics
+let TargetPrefix = "aarch64" in {
+  // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64
+  def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty],
+      [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
+}
+
 // Transactional Memory Extension (TME) Intrinsics
 let TargetPrefix = "aarch64" in {
 def int_aarch64_tstart  : GCCBuiltin<"__builtin_arm_tstart">,


        


More information about the cfe-commits mailing list