[clang] b593bfd - [clang][SveEmitter] SVE builtins for the `svusdot` and `svsudot` ACLE intrinsics.

Francesco Petrogalli via cfe-commits cfe-commits at lists.llvm.org
Mon May 18 16:10:56 PDT 2020


Author: Francesco Petrogalli
Date: 2020-05-18T23:07:23Z
New Revision: b593bfd4d8e3716c80d4313b5a0eeeb6e9575a74

URL: https://github.com/llvm/llvm-project/commit/b593bfd4d8e3716c80d4313b5a0eeeb6e9575a74
DIFF: https://github.com/llvm/llvm-project/commit/b593bfd4d8e3716c80d4313b5a0eeeb6e9575a74.diff

LOG: [clang][SveEmitter] SVE builtins for the `svusdot` and `svsudot` ACLE intrinsics.

Summary:
This patch adds the following intrinsics, guarded by
`__ARM_FEATURE_SVE_MATMUL_INT8` (a short usage sketch follows the list):

* svusdot[_s32]
* svusdot[_n_s32]
* svusdot_lane[_s32]
* svsudot[_s32]
* svsudot[_n_s32]
* svsudot_lane[_s32]
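
For illustration, a minimal usage sketch of the new builtins (not part of the
patch; assumes <arm_sve.h> with `__ARM_FEATURE_SVE_MATMUL_INT8`, mirroring the
tests added below):

  #include <arm_sve.h>

  // Unsigned-by-signed dot product, accumulating into 32-bit lanes.
  svint32_t use_usdot(svint32_t acc, svuint8_t u, svint8_t s) {
    return svusdot_s32(acc, u, s);           // svusdot[_s32]
  }

  // Signed-by-unsigned variant; the lane form takes an immediate index in [0, 3].
  svint32_t use_sudot_lane(svint32_t acc, svint8_t s, svuint8_t u) {
    return svsudot_lane_s32(acc, s, u, 0);   // svsudot_lane[_s32]
  }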

Reviewers: sdesmalen, efriedma, david-arm, rengolin

Subscribers: tschuett, kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79877

Added: 
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c

Modified: 
    clang/include/clang/Basic/TargetBuiltins.h
    clang/include/clang/Basic/arm_sve.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/utils/TableGen/SveEmitter.cpp

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index db10077895ee..b7dafecf3ce1 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -244,6 +244,7 @@ namespace clang {
     bool isAppendSVALL() const { return Flags & IsAppendSVALL; }
     bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; }
     bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; }
+    bool isReverseUSDOT() const { return Flags & ReverseUSDOT; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 4b77b0575637..caf141532fcf 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -68,6 +68,7 @@
 // a: scalar of element type (splat to vector type)
 // R: scalar of 1/2 width element type (splat to vector type)
 // r: scalar of 1/4 width element type (splat to vector type)
+// @: unsigned scalar of 1/4 width element type (splat to vector type)
 // e: 1/2 width unsigned elements, 2x element count
 // b: 1/4 width unsigned elements, 4x element count
 // h: 1/2 width elements, 2x element count
@@ -196,6 +197,7 @@ def IsInsertOp1SVALL          : FlagType<0x04000000>; // Inserts SV_ALL as the s
 def IsPrefetch                : FlagType<0x08000000>; // Contiguous prefetches.
 def IsGatherPrefetch          : FlagType<0x10000000>;
 def ReverseCompare            : FlagType<0x20000000>; // Compare operands must be swapped.
+def ReverseUSDOT              : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
@@ -1240,6 +1242,14 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_INT8)" in {
 def SVMLLA_S32   : SInst<"svmmla[_s32]",   "ddqq","i",  MergeNone, "aarch64_sve_smmla">;
 def SVMLLA_U32   : SInst<"svmmla[_u32]",   "ddqq","Ui", MergeNone, "aarch64_sve_ummla">;
 def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i",  MergeNone, "aarch64_sve_usmmla">;
+
+def SVUSDOT_S    : SInst<"svusdot[_s32]",    "ddbq", "i",       MergeNone, "aarch64_sve_usdot">;
+def SVUSDOT_N_S  : SInst<"svusdot[_n_s32]",  "ddbr", "i",       MergeNone, "aarch64_sve_usdot">;
+def SVSUDOT_S    : SInst<"svsudot[_s32]",    "ddqb", "i",       MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>;
+def SVSUDOT_N_S  : SInst<"svsudot[_n_s32]",  "ddq@", "i",       MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>;
+
+def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]",  "ddbqi",  "i",   MergeNone, "aarch64_sve_usdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
+def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]",  "ddqbi",  "i",   MergeNone, "aarch64_sve_sudot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
 }
 
 let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP32)" in {

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 524924e36638..b58f167d9ed2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8040,6 +8040,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     if (TypeFlags.isReverseCompare())
       std::swap(Ops[1], Ops[2]);
 
+    if (TypeFlags.isReverseUSDOT())
+      std::swap(Ops[1], Ops[2]);
+
     // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
     if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
       llvm::Type *OpndTy = Ops[1]->getType();
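
With ReverseUSDOT set, svsudot shares the aarch64_sve_usdot intrinsic with
svusdot; the swap above simply moves the unsigned operand into the position
that intrinsic expects. A small sketch of the resulting lowering (cf.
acle_sve_sudot.c below):

  #include <arm_sve.h>

  svint32_t sudot_maps_to_usdot(svint32_t acc, svint8_t s, svuint8_t u) {
    // Emitted as: llvm.aarch64.sve.usdot.nxv4i32(acc, u, s)
    return svsudot_s32(acc, s, u);
  }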

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
new file mode 100644
index 000000000000..d678851bad58
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svsudot_s32(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_s32
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %z, <vscale x 16 x i8> %y)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot, _s32, , )(x, y, z);
+}
+
+svint32_t test_svsudot_n_s32(svint32_t x, svint8_t y, uint8_t z) {
+  // CHECK-LABEL: test_svsudot_n_s32
+  // CHECK: %[[SPLAT:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %z)
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %[[SPLAT]], <vscale x 16 x i8> %y)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot, _n_s32, , )(x, y, z);
+}
+
+svint32_t test_svsudot_lane_s32_0(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_0
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 0)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 0);
+}
+
+svint32_t test_svsudot_lane_s32_1(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_1
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 1)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 1);
+}
+
+svint32_t test_svsudot_lane_s32_2(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_2
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 2)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 2);
+}
+
+svint32_t test_svsudot_lane_s32_3(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_3
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 3);
+}

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
new file mode 100644
index 000000000000..1639cbb2b86f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svusdot_s32(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_s32
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot, _s32, , )(x, y, z);
+}
+
+svint32_t test_svusdot_n_s32(svint32_t x, svuint8_t y, int8_t z) {
+  // CHECK-LABEL: test_svusdot_n_s32
+  // CHECK: %[[SPLAT:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %z)
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %[[SPLAT]])
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot, _n_s32, , )(x, y, z);
+}
+
+svint32_t test_svusdot_lane_s32_0(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_0
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 0)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 0);
+}
+
+svint32_t test_svusdot_lane_s32_1(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_1
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 1)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 1);
+}
+
+svint32_t test_svusdot_lane_s32_2(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_2
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 2)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 2);
+}
+
+svint32_t test_svusdot_lane_s32_3(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_3
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 3);
+}

diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index ae1d938eed01..7d99e39f9d03 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -213,13 +213,13 @@ class Intrinsic {
   /// Return true if the intrinsic takes a splat operand.
   bool hasSplat() const {
     // These prototype modifiers are described in arm_sve.td.
-    return Proto.find_first_of("ajfrKLR") != std::string::npos;
+    return Proto.find_first_of("ajfrKLR@") != std::string::npos;
   }
 
   /// Return the parameter index of the splat operand.
   unsigned getSplatIdx() const {
     // These prototype modifiers are described in arm_sve.td.
-    auto Idx = Proto.find_first_of("ajfrKLR");
+    auto Idx = Proto.find_first_of("ajfrKLR@");
     assert(Idx != std::string::npos && Idx > 0 &&
            "Prototype has no splat operand");
     return Idx - 1;
@@ -541,6 +541,12 @@ void SVEType::applyModifier(char Mod) {
     ElementBitwidth /= 4;
     NumVectors = 0;
     break;
+  case '@':
+    Signed = false;
+    Float = false;
+    ElementBitwidth /= 4;
+    NumVectors = 0;
+    break;
   case 'K':
     Signed = true;
     Float = false;
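
As a rough sketch of the splat-index logic above: for svsudot[_n_s32] the
prototype string is "ddq@", so the '@' modifier is found at character index 3
and the splat operand is parameter 3 - 1 = 2 (index 0 of the string describes
the return type). Illustrative C, mirroring getSplatIdx():

  #include <string.h>

  unsigned splat_param_of_sudot_n(void) {
    const char *Proto = "ddq@";               /* prototype of svsudot[_n_s32] */
    size_t Idx = strcspn(Proto, "ajfrKLR@");  /* == 3, position of '@'        */
    return (unsigned)(Idx - 1);               /* == 2: the uint8_t argument   */
  }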


        

