[clang] fc64539 - [SveEmitter] Add immediate checks for lanes and complex imms

Mon Apr 20 07:12:00 PDT 2020

Author: Sander de Smalen
Date: 2020-04-20T15:10:54+01:00
New Revision: fc645397498037ccb7df230a07e9a8762aaf8c8f

URL: https://github.com/llvm/llvm-project/commit/fc645397498037ccb7df230a07e9a8762aaf8c8f
DIFF: https://github.com/llvm/llvm-project/commit/fc645397498037ccb7df230a07e9a8762aaf8c8f.diff

LOG: [SveEmitter] Add immediate checks for lanes and complex imms

Adds another bunch of of intrinsics that take immediates with
varying ranges based, some being a complex rotation immediate
which are a set of allowed immediates rather than a range.

    svmla_lane:   lane immediate ranging 0..(128/(1*sizeinbits(elt)) - 1)
    svcmla_lane:  lane immediate ranging 0..(128/(2*sizeinbits(elt)) - 1)
    svdot_lane:   lane immediate ranging 0..(128/(4*sizeinbits(elt)) - 1)
    svcadd:       complex rotate immediate [90, 270]
    svcmla:
    svcmla_lane:  complex rotate immediate [0, 90, 180, 270]

Reviewers: efriedma, SjoerdMeijer, rovka

Reviewed By: efriedma

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76680

Added: 
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
    clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
    clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
    clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c
    clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_dot.c
    clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mla.c

Modified: 
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/include/clang/Basic/arm_sve.td
    clang/lib/Sema/SemaChecking.cpp
    clang/utils/TableGen/SveEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 97ad1a6c7920..a64e313bf271 100644

--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9244,6 +9244,10 @@ def err_argument_not_shifted_byte : Error<
   "argument should be an 8-bit value shifted by a multiple of 8 bits">;
 def err_argument_not_shifted_byte_or_xxff : Error<
   "argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF">;
+def err_rotation_argument_to_cadd
+    : Error<"argument should be the value 90 or 270">;
+def err_rotation_argument_to_cmla
+    : Error<"argument should be the value 0, 90, 180 or 270">;
 def warn_neon_vector_initializer_non_portable : Warning<
   "vector initializers are not compatible with NEON intrinsics in big endian "
   "mode">, InGroup<DiagGroup<"nonportable-vector-initialization">>;

diff  --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 9fe4715e4ea1..84f03e60b51f 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -62,7 +62,10 @@
 // d: default
 // c: const pointer type
 // P: predicate type
+// e: 1/2 width unsigned elements, 2x element count
 // h: 1/2 width elements, 2x element count
+// q: 1/4 width elements, 4x element count
+// o: 4x width elements, 1/4 element count
 //
 // i: constant uint64_t
 //
@@ -164,6 +167,11 @@ def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
 def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
 def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
 def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<8>;  // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<9>;  // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270    : ImmCheckType<10>; // [90,270]
+def ImmCheckComplexRotAll90     : ImmCheckType<11>; // [0, 90, 180,270]
 
 class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
   int Arg = arg;
@@ -312,7 +320,19 @@ def SVQSHLU_M  : SInst<"svqshlu[_n_{d}]", "uPdi", "csil",         MergeOp1,  "aa
 // Floating-point arithmetic
 def SVTMAD  : SInst<"svtmad[_{d}]",  "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>;
 
+def SVMLA_LANE  : SInst<"svmla_lane[_{d}]",  "ddddi",  "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf",  MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>,
+                                                                                                        ImmCheck<4, ImmCheckComplexRotAll90>]>;
+
+def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi",  "hfd", MergeOp1,  "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>;
+def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1,  "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Saturating scalar arithmetic
 def SVQDECH_S : SInst<"svqdech_pat[_{d}]",   "ddIi", "s", MergeNone, "aarch64_sve_sqdech", [], [ImmCheck<2, ImmCheck1_16>]>;
 def SVQDECH_U : SInst<"svqdech_pat[_{d}]",   "ddIi", "Us", MergeNone, "aarch64_sve_uqdech", [], [ImmCheck<2, ImmCheck1_16>]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer arithmetic
+def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]",  "ddqqi",  "il",   MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
+def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]",  "ddqqi",  "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c13d5fc5c3e2..559ad52e6d3a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2016,6 +2016,27 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     int ArgNum, CheckTy, ElementSizeInBits;
     std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
 
+    typedef bool(*OptionSetCheckFnTy)(int64_t Value);
+
+    // Function that checks whether the operand (ArgNum) is an immediate
+    // that is one of the predefined values.
+    auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+                                   int ErrDiag) -> bool {
+      // We can't check the value of a dependent argument.
+      Expr *Arg = TheCall->getArg(ArgNum);
+      if (Arg->isTypeDependent() || Arg->isValueDependent())
+        return false;
+
+      // Check constant-ness first.
+      llvm::APSInt Imm;
+      if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm))
+        return true;
+
+      if (!CheckImm(Imm.getSExtValue()))
+        return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+      return false;
+    };
+
     switch ((SVETypeFlags::ImmCheckType)CheckTy) {
     case SVETypeFlags::ImmCheck0_31:
       if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
@@ -2048,6 +2069,34 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                       ElementSizeInBits - 1))
         HasError = true;
       break;
+    case SVETypeFlags::ImmCheckLaneIndex:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+                                      (128 / (1 * ElementSizeInBits)) - 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+                                      (128 / (2 * ElementSizeInBits)) - 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckLaneIndexDot:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+                                      (128 / (4 * ElementSizeInBits)) - 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckComplexRot90_270:
+      if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+                              diag::err_rotation_argument_to_cadd))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckComplexRotAll90:
+      if (CheckImmediateInSet(
+              [](int64_t V) {
+                return V == 0 || V == 90 || V == 180 || V == 270;
+              },
+              diag::err_rotation_argument_to_cmla))
+        HasError = true;
+      break;
     }
   }
 

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
new file mode 100644
index 000000000000..454d834d23bd
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svcmla_f16_m
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 180)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla,_f16,_m,)(pg, op1, op2, op3, 180);
+}
+
+svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svcmla_f32_m
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 270)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla,_f32,_m,)(pg, op1, op2, op3, 270);
+}
+
+svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // CHECK-LABEL: test_svcmla_f64_m
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 0)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 0);
+}
+
+svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0, i32 0)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 0, 0);
+}
+
+svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f16_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 3, i32 90)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 3, 90);
+}
+
+svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0, i32 180)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 0, 180);
+}
+
+svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f32_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 1, i32 270)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 1, 270);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
new file mode 100644
index 000000000000..94e67604fae2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3, i32 0)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 0);
+}
+
+svint32_t test_svdot_lane_s32_1(svint32_t op1, svint8_t op2, svint8_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s32_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 3);
+}
+
+svint64_t test_svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3, i32 0)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 0);
+}
+
+svint64_t test_svdot_lane_s64_1(svint64_t op1, svint16_t op2, svint16_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s64_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3, i32 1)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 1);
+}
+
+svuint32_t test_svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_u32,,)(op1, op2, op3, 3);
+}
+
+svuint64_t test_svdot_lane_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3, i32 1)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_u64,,)(op1, op2, op3, 1);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
new file mode 100644
index 000000000000..8e5e354b9157
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 0);
+}
+
+svfloat16_t test_svmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f16_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 7)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 7);
+}
+
+svfloat32_t test_svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, 0);
+}
+
+svfloat32_t test_svmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f32_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 3)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, 3);
+}
+
+svfloat64_t test_svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 0)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 0);
+}
+
+svfloat64_t test_svmla_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f64_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 1)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 1);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
new file mode 100644
index 000000000000..1f68f6b19438
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sve.h>
+
+svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error at +1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_m,)(pg, op1, op2, 0);
+}
+
+svfloat32_t test_svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+{
+  // expected-error at +1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f32,_m,)(pg, op1, op2, 0);
+}
+
+svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // expected-error at +1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f64,_m,)(pg, op1, op2, 0);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c
new file mode 100644
index 000000000000..94f36260ba37
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sve.h>
+
+svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_m,)(pg, op1, op2, op3, 19);
+}
+
+svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f32,_m,)(pg, op1, op2, op3, 19);
+}
+
+svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 19);
+}
+
+svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, -1, 0);
+}
+
+svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, -1, 90);
+}
+
+svfloat16_t test_svcmla_lane_f16_2(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, -1, 180);
+}
+
+svfloat16_t test_svcmla_lane_f16_3(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, -1, 270);
+}
+
+svfloat16_t test_svcmla_lane_f16_4(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 0, 19);
+}
+
+svfloat16_t test_svcmla_lane_f16_5(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 1, 19);
+}
+
+svfloat16_t test_svcmla_lane_f16_6(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 2, 19);
+}
+
+svfloat16_t test_svcmla_lane_f16_7(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 3, 19);
+}
+
+svfloat16_t test_svcmla_lane_f16_8(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 4, 0);
+}
+
+svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, -1, 0);
+}
+
+svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 0, 19);
+}
+
+svfloat32_t test_svcmla_lane_f32_2(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error at +1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 1, 19);
+}
+
+svfloat32_t test_svcmla_lane_f32_3(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 2, 0);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_dot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_dot.c
new file mode 100644
index 000000000000..9ea5abfcb410
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_dot.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sve.h>
+
+svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, -1);
+}
+
+svint32_t test_svdot_lane_s32_1(svint32_t op1, svint8_t op2, svint8_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 4);
+}
+
+svint64_t test_svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, -1);
+}
+
+svint64_t test_svdot_lane_s64_1(svint64_t op1, svint16_t op2, svint16_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 2);
+}
+
+svuint32_t test_svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svdot_lane,_u32,,)(op1, op2, op3, 4);
+}
+
+svuint64_t test_svdot_lane_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svdot_lane,_u64,,)(op1, op2, op3, 2);
+}

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mla.c
new file mode 100644
index 000000000000..62249694f23e
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mla.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sve.h>
+
+svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
+  return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 8);
+}
+
+svfloat32_t test_svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, -1);
+}
+
+svfloat64_t test_svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // expected-error-re at +1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 2);
+}

diff  --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 03fe8228dd86..1e01f611bfa2 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -456,9 +456,19 @@ void SVEType::applyModifier(char Mod) {
     Bitwidth = ElementBitwidth;
     NumVectors = 0;
     break;
+  case 'e':
+    Signed = false;
+    ElementBitwidth /= 2;
+    break;
   case 'h':
     ElementBitwidth /= 2;
     break;
+  case 'q':
+    ElementBitwidth /= 4;
+    break;
+  case 'o':
+    ElementBitwidth *= 4;
+    break;
   case 'P':
     Signed = true;
     Float = false;