[clang] 3cb8b4c - [SveEmitter] Add builtins for SVE2 Polynomial arithmetic

Sander de Smalen via cfe-commits <cfe-commits@lists.llvm.org>
Thu May 7 03:53:24 PDT 2020


Author: Sander de Smalen
Date: 2020-05-07T11:53:04+01:00
New Revision: 3cb8b4c193c1904543511dfe892475c4e733a778

URL: https://github.com/llvm/llvm-project/commit/3cb8b4c193c1904543511dfe892475c4e733a778
DIFF: https://github.com/llvm/llvm-project/commit/3cb8b4c193c1904543511dfe892475c4e733a778.diff

LOG: [SveEmitter] Add builtins for SVE2 Polynomial arithmetic

This patch adds builtins for:
- sveorbt
- sveortb
- svpmul
- svpmullb, svpmullb_pair
- svpmullt, svpmullt_pair

The svpmullb and svpmullt builtins are expressed using the svpmullb_pair
and svpmullt_pair LLVM IR intrinsics, respectively, with the result then
bitcast to the vector type with double-width elements.
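
As an illustration (not part of the commit; the function name is
hypothetical), a minimal sketch of the lowering, based on the CHECK lines
in the added tests:

  #include <arm_sve.h>

  // The widening svpmullb_u16 form: CodeGen calls the narrower pair
  // intrinsic and bitcasts the result to the wider vector type:
  //   %pair = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  //   %res  = bitcast <vscale x 16 x i8> %pair to <vscale x 8 x i16>
  svuint16_t widen_pmullb(svuint8_t a, svuint8_t b) {
    return svpmullb_u16(a, b);
  }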

Reviewers: SjoerdMeijer, efriedma, rengolin

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D79480

Added: 
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eorbt.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eortb.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmul.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullb.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullt.c

Modified: 
    clang/include/clang/Basic/arm_sve.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/utils/TableGen/SveEmitter.cpp

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 2d2a09d4524d..32273c3250ae 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -66,6 +66,7 @@
 // P: predicate type
 // s: scalar of element type
 // a: scalar of element type (splat to vector type)
+// R: scalar of 1/2 width element type (splat to vector type)
 // e: 1/2 width unsigned elements, 2x element count
 // h: 1/2 width elements, 2x element count
 // q: 1/4 width elements, 4x element count
@@ -1319,6 +1320,26 @@ def SVSTNT1H_SCATTER_INDEX_S : MInst<"svstnt1h_scatter[_{2}base]_index[_{d}]", "
 def SVSTNT1W_SCATTER_INDEX_S : MInst<"svstnt1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl",      [IsScatterStore], MemEltTyInt32,   "aarch64_sve_stnt1_scatter_scalar_offset">;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// SVE2 - Polynomial arithmetic
+
+let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {
+def SVEORBT         : SInst<"sveorbt[_{d}]",         "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">;
+def SVEORBT_N       : SInst<"sveorbt[_n_{d}]",       "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">;
+def SVEORTB         : SInst<"sveortb[_{d}]",         "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">;
+def SVEORTB_N       : SInst<"sveortb[_n_{d}]",       "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">;
+def SVPMUL          : SInst<"svpmul[_{d}]",          "ddd",  "Uc",           MergeNone, "aarch64_sve_pmul">;
+def SVPMUL_N        : SInst<"svpmul[_n_{d}]",        "dda",  "Uc",           MergeNone, "aarch64_sve_pmul">;
+def SVPMULLB        : SInst<"svpmullb[_{d}]",        "dhh",  "UsUl",         MergeNone>;
+def SVPMULLB_N      : SInst<"svpmullb[_n_{d}]",      "dhR",  "UsUl",         MergeNone>;
+def SVPMULLB_PAIR   : SInst<"svpmullb_pair[_{d}]",   "ddd",  "UcUi",         MergeNone, "aarch64_sve_pmullb_pair">;
+def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda",  "UcUi",         MergeNone, "aarch64_sve_pmullb_pair">;
+def SVPMULLT        : SInst<"svpmullt[_{d}]",        "dhh",  "UsUl",         MergeNone>;
+def SVPMULLT_N      : SInst<"svpmullt[_n_{d}]",      "dhR",  "UsUl",         MergeNone>;
+def SVPMULLT_PAIR   : SInst<"svpmullt_pair[_{d}]",   "ddd",  "UcUi",         MergeNone, "aarch64_sve_pmullt_pair">;
+def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda",  "UcUi",         MergeNone, "aarch64_sve_pmullt_pair">;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Contiguous conflict detection
 let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {

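As a reading aid (not part of the commit): in the prototype strings above,
the first character gives the return type and the rest give the parameters.
"dhR" for SVPMULLB_N therefore reads as: return the default vector type 'd',
take a vector of half-width elements 'h', then a scalar of the half-width
element type 'R' that is splat to a vector. For svuint16_t this expands to
the signature exercised in the tests below:

  // Illustrative expansion of the "dhR" prototype for svuint16_t:
  svuint16_t svpmullb_n_u16(svuint8_t op1, uint8_t op2);
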
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 112a0ee7752f..dbe8826454dc 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7803,6 +7803,27 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
   return Builder.CreateCall(F, Ops);
 }
 
+// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
+// svpmullt_pair intrinsics, with the exception that their results are bitcast
+// to a wider type.
+Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags,
+                                     SmallVectorImpl<Value *> &Ops,
+                                     unsigned BuiltinID) {
+  // Splat scalar operand to vector (intrinsics with _n infix)
+  if (TypeFlags.hasSplatOperand()) {
+    unsigned OpNo = TypeFlags.getSplatOperand();
+    Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
+  }
+
+  // The pair-wise function has a narrower overloaded type.
+  Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
+  Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
+
+  // Now bitcast to the wider result type.
+  llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
+  return EmitSVEReinterpret(Call, Ty);
+}
+
 Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
                                             SmallVectorImpl<Value *> &Ops,
                                             unsigned BuiltinID) {
@@ -7887,6 +7908,16 @@ Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
   return Builder.CreateCall(F, Scalar);
 }
 
+Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
+  // FIXME: For big endian this needs an additional REV, or needs a separate
+  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
+  // instruction is defined as 'bitwise' equivalent from memory point of
+  // view (when storing/reloading), whereas the svreinterpret builtin
+  // implements bitwise equivalent cast from register point of view.
+  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
+  return Builder.CreateBitCast(Val, Ty);
+}
+
 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                       SmallVectorImpl<Value *> &Ops) {
   auto *SplatZero = Constant::getNullValue(Ty);
@@ -7932,13 +7963,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
       BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
     Value *Val = EmitScalarExpr(E->getArg(0));
-    // FIXME: For big endian this needs an additional REV, or needs a separate
-    // intrinsic that is code-generated as a no-op, because the LLVM bitcast
-    // instruction is defined as 'bitwise' equivalent from memory point of
-    // view (when storing/reloading), whereas the svreinterpret builtin
-    // implements bitwise equivalent cast from register point of view.
-    // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
-    return Builder.CreateBitCast(Val, Ty);
+    return EmitSVEReinterpret(Val, Ty);
   }
 
   llvm::SmallVector<Value *, 4> Ops;
@@ -8045,6 +8070,18 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
   }
 
+  case SVE::BI__builtin_sve_svpmullt_u16:
+  case SVE::BI__builtin_sve_svpmullt_u64:
+  case SVE::BI__builtin_sve_svpmullt_n_u16:
+  case SVE::BI__builtin_sve_svpmullt_n_u64:
+    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
+
+  case SVE::BI__builtin_sve_svpmullb_u16:
+  case SVE::BI__builtin_sve_svpmullb_u64:
+  case SVE::BI__builtin_sve_svpmullb_n_u16:
+  case SVE::BI__builtin_sve_svpmullb_n_u64:
+    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
+
   case SVE::BI__builtin_sve_svdupq_n_b8:
   case SVE::BI__builtin_sve_svdupq_n_b16:
   case SVE::BI__builtin_sve_svdupq_n_b32:

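A hedged sketch (not part of the commit; the caller name is hypothetical) of
the whole EmitSVEPMull flow for a splatted "_n" form, mirroring the CHECK
lines in the tests below: the scalar is splat with EmitSVEDupX, the narrow
pair intrinsic is called, and EmitSVEReinterpret bitcasts the result:

  #include <arm_sve.h>

  // Expected IR, per the tests:
  //   %dup  = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
  //   %pair = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullb.pair.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %dup)
  //   %res  = bitcast <vscale x 4 x i32> %pair to <vscale x 2 x i64>
  svuint64_t widen_pmullb_n(svuint32_t a, uint32_t b) {
    return svpmullb_n_u64(a, b);
  }
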
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index da8681ab51c2..06898f3232f4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3920,6 +3920,10 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags);
   llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags);
   llvm::Value *EmitSVEDupX(llvm::Value *Scalar);
+  llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty);
+  llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags,
+                            llvm::SmallVectorImpl<llvm::Value *> &Ops,
+                            unsigned BuiltinID);
   llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred,
                                     llvm::ScalableVectorType *VTy);
   llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags,

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eorbt.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eorbt.c
new file mode 100644
index 000000000000..865ac2e444e5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eorbt.c
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_sveorbt_s8(svint8_t op1, svint8_t op2, svint8_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_s8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_s8'}}
+  return SVE_ACLE_FUNC(sveorbt,_s8,,)(op1, op2, op3);
+}
+
+svint16_t test_sveorbt_s16(svint16_t op1, svint16_t op2, svint16_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_s16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eorbt.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_s16'}}
+  return SVE_ACLE_FUNC(sveorbt,_s16,,)(op1, op2, op3);
+}
+
+svint32_t test_sveorbt_s32(svint32_t op1, svint32_t op2, svint32_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eorbt.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %op3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_s32'}}
+  return SVE_ACLE_FUNC(sveorbt,_s32,,)(op1, op2, op3);
+}
+
+svint64_t test_sveorbt_s64(svint64_t op1, svint64_t op2, svint64_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eorbt.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %op3)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_s64'}}
+  return SVE_ACLE_FUNC(sveorbt,_s64,,)(op1, op2, op3);
+}
+
+svuint8_t test_sveorbt_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_u8'}}
+  return SVE_ACLE_FUNC(sveorbt,_u8,,)(op1, op2, op3);
+}
+
+svuint16_t test_sveorbt_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eorbt.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_u16'}}
+  return SVE_ACLE_FUNC(sveorbt,_u16,,)(op1, op2, op3);
+}
+
+svuint32_t test_sveorbt_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eorbt.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %op3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_u32'}}
+  return SVE_ACLE_FUNC(sveorbt,_u32,,)(op1, op2, op3);
+}
+
+svuint64_t test_sveorbt_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eorbt.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %op3)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_u64'}}
+  return SVE_ACLE_FUNC(sveorbt,_u64,,)(op1, op2, op3);
+}
+
+svint8_t test_sveorbt_n_s8(svint8_t op1, svint8_t op2, int8_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_s8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_s8'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_s8,,)(op1, op2, op3);
+}
+
+svint16_t test_sveorbt_n_s16(svint16_t op1, svint16_t op2, int16_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_s16
+  // CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eorbt.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %[[DUP]])
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_s16'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_s16,,)(op1, op2, op3);
+}
+
+svint32_t test_sveorbt_n_s32(svint32_t op1, svint32_t op2, int32_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_s32
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eorbt.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_s32'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_s32,,)(op1, op2, op3);
+}
+
+svint64_t test_sveorbt_n_s64(svint64_t op1, svint64_t op2, int64_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_s64
+  // CHECK: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eorbt.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %[[DUP]])
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_s64'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_s64,,)(op1, op2, op3);
+}
+
+svuint8_t test_sveorbt_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_u8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eorbt.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_u8'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_u8,,)(op1, op2, op3);
+}
+
+svuint16_t test_sveorbt_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_u16
+  // CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eorbt.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %[[DUP]])
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_u16'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_u16,,)(op1, op2, op3);
+}
+
+svuint32_t test_sveorbt_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_u32
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eorbt.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_u32'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_u32,,)(op1, op2, op3);
+}
+
+svuint64_t test_sveorbt_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3)
+{
+  // CHECK-LABEL: test_sveorbt_n_u64
+  // CHECK: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eorbt.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %[[DUP]])
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveorbt'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveorbt_n_u64'}}
+  return SVE_ACLE_FUNC(sveorbt,_n_u64,,)(op1, op2, op3);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eortb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eortb.c
new file mode 100644
index 000000000000..6734d9fa0c6d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eortb.c
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_sveortb_s8(svint8_t op1, svint8_t op2, svint8_t op3)
+{
+  // CHECK-LABEL: test_sveortb_s8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eortb.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_s8'}}
+  return SVE_ACLE_FUNC(sveortb,_s8,,)(op1, op2, op3);
+}
+
+svint16_t test_sveortb_s16(svint16_t op1, svint16_t op2, svint16_t op3)
+{
+  // CHECK-LABEL: test_sveortb_s16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eortb.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_s16'}}
+  return SVE_ACLE_FUNC(sveortb,_s16,,)(op1, op2, op3);
+}
+
+svint32_t test_sveortb_s32(svint32_t op1, svint32_t op2, svint32_t op3)
+{
+  // CHECK-LABEL: test_sveortb_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eortb.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %op3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_s32'}}
+  return SVE_ACLE_FUNC(sveortb,_s32,,)(op1, op2, op3);
+}
+
+svint64_t test_sveortb_s64(svint64_t op1, svint64_t op2, svint64_t op3)
+{
+  // CHECK-LABEL: test_sveortb_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eortb.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %op3)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_s64'}}
+  return SVE_ACLE_FUNC(sveortb,_s64,,)(op1, op2, op3);
+}
+
+svuint8_t test_sveortb_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3)
+{
+  // CHECK-LABEL: test_sveortb_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eortb.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_u8'}}
+  return SVE_ACLE_FUNC(sveortb,_u8,,)(op1, op2, op3);
+}
+
+svuint16_t test_sveortb_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3)
+{
+  // CHECK-LABEL: test_sveortb_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eortb.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_u16'}}
+  return SVE_ACLE_FUNC(sveortb,_u16,,)(op1, op2, op3);
+}
+
+svuint32_t test_sveortb_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3)
+{
+  // CHECK-LABEL: test_sveortb_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eortb.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %op3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_u32'}}
+  return SVE_ACLE_FUNC(sveortb,_u32,,)(op1, op2, op3);
+}
+
+svuint64_t test_sveortb_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3)
+{
+  // CHECK-LABEL: test_sveortb_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eortb.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %op3)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_u64'}}
+  return SVE_ACLE_FUNC(sveortb,_u64,,)(op1, op2, op3);
+}
+
+svint8_t test_sveortb_n_s8(svint8_t op1, svint8_t op2, int8_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_s8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eortb.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_s8'}}
+  return SVE_ACLE_FUNC(sveortb,_n_s8,,)(op1, op2, op3);
+}
+
+svint16_t test_sveortb_n_s16(svint16_t op1, svint16_t op2, int16_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_s16
+  // CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eortb.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %[[DUP]])
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_s16'}}
+  return SVE_ACLE_FUNC(sveortb,_n_s16,,)(op1, op2, op3);
+}
+
+svint32_t test_sveortb_n_s32(svint32_t op1, svint32_t op2, int32_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_s32
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eortb.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_s32'}}
+  return SVE_ACLE_FUNC(sveortb,_n_s32,,)(op1, op2, op3);
+}
+
+svint64_t test_sveortb_n_s64(svint64_t op1, svint64_t op2, int64_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_s64
+  // CHECK: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eortb.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %[[DUP]])
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_s64'}}
+  return SVE_ACLE_FUNC(sveortb,_n_s64,,)(op1, op2, op3);
+}
+
+svuint8_t test_sveortb_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_u8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eortb.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_u8'}}
+  return SVE_ACLE_FUNC(sveortb,_n_u8,,)(op1, op2, op3);
+}
+
+svuint16_t test_sveortb_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_u16
+  // CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eortb.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %[[DUP]])
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_u16'}}
+  return SVE_ACLE_FUNC(sveortb,_n_u16,,)(op1, op2, op3);
+}
+
+svuint32_t test_sveortb_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_u32
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eortb.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_u32'}}
+  return SVE_ACLE_FUNC(sveortb,_n_u32,,)(op1, op2, op3);
+}
+
+svuint64_t test_sveortb_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3)
+{
+  // CHECK-LABEL: test_sveortb_n_u64
+  // CHECK: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eortb.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %[[DUP]])
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'sveortb'}}
+  // expected-warning@+1 {{implicit declaration of function 'sveortb_n_u64'}}
+  return SVE_ACLE_FUNC(sveortb,_n_u64,,)(op1, op2, op3);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmul.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmul.c
new file mode 100644
index 000000000000..05dca4ecc420
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmul.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svuint8_t test_svpmul_u8(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svpmul_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmul'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmul_u8'}}
+  return SVE_ACLE_FUNC(svpmul,_u8,,)(op1, op2);
+}
+
+svuint8_t test_svpmul_n_u8(svuint8_t op1, uint8_t op2)
+{
+  // CHECK-LABEL: test_svpmul_n_u8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmul'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmul_n_u8'}}
+  return SVE_ACLE_FUNC(svpmul,_n_u8,,)(op1, op2);
+}

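Background note (not from the commit): PMUL is a carry-less (polynomial,
GF(2)) multiply, so for example 3 * 3 = 5 rather than 9, because
(x + 1)(x + 1) = x^2 + 2x + 1 = x^2 + 1 over GF(2):

  // Carry-less multiply of every u8 lane (illustrative):
  //   svpmul_n_u8(svdup_n_u8(3), 3) yields a vector of 5s.
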
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullb.c
new file mode 100644
index 000000000000..a12d1fd09ffd
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullb.c
@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svuint8_t test_svpmullb_pair_u8(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_pair_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_pair_u8'}}
+  return SVE_ACLE_FUNC(svpmullb_pair,_u8,,)(op1, op2);
+}
+
+svuint32_t test_svpmullb_pair_u32(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_pair_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullb.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_pair_u32'}}
+  return SVE_ACLE_FUNC(svpmullb_pair,_u32,,)(op1, op2);
+}
+
+svuint8_t test_svpmullb_pair_n_u8(svuint8_t op1, uint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_pair_n_u8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_pair_n_u8'}}
+  return SVE_ACLE_FUNC(svpmullb_pair,_n_u8,,)(op1, op2);
+}
+
+svuint32_t test_svpmullb_pair_n_u32(svuint32_t op1, uint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_pair_n_u32
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullb.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_pair_n_u32'}}
+  return SVE_ACLE_FUNC(svpmullb_pair,_n_u32,,)(op1, op2);
+}
+
+svuint16_t test_svpmullb_u16(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 16 x i8> %[[INTRINSIC]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_u16'}}
+  return SVE_ACLE_FUNC(svpmullb,_u16,,)(op1, op2);
+}
+
+svuint64_t test_svpmullb_u64(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullb.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 4 x i32> %[[INTRINSIC]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_u64'}}
+  return SVE_ACLE_FUNC(svpmullb,_u64,,)(op1, op2);
+}
+
+svuint16_t test_svpmullb_n_u16(svuint8_t op1, uint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_n_u16
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 16 x i8> %[[INTRINSIC]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_n_u16'}}
+  return SVE_ACLE_FUNC(svpmullb,_n_u16,,)(op1, op2);
+}
+
+svuint64_t test_svpmullb_n_u64(svuint32_t op1, uint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullb_n_u64
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullb.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 4 x i32> %[[INTRINSIC]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullb'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullb_n_u64'}}
+  return SVE_ACLE_FUNC(svpmullb,_n_u64,,)(op1, op2);
+}

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullt.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullt.c
new file mode 100644
index 000000000000..5b4b05aa72fb
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullt.c
@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svuint8_t test_svpmullt_pair_u8(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_pair_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullt.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_pair_u8'}}
+  return SVE_ACLE_FUNC(svpmullt_pair,_u8,,)(op1, op2);
+}
+
+svuint32_t test_svpmullt_pair_u32(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_pair_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullt.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_pair_u32'}}
+  return SVE_ACLE_FUNC(svpmullt_pair,_u32,,)(op1, op2);
+}
+
+svuint8_t test_svpmullt_pair_n_u8(svuint8_t op1, uint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_pair_n_u8
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullt.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_pair_n_u8'}}
+  return SVE_ACLE_FUNC(svpmullt_pair,_n_u8,,)(op1, op2);
+}
+
+svuint32_t test_svpmullt_pair_n_u32(svuint32_t op1, uint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_pair_n_u32
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullt.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt_pair'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_pair_n_u32'}}
+  return SVE_ACLE_FUNC(svpmullt_pair,_n_u32,,)(op1, op2);
+}
+
+svuint16_t test_svpmullt_u16(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullt.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 16 x i8> %[[INTRINSIC]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_u16'}}
+  return SVE_ACLE_FUNC(svpmullt,_u16,,)(op1, op2);
+}
+
+svuint64_t test_svpmullt_u64(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullt.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 4 x i32> %[[INTRINSIC]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_u64'}}
+  return SVE_ACLE_FUNC(svpmullt,_u64,,)(op1, op2);
+}
+
+svuint16_t test_svpmullt_n_u16(svuint8_t op1, uint8_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_n_u16
+  // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullt.pair.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 16 x i8> %[[INTRINSIC]] to <vscale x 8 x i16>
+  // CHECK: ret <vscale x 8 x i16> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_n_u16'}}
+  return SVE_ACLE_FUNC(svpmullt,_n_u16,,)(op1, op2);
+}
+
+svuint64_t test_svpmullt_n_u64(svuint32_t op1, uint32_t op2)
+{
+  // CHECK-LABEL: test_svpmullt_n_u64
+  // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.pmullt.pair.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: %[[BITCAST:.*]] = bitcast <vscale x 4 x i32> %[[INTRINSIC]] to <vscale x 2 x i64>
+  // CHECK: ret <vscale x 2 x i64> %[[BITCAST]]
+  // overload-warning@+2 {{implicit declaration of function 'svpmullt'}}
+  // expected-warning@+1 {{implicit declaration of function 'svpmullt_n_u64'}}
+  return SVE_ACLE_FUNC(svpmullt,_n_u64,,)(op1, op2);
+}

diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 5b734fe44e47..0638a216c386 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -528,6 +528,10 @@ void SVEType::applyModifier(char Mod) {
     Bitwidth = ElementBitwidth;
     NumVectors = 0;
     break;
+  case 'R':
+    ElementBitwidth /= 2;
+    NumVectors = 0;
+    break;
   case 'K':
     Signed = true;
     Float = false;

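For example (illustrative, not part of the commit): applying the new 'R'
modifier to the default type svuint16_t halves ElementBitwidth from 16 to 8
and sets NumVectors to 0, yielding the scalar uint8_t; CodeGen later splats
it back to a full vector via EmitSVEDupX, as the dup.x calls in the tests
show:

  // 'd' (default type)      : svuint16_t -> <vscale x 8 x i16>
  // 'R' (half-width scalar) : uint8_t    -> splat to <vscale x 16 x i8>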
