[clang] 47edf5b - [ARM, CDE] Generalize MVE intrinsics infrastructure to support CDE

Mikhail Maltsev via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 10 07:03:32 PDT 2020


Author: Mikhail Maltsev
Date: 2020-03-10T14:03:16Z
New Revision: 47edf5bafb8ede52dca836eac770efffbf657d30

URL: https://github.com/llvm/llvm-project/commit/47edf5bafb8ede52dca836eac770efffbf657d30
DIFF: https://github.com/llvm/llvm-project/commit/47edf5bafb8ede52dca836eac770efffbf657d30.diff

LOG: [ARM,CDE] Generalize MVE intrinsics infrastructure to support CDE

Summary:
This patch generalizes the existing code to support CDE intrinsics
which will share some properties with existing MVE intrinsics
(some of the intrinsics will be polymorphic and accept/return values
of MVE vector types).
Specifically the patch:
* Adds new tablegen backends -gen-arm-cde-builtin-def,
  -gen-arm-cde-builtin-codegen, -gen-arm-cde-builtin-sema,
  -gen-arm-cde-builtin-aliases, -gen-arm-cde-builtin-header based on
  existing MVE backends.
* Renames the '__clang_arm_mve_alias' attribute to
  '__clang_arm_builtin_alias' (it will be used with CDE intrinsics as
  well as MVE intrinsics).
* Implements semantic checks for the coprocessor argument of the CDE
  intrinsics as well as the existing coprocessor intrinsics.
* Adds one CDE intrinsic __arm_cx1 to test the above changes

Reviewers: simon_tatham, MarkMurrayARM, ostannard, dmgreen

Reviewed By: simon_tatham

Subscribers: sdesmalen, mgorny, kristof.beyls, danielkiss, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75850

Added: 
    clang/include/clang/Basic/arm_cde.td
    clang/test/CodeGen/arm-cde-gpr.c
    clang/test/Headers/arm-cde-header.c
    clang/test/Sema/arm-cde-immediates.c

Modified: 
    clang/include/clang/Basic/Attr.td
    clang/include/clang/Basic/AttrDocs.td
    clang/include/clang/Basic/BuiltinsARM.def
    clang/include/clang/Basic/CMakeLists.txt
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/include/clang/Basic/arm_mve_defs.td
    clang/include/clang/Sema/Sema.h
    clang/lib/AST/Decl.cpp
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/lib/Headers/CMakeLists.txt
    clang/lib/Sema/SemaChecking.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/test/Misc/pragma-attribute-supported-attributes-list.test
    clang/test/Sema/arm-mve-alias-attribute.c
    clang/utils/TableGen/MveEmitter.cpp
    clang/utils/TableGen/TableGen.cpp
    clang/utils/TableGen/TableGenBackends.h
    llvm/include/llvm/IR/IntrinsicsARM.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index a5b053209866..b18cfef33fba 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -622,11 +622,11 @@ def Alias : Attr {
   let Documentation = [Undocumented];
 }
 
-def ArmMveAlias : InheritableAttr, TargetSpecificAttr<TargetARM> {
-  let Spellings = [Clang<"__clang_arm_mve_alias">];
+def ArmBuiltinAlias : InheritableAttr, TargetSpecificAttr<TargetARM> {
+  let Spellings = [Clang<"__clang_arm_builtin_alias">];
   let Args = [IdentifierArgument<"BuiltinName">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
-  let Documentation = [ArmMveAliasDocs];
+  let Documentation = [ArmBuiltinAliasDocs];
 }
 
 def Aligned : InheritableAttr {

diff  --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index cc9d3c80c0da..aea574995c8e 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -4669,11 +4669,11 @@ When the Owner's lifetime ends, it will consider the Pointer to be dangling.
 }];
 }
 
-def ArmMveAliasDocs : Documentation {
+def ArmBuiltinAliasDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-This attribute is used in the implementation of the ACLE intrinsics
-for the Arm MVE instruction set. It allows the intrinsic functions to
+This attribute is used in the implementation of the ACLE intrinsics.
+It allows the intrinsic functions to
 be declared using the names defined in ACLE, and still be recognized
 as clang builtins equivalent to the underlying name. For example,
 ``arm_mve.h`` declares the function ``vaddq_u32`` with
@@ -4684,8 +4684,8 @@ recognized as that clang builtin, and in the latter case, the choice
 of which builtin to identify the function as can be deferred until
 after overload resolution.
 
-This attribute can only be used to set up the aliases for the MVE
-intrinsic functions; it is intended for use only inside ``arm_mve.h``,
+This attribute can only be used to set up the aliases for certain Arm
+intrinsic functions; it is intended for use only inside ``arm_*.h``
 and is not a general mechanism for declaring arbitrary aliases for
 clang builtin functions.
   }];

diff  --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def
index 848abb44ad36..be20c24aa28a 100644
--- a/clang/include/clang/Basic/BuiltinsARM.def
+++ b/clang/include/clang/Basic/BuiltinsARM.def
@@ -202,6 +202,8 @@ BUILTIN(__builtin_arm_wsrp, "vcC*vC*", "nc")
 // aren't included from both here and BuiltinsAArch64.def.)
 #include "clang/Basic/arm_mve_builtins.inc"
 
+#include "clang/Basic/arm_cde_builtins.inc"
+
 // MSVC
 LANGBUILTIN(__emit, "vIUiC", "", ALL_MS_LANGUAGES)
 

diff  --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 5912034d8250..ea011a8af177 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -60,3 +60,16 @@ clang_tablegen(arm_mve_builtin_sema.inc -gen-arm-mve-builtin-sema
 clang_tablegen(arm_mve_builtin_aliases.inc -gen-arm-mve-builtin-aliases
   SOURCE arm_mve.td
   TARGET ClangARMMveBuiltinAliases)
+
+clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinsDef)
+clang_tablegen(arm_cde_builtin_cg.inc -gen-arm-cde-builtin-codegen
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinCG)
+clang_tablegen(arm_cde_builtin_sema.inc -gen-arm-cde-builtin-sema
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinSema)
+clang_tablegen(arm_cde_builtin_aliases.inc -gen-arm-cde-builtin-aliases
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinAliases)

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f4ed6b2e649e..d8b43519dd3e 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -639,6 +639,8 @@ def warn_redecl_library_builtin : Warning<
 def err_builtin_definition : Error<"definition of builtin function %0">;
 def err_builtin_redeclare : Error<"cannot redeclare builtin function %0">;
 def err_arm_invalid_specialreg : Error<"invalid special register for builtin">;
+def err_arm_invalid_coproc : Error<"coprocessor %0 must be configured as "
+  "%select{GCP|CDE}1">;
 def err_invalid_cpu_supports : Error<"invalid cpu feature string for builtin">;
 def err_invalid_cpu_is : Error<"invalid cpu name for builtin">;
 def err_invalid_cpu_specific_dispatch_value : Error<
@@ -6722,8 +6724,8 @@ def warn_objc_unsafe_perform_selector : Warning<
   InGroup<DiagGroup<"objc-unsafe-perform-selector">>;
 def note_objc_unsafe_perform_selector_method_declared_here :  Note<
   "method %0 that returns %1 declared here">;
-def err_attribute_arm_mve_alias : Error<
-  "'__clang_arm_mve_alias' attribute can only be applied to an ARM MVE builtin">;
+def err_attribute_arm_builtin_alias : Error<
+  "'__clang_arm_builtin_alias' attribute can only be applied to an ARM builtin">;
 def err_attribute_arm_mve_polymorphism : Error<
   "'__clang_arm_mve_strict_polymorphism' attribute can only be applied to an MVE/NEON vector type">;
 

diff  --git a/clang/include/clang/Basic/arm_cde.td b/clang/include/clang/Basic/arm_cde.td
new file mode 100644
index 000000000000..222b63966a38
--- /dev/null
+++ b/clang/include/clang/Basic/arm_cde.td
@@ -0,0 +1,45 @@
+//===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the set of ACLE-specified source-level intrinsic
+// functions wrapping the CDE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+include "arm_mve_defs.td"
+
+class CDEIntrinsic<Type ret, dag args, dag codegen>
+  : Intrinsic<ret, args, codegen> {
+  let builtinExtension = "cde";
+}
+
+// Immediate (in range [0, 2^numBits - 1])
+class IB_ConstBits<int numBits> : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;
+// numBits-wide immediate of type u32
+class CDEImmediateBits<int numBits> : Immediate<u32, IB_ConstBits<numBits>>;
+
+// LLVM IR CDE intrinsic
+class CDEIRInt<string name, list<Type> params = [], bit appendKind = 0>
+      : IRIntBase<"arm_cde_" # name, params, appendKind>;
+
+// Coprocessor immediate
+def imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;
+
+// Immediate integer parameters
+def imm_3b : CDEImmediateBits<3>;
+def imm_4b : CDEImmediateBits<4>;
+def imm_6b :  CDEImmediateBits<6>;
+def imm_7b :  CDEImmediateBits<7>;
+def imm_9b :  CDEImmediateBits<9>;
+def imm_11b : CDEImmediateBits<11>;
+def imm_12b : CDEImmediateBits<12>;
+def imm_13b : CDEImmediateBits<13>;
+
+let pnt = PNT_None, params = T.None in
+def cx1 : CDEIntrinsic<u32, (args imm_coproc:$cp, imm_13b:$imm),
+                            (CDEIRInt<"cx1"> $cp, $imm)>;

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index daf73871f052..f1424f2ea594 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -476,9 +476,16 @@ class Intrinsic<Type ret_, dag args_, dag codegen_> {
   // True if the builtin has to avoid evaluating its arguments.
   bit nonEvaluating = 0;
 
+  // True if the intrinsic needs only the C header part (no codegen, semantic
+  // checks, etc). Used for redeclaring MVE intrinsics in the arm_cde.h header.
+  bit headerOnly = 0;
+
   // Use to override the suffix letter to make e.g.vfooq_p16
   // with an override suffix letter of "p".
   string overrideKindLetter = "";
+
+  // Name of the architecture extension, used in the Clang builtin name
+  string builtinExtension = "mve";
 }
 
 // Sometimes you have to use two separate Intrinsic declarations to
@@ -553,6 +560,7 @@ multiclass IntrinsicMXNameOverride<Type rettype, dag arguments, dag cg,
 // instead of having to repeat a long list every time.
 
 def T {
+  list<Type> None = [Void];
   list<Type> Signed = [s8, s16, s32];
   list<Type> Unsigned = [u8, u16, u32];
   list<Type> Int = Signed # Unsigned;

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 2304a9718567..60a4f2ef7048 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11764,6 +11764,8 @@ class Sema final {
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool CheckCDEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool CheckARMCoprocessorImmediate(const Expr *CoprocArg, bool WantCDE);
   bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
 
   bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);

diff  --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 7625acd7d9a4..544054449c48 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3148,8 +3148,8 @@ FunctionDecl *FunctionDecl::getCanonicalDecl() { return getFirstDecl(); }
 unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
   unsigned BuiltinID;
 
-  if (const auto *AMAA = getAttr<ArmMveAliasAttr>()) {
-    BuiltinID = AMAA->getBuiltinName()->getBuiltinID();
+  if (const auto *ABAA = getAttr<ArmBuiltinAliasAttr>()) {
+    BuiltinID = ABAA->getBuiltinName()->getBuiltinID();
   } else {
     if (!getIdentifier())
       return 0;
@@ -3181,7 +3181,7 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
   // If the function is marked "overloadable", it has a different mangled name
   // and is not the C library function.
   if (!ConsiderWrapperFunctions && hasAttr<OverloadableAttr>() &&
-      !hasAttr<ArmMveAliasAttr>())
+      !hasAttr<ArmBuiltinAliasAttr>())
     return 0;
 
   if (!Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5a1fb5d391f1..090efaadc4c6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6538,6 +6538,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   // Deal with MVE builtins
   if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
     return Result;
+  // Handle CDE builtins
+  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
+    return Result;
 
   // Find out if any arguments are required to be integer constant
   // expressions.
@@ -7212,6 +7215,17 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
   llvm_unreachable("unknown custom codegen type.");
 }
 
+Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
+                                              const CallExpr *E,
+                                              ReturnValueSlot ReturnValue,
+                                              llvm::Triple::ArchType Arch) {
+  switch (BuiltinID) {
+  default:
+    return nullptr;
+#include "clang/Basic/arm_cde_builtin_cg.inc"
+  }
+}
+
 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E,
                                       SmallVectorImpl<Value *> &Ops,

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 907b4d744b07..f82e489e6217 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3873,6 +3873,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                      ReturnValueSlot ReturnValue,
                                      llvm::Triple::ArchType Arch);
+  llvm::Value *EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                     ReturnValueSlot ReturnValue,
+                                     llvm::Triple::ArchType Arch);
 
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
                                          unsigned LLVMIntrinsic,

diff  --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f172d7a1203f..923784305d05 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -186,6 +186,8 @@ clang_generate_header(-gen-arm-neon arm_neon.td arm_neon.h)
 clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
 # Generate arm_mve.h
 clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
+# Generate arm_cde.h
+clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
 
 add_custom_target(clang-resource-headers ALL DEPENDS ${out_files})
 set_target_properties(clang-resource-headers PROPERTIES

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2e73fcac93e1..24d0d9209a1d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2070,6 +2070,44 @@ bool Sema::CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   }
 }
 
+bool Sema::CheckCDEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  bool Err = false;
+  switch (BuiltinID) {
+  default:
+    return false;
+#include "clang/Basic/arm_cde_builtin_sema.inc"
+  }
+
+  if (Err)
+    return true;
+
+  return CheckARMCoprocessorImmediate(TheCall->getArg(0), /*WantCDE*/ true);
+}
+
+bool Sema::CheckARMCoprocessorImmediate(const Expr *CoprocArg, bool WantCDE) {
+  if (isConstantEvaluated())
+    return false;
+
+  // We can't check the value of a dependent argument.
+  if (CoprocArg->isTypeDependent() || CoprocArg->isValueDependent())
+    return false;
+
+  llvm::APSInt CoprocNoAP;
+  bool IsICE = CoprocArg->isIntegerConstantExpr(CoprocNoAP, Context);
+  assert(IsICE && "Coprocossor immediate is not a constant expression");
+  int64_t CoprocNo = CoprocNoAP.getExtValue();
+  assert(CoprocNo >= 0 && "Coprocessor immediate must be non-negative");
+
+  uint32_t CDECoprocMask = Context.getTargetInfo().getARMCDECoprocMask();
+  bool IsCDECoproc = CoprocNo <= 7 && (CDECoprocMask & (1 << CoprocNo));
+
+  if (IsCDECoproc != WantCDE)
+    return Diag(CoprocArg->getBeginLoc(), diag::err_arm_invalid_coproc)
+           << (int)CoprocNo << (int)WantCDE << CoprocArg->getSourceRange();
+
+  return false;
+}
+
 bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
                                         unsigned MaxWidth) {
   assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
@@ -2212,6 +2250,8 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     return true;
   if (CheckMVEBuiltinFunctionCall(BuiltinID, TheCall))
     return true;
+  if (CheckCDEBuiltinFunctionCall(BuiltinID, TheCall))
+    return true;
 
   // For intrinsics which take an immediate value as part of the instruction,
   // range check them here.
@@ -2234,6 +2274,26 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case ARM::BI__builtin_arm_isb:
   case ARM::BI__builtin_arm_dbg:
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15);
+  case ARM::BI__builtin_arm_cdp:
+  case ARM::BI__builtin_arm_cdp2:
+  case ARM::BI__builtin_arm_mcr:
+  case ARM::BI__builtin_arm_mcr2:
+  case ARM::BI__builtin_arm_mrc:
+  case ARM::BI__builtin_arm_mrc2:
+  case ARM::BI__builtin_arm_mcrr:
+  case ARM::BI__builtin_arm_mcrr2:
+  case ARM::BI__builtin_arm_mrrc:
+  case ARM::BI__builtin_arm_mrrc2:
+  case ARM::BI__builtin_arm_ldc:
+  case ARM::BI__builtin_arm_ldcl:
+  case ARM::BI__builtin_arm_ldc2:
+  case ARM::BI__builtin_arm_ldc2l:
+  case ARM::BI__builtin_arm_stc:
+  case ARM::BI__builtin_arm_stcl:
+  case ARM::BI__builtin_arm_stc2:
+  case ARM::BI__builtin_arm_stc2l:
+    return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15) ||
+           CheckARMCoprocessorImmediate(TheCall->getArg(0), /*WantCDE*/ false);
   }
 }
 

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 3a6c2af30d8b..2e1710f70753 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4937,13 +4937,47 @@ static void handlePatchableFunctionEntryAttr(Sema &S, Decl *D,
                  PatchableFunctionEntryAttr(S.Context, AL, Count, Offset));
 }
 
-static bool ArmMveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+namespace {
+struct IntrinToName {
+  uint32_t Id;
+  int32_t FullName;
+  int32_t ShortName;
+};
+} // unnamed namespace
+
+static bool ArmBuiltinAliasValid(unsigned BuiltinID, StringRef AliasName,
+                                 ArrayRef<IntrinToName> Map,
+                                 const char *IntrinNames) {
   if (AliasName.startswith("__arm_"))
     AliasName = AliasName.substr(6);
+  const IntrinToName *It = std::lower_bound(
+      Map.begin(), Map.end(), BuiltinID,
+      [](const IntrinToName &L, unsigned Id) { return L.Id < Id; });
+  if (It == Map.end() || It->Id != BuiltinID)
+    return false;
+  StringRef FullName(&IntrinNames[It->FullName]);
+  if (AliasName == FullName)
+    return true;
+  if (It->ShortName == -1)
+    return false;
+  StringRef ShortName(&IntrinNames[It->ShortName]);
+  return AliasName == ShortName;
+}
+
+static bool ArmMveAliasValid(unsigned BuiltinID, StringRef AliasName) {
 #include "clang/Basic/arm_mve_builtin_aliases.inc"
+  // The included file defines:
+  // - ArrayRef<IntrinToName> Map
+  // - const char IntrinNames[]
+  return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
+}
+
+static bool ArmCdeAliasValid(unsigned BuiltinID, StringRef AliasName) {
+#include "clang/Basic/arm_cde_builtin_aliases.inc"
+  return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
 }
 
-static void handleArmMveAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (!AL.isArgIdent(0)) {
     S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
         << AL << 1 << AANT_ArgumentIdentifier;
@@ -4952,14 +4986,15 @@ static void handleArmMveAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
 
   IdentifierInfo *Ident = AL.getArgAsIdent(0)->Ident;
   unsigned BuiltinID = Ident->getBuiltinID();
+  StringRef AliasName = cast<FunctionDecl>(D)->getIdentifier()->getName();
 
-  if (!ArmMveAliasValid(BuiltinID,
-                        cast<FunctionDecl>(D)->getIdentifier()->getName())) {
-    S.Diag(AL.getLoc(), diag::err_attribute_arm_mve_alias);
+  if (!ArmMveAliasValid(BuiltinID, AliasName) &&
+      !ArmCdeAliasValid(BuiltinID, AliasName)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
     return;
   }
 
-  D->addAttr(::new (S.Context) ArmMveAliasAttr(S.Context, AL, Ident));
+  D->addAttr(::new (S.Context) ArmBuiltinAliasAttr(S.Context, AL, Ident));
 }
 
 //===----------------------------------------------------------------------===//
@@ -7441,8 +7476,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
     handleMSAllocatorAttr(S, D, AL);
     break;
 
-  case ParsedAttr::AT_ArmMveAlias:
-    handleArmMveAliasAttr(S, D, AL);
+  case ParsedAttr::AT_ArmBuiltinAlias:
+    handleArmBuiltinAliasAttr(S, D, AL);
     break;
 
   case ParsedAttr::AT_AcquireHandle:

diff  --git a/clang/test/CodeGen/arm-cde-gpr.c b/clang/test/CodeGen/arm-cde-gpr.c
new file mode 100644
index 000000000000..9a24b1540b67
--- /dev/null
+++ b/clang/test/CodeGen/arm-cde-gpr.c
@@ -0,0 +1,16 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi \
+// RUN:   -target-feature +cdecp0 -target-feature +cdecp1 \
+// RUN:   -mfloat-abi hard -O0 -disable-O0-optnone \
+// RUN:   -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_cde.h>
+
+// CHECK-LABEL: @test_cx1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.cde.cx1(i32 0, i32 123)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_cx1() {
+  return __arm_cx1(0, 123);
+}

diff  --git a/clang/test/Headers/arm-cde-header.c b/clang/test/Headers/arm-cde-header.c
new file mode 100644
index 000000000000..e9ff64905162
--- /dev/null
+++ b/clang/test/Headers/arm-cde-header.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c89 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+// RUN: %clang_cc1 -std=c99 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+// RUN: %clang_cc1 -std=c11 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+
+// Check that the headers don't conflict with each other
+#include <arm_cde.h>
+#include <arm_mve.h>

diff  --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 3e29eb48da6c..769da310a8c4 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -18,7 +18,7 @@
 // CHECK-NEXT: Annotate ()
 // CHECK-NEXT: AnyX86NoCfCheck (SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: ArcWeakrefUnavailable (SubjectMatchRule_objc_interface)
-// CHECK-NEXT: ArmMveAlias (SubjectMatchRule_function)
+// CHECK-NEXT: ArmBuiltinAlias (SubjectMatchRule_function)
 // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function)
 // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
 // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)

diff  --git a/clang/test/Sema/arm-cde-immediates.c b/clang/test/Sema/arm-cde-immediates.c
new file mode 100644
index 000000000000..bbc13668a2a1
--- /dev/null
+++ b/clang/test/Sema/arm-cde-immediates.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -verify -fsyntax-only %s
+
+#include <arm_cde.h>
+#include <arm_acle.h>
+
+void test_coproc_gcp_instr(int a) {
+  __builtin_arm_cdp(0, 2, 3, 4, 5, 6); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_cdp2(0, 2, 3, 4, 5, 6); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcr(0, 0, a, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcr2(0, 0, a, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrc(0, 0, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrc2(0, 0, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcrr(0, 0, a, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcrr2(0, 0, a, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrrc(0, 0, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrrc2(0, 0, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldc(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldcl(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldc2(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldc2l(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stc(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stcl(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stc2(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stc2l(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+}
+
+void test_coproc(uint32_t a) {
+  (void)__arm_cx1(0, 0);
+  __arm_cx1(a, 0); // expected-error {{argument to '__arm_cx1' must be a constant integer}}
+  __arm_cx1(-1, 0); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  __arm_cx1(8, 0);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  __arm_cx1(1, 0); // expected-error {{coprocessor 1 must be configured as CDE}}
+}
+
+void test_cx(uint32_t a) {
+  (void)__arm_cx1(0, 0);
+  __arm_cx1(a, 0); // expected-error {{argument to '__arm_cx1' must be a constant integer}}
+  __arm_cx1(0, a);  // expected-error {{argument to '__arm_cx1' must be a constant integer}}
+  __arm_cx1(0, 8192);  // expected-error {{argument value 8192 is outside the valid range [0, 8191]}}
+}

diff  --git a/clang/test/Sema/arm-mve-alias-attribute.c b/clang/test/Sema/arm-mve-alias-attribute.c
index c2f526df490c..6741caebb4ee 100644
--- a/clang/test/Sema/arm-mve-alias-attribute.c
+++ b/clang/test/Sema/arm-mve-alias-attribute.c
@@ -1,22 +1,28 @@
 // RUN: %clang_cc1 -triple armv8.1m.main-arm-none-eabi -verify -fsyntax-only %s
 
-static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_mve_alias' attribute can only be applied to an ARM MVE builtin}}
-void nop(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_builtin_alias' attribute can only be applied to an ARM builtin}}
+void
+nop(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias)) // expected-error {{'__clang_arm_mve_alias' attribute takes one argument}}
-void noparens(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias)) // expected-error {{'__clang_arm_builtin_alias' attribute takes one argument}}
+void
+noparens(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias())) // expected-error {{'__clang_arm_mve_alias' attribute takes one argument}}
-void emptyparens(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias())) // expected-error {{'__clang_arm_builtin_alias' attribute takes one argument}}
+void
+emptyparens(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias("string literal"))) // expected-error {{'__clang_arm_mve_alias' attribute requires parameter 1 to be an identifier}}
-void stringliteral(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias("string literal"))) // expected-error {{'__clang_arm_builtin_alias' attribute requires parameter 1 to be an identifier}}
+void
+stringliteral(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias(1))) // expected-error {{'__clang_arm_mve_alias' attribute requires parameter 1 to be an identifier}}
-void integer(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias(1))) // expected-error {{'__clang_arm_builtin_alias' attribute requires parameter 1 to be an identifier}}
+void
+integer(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_nop, 2))) // expected-error {{'__clang_arm_mve_alias' attribute takes one argument}}
-void twoargs(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_nop, 2))) // expected-error {{'__clang_arm_builtin_alias' attribute takes one argument}}
+void
+twoargs(void);
 
-static __attribute__((__clang_arm_mve_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_mve_alias' attribute only applies to functions}}
+static __attribute__((__clang_arm_builtin_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_builtin_alias' attribute only applies to functions}}
 int variable;

diff  --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index bc3c21f3a44f..9a9fe00eed74 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -80,7 +80,7 @@ using namespace llvm;
 
 namespace {
 
-class MveEmitter;
+class EmitterBase;
 class Result;
 
 // -----------------------------------------------------------------------------
@@ -140,6 +140,7 @@ class Type {
   TypeKind typeKind() const { return TKind; }
   virtual ~Type() = default;
   virtual bool requiresFloat() const = 0;
+  virtual bool requiresMVE() const = 0;
   virtual unsigned sizeInBits() const = 0;
   virtual std::string cName() const = 0;
   virtual std::string llvmName() const {
@@ -179,6 +180,7 @@ class VoidType : public Type {
   VoidType() : Type(TypeKind::Void) {}
   unsigned sizeInBits() const override { return 0; }
   bool requiresFloat() const override { return false; }
+  bool requiresMVE() const override { return false; }
   std::string cName() const override { return "void"; }
 
   static bool classof(const Type *T) { return T->typeKind() == TypeKind::Void; }
@@ -194,6 +196,7 @@ class PointerType : public Type {
       : Type(TypeKind::Pointer), Pointee(Pointee), Const(Const) {}
   unsigned sizeInBits() const override { return 32; }
   bool requiresFloat() const override { return Pointee->requiresFloat(); }
+  bool requiresMVE() const override { return Pointee->requiresMVE(); }
   std::string cName() const override {
     std::string Name = Pointee->cName();
 
@@ -274,6 +277,7 @@ class ScalarType : public CRegularNamedType {
   }
   bool isInteger() const { return Kind != ScalarTypeKind::Float; }
   bool requiresFloat() const override { return !isInteger(); }
+  bool requiresMVE() const override { return false; }
   bool hasNonstandardName() const { return !NameOverride.empty(); }
 
   static bool classof(const Type *T) {
@@ -291,6 +295,7 @@ class VectorType : public CRegularNamedType {
   unsigned sizeInBits() const override { return Lanes * Element->sizeInBits(); }
   unsigned lanes() const { return Lanes; }
   bool requiresFloat() const override { return Element->requiresFloat(); }
+  bool requiresMVE() const override { return true; }
   std::string cNameBase() const override {
     return Element->cNameBase() + "x" + utostr(Lanes);
   }
@@ -317,6 +322,7 @@ class MultiVectorType : public CRegularNamedType {
   }
   unsigned registers() const { return Registers; }
   bool requiresFloat() const override { return Element->requiresFloat(); }
+  bool requiresMVE() const override { return true; }
   std::string cNameBase() const override {
     return Element->cNameBase() + "x" + utostr(Registers);
   }
@@ -341,6 +347,7 @@ class PredicateType : public CRegularNamedType {
   unsigned sizeInBits() const override { return 16; }
   std::string cNameBase() const override { return "mve_pred16"; }
   bool requiresFloat() const override { return false; };
+  bool requiresMVE() const override { return true; }
   std::string llvmName() const override {
     // Use <4 x i1> instead of <2 x i1> for two-lane vector types. See
     // the comment in llvm/lib/Target/ARM/ARMInstrMVE.td for further
@@ -405,7 +412,7 @@ struct CodeGenParamAllocator {
   // We rely on the recursive code generation working identically in passes 1
   // and 2, so that the same list of calls to allocParam happen in the same
   // order. That guarantees that the parameter numbers recorded in pass 1 will
-  // match the entries in this vector that store what MveEmitter::EmitBuiltinCG
+  // match the entries in this vector that store what EmitterBase::EmitBuiltinCG
   // decided to do about each one in pass 2.
   std::vector<int> *ParamNumberMap = nullptr;
 
@@ -794,6 +801,9 @@ class ACLEIntrinsic {
   // shares with at least one other intrinsic.
   std::string ShortName, FullName;
 
+  // Name of the architecture extension, used in the Clang builtin name
+  StringRef BuiltinExtension;
+
   // A very small number of intrinsics _only_ have a polymorphic
   // variant (vuninitializedq taking an unevaluated argument).
   bool PolymorphicOnly;
@@ -802,6 +812,10 @@ class ACLEIntrinsic {
   // evaluate its argument(s) at all.
   bool NonEvaluating;
 
+  // True if the intrinsic needs only the C header part (no codegen, semantic
+  // checks, etc). Used for redeclaring MVE intrinsics in the arm_cde.h header.
+  bool HeaderOnly;
+
   const Type *ReturnType;
   std::vector<const Type *> ArgTypes;
   std::map<unsigned, ImmediateArg> ImmediateArgs;
@@ -824,6 +838,7 @@ class ACLEIntrinsic {
 public:
   const std::string &shortName() const { return ShortName; }
   const std::string &fullName() const { return FullName; }
+  StringRef builtinExtension() const { return BuiltinExtension; }
   const Type *returnType() const { return ReturnType; }
   const std::vector<const Type *> &argTypes() const { return ArgTypes; }
   bool requiresFloat() const {
@@ -834,13 +849,19 @@ class ACLEIntrinsic {
         return true;
     return false;
   }
+  bool requiresMVE() const {
+    return ReturnType->requiresMVE() ||
+           any_of(ArgTypes, [](const Type *T) { return T->requiresMVE(); });
+  }
   bool polymorphic() const { return ShortName != FullName; }
   bool polymorphicOnly() const { return PolymorphicOnly; }
   bool nonEvaluating() const { return NonEvaluating; }
+  bool headerOnly() const { return HeaderOnly; }
 
-  // External entry point for code generation, called from MveEmitter.
+  // External entry point for code generation, called from EmitterBase.
   void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc,
                unsigned Pass) const {
+    assert(!headerOnly() && "Called genCode for header-only intrinsic");
     if (!hasCode()) {
       for (auto kv : CustomCodeGenArgs)
         OS << "  " << kv.first << " = " << kv.second << ";\n";
@@ -881,6 +902,7 @@ class ACLEIntrinsic {
   }
 
   std::string genSema() const {
+    assert(!headerOnly() && "Called genSema for header-only intrinsic");
     std::vector<std::string> SemaChecks;
 
     for (const auto &kv : ImmediateArgs) {
@@ -932,22 +954,21 @@ class ACLEIntrinsic {
     }
     if (SemaChecks.empty())
       return "";
-    return (Twine("  return ") +
-            join(std::begin(SemaChecks), std::end(SemaChecks),
-                 " ||\n         ") +
-            ";\n")
-        .str();
+    return join(std::begin(SemaChecks), std::end(SemaChecks),
+                " ||\n         ") +
+           ";\n";
   }
 
-  ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param);
+  ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param);
 };
 
 // -----------------------------------------------------------------------------
 // The top-level class that holds all the state from analyzing the entire
 // Tablegen input.
 
-class MveEmitter {
-  // MveEmitter holds a collection of all the types we've instantiated.
+class EmitterBase {
+protected:
+  // EmitterBase holds a collection of all the types we've instantiated.
   VoidType Void;
   std::map<std::string, std::unique_ptr<ScalarType>> ScalarTypes;
   std::map<std::tuple<ScalarTypeKind, unsigned, unsigned>,
@@ -1022,18 +1043,21 @@ class MveEmitter {
   Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote,
                             bool Immediate);
 
+  void GroupSemaChecks(std::map<std::string, std::set<std::string>> &Checks);
+
   // Constructor and top-level functions.
 
-  MveEmitter(RecordKeeper &Records);
+  EmitterBase(RecordKeeper &Records);
+  virtual ~EmitterBase() = default;
 
-  void EmitHeader(raw_ostream &OS);
-  void EmitBuiltinDef(raw_ostream &OS);
-  void EmitBuiltinSema(raw_ostream &OS);
+  virtual void EmitHeader(raw_ostream &OS) = 0;
+  virtual void EmitBuiltinDef(raw_ostream &OS) = 0;
+  virtual void EmitBuiltinSema(raw_ostream &OS) = 0;
   void EmitBuiltinCG(raw_ostream &OS);
   void EmitBuiltinAliases(raw_ostream &OS);
 };
 
-const Type *MveEmitter::getType(Init *I, const Type *Param) {
+const Type *EmitterBase::getType(Init *I, const Type *Param) {
   if (auto Dag = dyn_cast<DagInit>(I))
     return getType(Dag, Param);
   if (auto Def = dyn_cast<DefInit>(I))
@@ -1042,7 +1066,7 @@ const Type *MveEmitter::getType(Init *I, const Type *Param) {
   PrintFatalError("Could not convert this value into a type");
 }
 
-const Type *MveEmitter::getType(Record *R, const Type *Param) {
+const Type *EmitterBase::getType(Record *R, const Type *Param) {
   // Pass to a subfield of any wrapper records. We don't expect more than one
   // of these: immediate operands are used as plain numbers rather than as
   // llvm::Value, so it's meaningless to promote their type anyway.
@@ -1061,7 +1085,7 @@ const Type *MveEmitter::getType(Record *R, const Type *Param) {
   PrintFatalError(R->getLoc(), "Could not convert this record into a type");
 }
 
-const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
+const Type *EmitterBase::getType(DagInit *D, const Type *Param) {
   // The meat of the getType system: types in the Tablegen are represented by a
   // dag whose operators select sub-cases of this function.
 
@@ -1129,8 +1153,8 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
   PrintFatalError("Bad operator in type dag expression");
 }
 
-Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
-                                      const Type *Param) {
+Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope,
+                                       const Type *Param) {
   Record *Op = cast<DefInit>(D->getOperator())->getDef();
 
   if (Op->getName() == "seq") {
@@ -1232,9 +1256,9 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
   }
 }
 
-Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
-                                         const Result::Scope &Scope,
-                                         const Type *Param) {
+Result::Ptr EmitterBase::getCodeForDagArg(DagInit *D, unsigned ArgNum,
+                                          const Result::Scope &Scope,
+                                          const Type *Param) {
   Init *Arg = D->getArg(ArgNum);
   StringRef Name = D->getArgNameStr(ArgNum);
 
@@ -1266,8 +1290,8 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
   PrintFatalError("bad dag argument type for code generation");
 }
 
-Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
-                                      bool Promote, bool Immediate) {
+Result::Ptr EmitterBase::getCodeForArg(unsigned ArgNum, const Type *ArgType,
+                                       bool Promote, bool Immediate) {
   Result::Ptr V = std::make_shared<BuiltinArgResult>(
       ArgNum, isa<PointerType>(ArgType), Immediate);
 
@@ -1286,7 +1310,7 @@ Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
   return V;
 }
 
-ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
+ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param)
     : ReturnType(ME.getType(R->getValueAsDef("ret"), Param)) {
   // Derive the intrinsic's full name, by taking the name of the
   // Tablegen record (or override) and appending the suffix from its
@@ -1325,8 +1349,11 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
   }
   ShortName = join(std::begin(NameParts), std::end(NameParts), "_");
 
+  BuiltinExtension = R->getValueAsString("builtinExtension");
+
   PolymorphicOnly = R->getValueAsBit("polymorphicOnly");
   NonEvaluating = R->getValueAsBit("nonEvaluating");
+  HeaderOnly = R->getValueAsBit("headerOnly");
 
   // Process the intrinsic's argument list.
   DagInit *ArgsDag = R->getValueAsDag("args");
@@ -1419,8 +1446,8 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
   }
 }
 
-MveEmitter::MveEmitter(RecordKeeper &Records) {
-  // Construct the whole MveEmitter.
+EmitterBase::EmitterBase(RecordKeeper &Records) {
+  // Construct the whole EmitterBase.
 
   // First, look up all the instances of PrimitiveType. This gives us the list
   // of vector typedefs we have to put in arm_mve.h, and also allows us to
@@ -1460,6 +1487,260 @@ class raw_self_contained_string_ostream : private string_holder,
       : string_holder(), raw_string_ostream(S) {}
 };
 
+const char LLVMLicenseHeader[] =
+    " *\n"
+    " *\n"
+    " * Part of the LLVM Project, under the Apache License v2.0 with LLVM"
+    " Exceptions.\n"
+    " * See https://llvm.org/LICENSE.txt for license information.\n"
+    " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+    " *\n"
+    " *===-----------------------------------------------------------------"
+    "------===\n"
+    " */\n"
+    "\n";
+
+// Machinery for the grouping of intrinsics by similar codegen.
+//
+// The general setup is that 'MergeableGroup' stores the things that a set of
+// similarly shaped intrinsics have in common: the text of their code
+// generation, and the number and type of their parameter variables.
+// MergeableGroup is the key in a std::map whose value is a set of
+// OutputIntrinsic, which stores the ways in which a particular intrinsic
+// specializes the MergeableGroup's generic description: the function name and
+// the _values_ of the parameter variables.
+
+struct ComparableStringVector : std::vector<std::string> {
+  // Infrastructure: a derived class of vector<string> which comes with an
+  // ordering, so that it can be used as a key in maps and an element in sets.
+  // There's no requirement on the ordering beyond being deterministic.
+  bool operator<(const ComparableStringVector &rhs) const {
+    if (size() != rhs.size())
+      return size() < rhs.size();
+    for (size_t i = 0, e = size(); i < e; ++i)
+      if ((*this)[i] != rhs[i])
+        return (*this)[i] < rhs[i];
+    return false;
+  }
+};
+
+struct OutputIntrinsic {
+  const ACLEIntrinsic *Int;
+  std::string Name;
+  ComparableStringVector ParamValues;
+  bool operator<(const OutputIntrinsic &rhs) const {
+    if (Name != rhs.Name)
+      return Name < rhs.Name;
+    return ParamValues < rhs.ParamValues;
+  }
+};
+struct MergeableGroup {
+  std::string Code;
+  ComparableStringVector ParamTypes;
+  bool operator<(const MergeableGroup &rhs) const {
+    if (Code != rhs.Code)
+      return Code < rhs.Code;
+    return ParamTypes < rhs.ParamTypes;
+  }
+};
+
+void EmitterBase::EmitBuiltinCG(raw_ostream &OS) {
+  // Pass 1: generate code for all the intrinsics as if every type or constant
+  // that can possibly be abstracted out into a parameter variable will be.
+  // This identifies the sets of intrinsics we'll group together into a single
+  // piece of code generation.
+
+  std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroupsPrelim;
+
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    if (Int.headerOnly())
+      continue;
+
+    MergeableGroup MG;
+    OutputIntrinsic OI;
+
+    OI.Int = &Int;
+    OI.Name = Int.fullName();
+    CodeGenParamAllocator ParamAllocPrelim{&MG.ParamTypes, &OI.ParamValues};
+    raw_string_ostream OS(MG.Code);
+    Int.genCode(OS, ParamAllocPrelim, 1);
+    OS.flush();
+
+    MergeableGroupsPrelim[MG].insert(OI);
+  }
+
+  // Pass 2: for each of those groups, optimize the parameter variable set by
+  // eliminating 'parameters' that are the same for all intrinsics in the
+  // group, and merging together pairs of parameter variables that take the
+  // same values as each other for all intrinsics in the group.
+
+  std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroups;
+
+  for (const auto &kv : MergeableGroupsPrelim) {
+    const MergeableGroup &MG = kv.first;
+    std::vector<int> ParamNumbers;
+    std::map<ComparableStringVector, int> ParamNumberMap;
+
+    // Loop over the parameters for this group.
+    for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
+      // Is this parameter the same for all intrinsics in the group?
+      const OutputIntrinsic &OI_first = *kv.second.begin();
+      bool Constant = all_of(kv.second, [&](const OutputIntrinsic &OI) {
+        return OI.ParamValues[i] == OI_first.ParamValues[i];
+      });
+
+      // If so, record it as -1, meaning 'no parameter variable needed'. Then
+      // the corresponding call to allocParam in pass 2 will not generate a
+      // variable at all, and just use the value inline.
+      if (Constant) {
+        ParamNumbers.push_back(-1);
+        continue;
+      }
+
+      // Otherwise, make a list of the values this parameter takes for each
+      // intrinsic, and see if that value vector matches anything we already
+      // have. We also record the parameter type, so that we don't accidentally
+      // match up two parameter variables with different types. (Not that
+      // there's much chance of them having textually equivalent values, but in
+      // _principle_ it could happen.)
+      ComparableStringVector key;
+      key.push_back(MG.ParamTypes[i]);
+      for (const auto &OI : kv.second)
+        key.push_back(OI.ParamValues[i]);
+
+      auto Found = ParamNumberMap.find(key);
+      if (Found != ParamNumberMap.end()) {
+        // Yes, an existing parameter variable can be reused for this.
+        ParamNumbers.push_back(Found->second);
+        continue;
+      }
+
+      // No, we need a new parameter variable.
+      int ExistingIndex = ParamNumberMap.size();
+      ParamNumberMap[key] = ExistingIndex;
+      ParamNumbers.push_back(ExistingIndex);
+    }
+
+    // Now we're ready to do the pass 2 code generation, which will emit the
+    // reduced set of parameter variables we've just worked out.
+
+    for (const auto &OI_prelim : kv.second) {
+      const ACLEIntrinsic *Int = OI_prelim.Int;
+
+      MergeableGroup MG;
+      OutputIntrinsic OI;
+
+      OI.Int = OI_prelim.Int;
+      OI.Name = OI_prelim.Name;
+      CodeGenParamAllocator ParamAlloc{&MG.ParamTypes, &OI.ParamValues,
+                                       &ParamNumbers};
+      raw_string_ostream OS(MG.Code);
+      Int->genCode(OS, ParamAlloc, 2);
+      OS.flush();
+
+      MergeableGroups[MG].insert(OI);
+    }
+  }
+
+  // Output the actual C++ code.
+
+  for (const auto &kv : MergeableGroups) {
+    const MergeableGroup &MG = kv.first;
+
+    // List of case statements in the main switch on BuiltinID, and an open
+    // brace.
+    const char *prefix = "";
+    for (const auto &OI : kv.second) {
+      OS << prefix << "case ARM::BI__builtin_arm_" << OI.Int->builtinExtension()
+         << "_" << OI.Name << ":";
+
+      prefix = "\n";
+    }
+    OS << " {\n";
+
+    if (!MG.ParamTypes.empty()) {
+      // If we've got some parameter variables, then emit their declarations...
+      for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
+        StringRef Type = MG.ParamTypes[i];
+        OS << "  " << Type;
+        if (!Type.endswith("*"))
+          OS << " ";
+        OS << " Param" << utostr(i) << ";\n";
+      }
+
+      // ... and an inner switch on BuiltinID that will fill them in with each
+      // individual intrinsic's values.
+      OS << "  switch (BuiltinID) {\n";
+      for (const auto &OI : kv.second) {
+        OS << "  case ARM::BI__builtin_arm_" << OI.Int->builtinExtension()
+           << "_" << OI.Name << ":\n";
+        for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i)
+          OS << "    Param" << utostr(i) << " = " << OI.ParamValues[i] << ";\n";
+        OS << "    break;\n";
+      }
+      OS << "  }\n";
+    }
+
+    // And finally, output the code, and close the outer pair of braces. (The
+    // code will always end with a 'return' statement, so we need not insert a
+    // 'break' here.)
+    OS << MG.Code << "}\n";
+  }
+}
+
+void EmitterBase::EmitBuiltinAliases(raw_ostream &OS) {
+  // Build a sorted table of:
+  // - intrinsic id number
+  // - full name
+  // - polymorphic name or -1
+  StringToOffsetTable StringTable;
+  OS << "static const IntrinToName MapData[] = {\n";
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    if (Int.headerOnly())
+      continue;
+    int32_t ShortNameOffset =
+        Int.polymorphic() ? StringTable.GetOrAddStringOffset(Int.shortName())
+                          : -1;
+    OS << "  { ARM::BI__builtin_arm_" << Int.builtinExtension() << "_"
+       << Int.fullName() << ", "
+       << StringTable.GetOrAddStringOffset(Int.fullName()) << ", "
+       << ShortNameOffset << "},\n";
+  }
+  OS << "};\n\n";
+
+  OS << "ArrayRef<IntrinToName> Map(MapData);\n\n";
+
+  OS << "static const char IntrinNames[] = {\n";
+  StringTable.EmitString(OS);
+  OS << "};\n\n";
+}
+
+void EmitterBase::GroupSemaChecks(
+    std::map<std::string, std::set<std::string>> &Checks) {
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    if (Int.headerOnly())
+      continue;
+    std::string Check = Int.genSema();
+    if (!Check.empty())
+      Checks[Check].insert(Int.fullName());
+  }
+}
+
+// -----------------------------------------------------------------------------
+// The class used for generating arm_mve.h and related Clang bits
+//
+
+class MveEmitter : public EmitterBase {
+public:
+  MveEmitter(RecordKeeper &Records) : EmitterBase(Records){};
+  void EmitHeader(raw_ostream &OS) override;
+  void EmitBuiltinDef(raw_ostream &OS) override;
+  void EmitBuiltinSema(raw_ostream &OS) override;
+};
+
 void MveEmitter::EmitHeader(raw_ostream &OS) {
   // Accumulate pieces of the header file that will be enabled under various
   // different combinations of #ifdef. The index into parts[] is made up of
@@ -1554,7 +1835,7 @@ void MveEmitter::EmitHeader(raw_ostream &OS) {
         // included to be part of the type signature of a builtin that
         // was known to clang already.
         //
-        // The declarations use __attribute__(__clang_arm_mve_alias),
+        // The declarations use __attribute__(__clang_arm_builtin_alias),
         // so that each function declared will be recognized as the
         // appropriate MVE builtin in spite of its user-facing name.
         //
@@ -1594,7 +1875,7 @@ void MveEmitter::EmitHeader(raw_ostream &OS) {
 
         OS << "static __inline__ __attribute__(("
            << (Polymorphic ? "overloadable, " : "")
-           << "__clang_arm_mve_alias(__builtin_arm_mve_" << Int.fullName()
+           << "__clang_arm_builtin_alias(__builtin_arm_mve_" << Int.fullName()
            << ")))\n"
            << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
       }
@@ -1608,19 +1889,8 @@ void MveEmitter::EmitHeader(raw_ostream &OS) {
 
   OS << "/*===---- arm_mve.h - ARM MVE intrinsics "
         "-----------------------------------===\n"
-        " *\n"
-        " *\n"
-        " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
-        "Exceptions.\n"
-        " * See https://llvm.org/LICENSE.txt for license information.\n"
-        " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
-        " *\n"
-        " *===-------------------------------------------------------------"
-        "----"
-        "------===\n"
-        " */\n"
-        "\n"
-        "#ifndef __ARM_MVE_H\n"
+     << LLVMLicenseHeader
+     << "#ifndef __ARM_MVE_H\n"
         "#define __ARM_MVE_H\n"
         "\n"
         "#if !__ARM_FEATURE_MVE\n"
@@ -1683,252 +1953,172 @@ void MveEmitter::EmitBuiltinDef(raw_ostream &OS) {
 
 void MveEmitter::EmitBuiltinSema(raw_ostream &OS) {
   std::map<std::string, std::set<std::string>> Checks;
-
-  for (const auto &kv : ACLEIntrinsics) {
-    const ACLEIntrinsic &Int = *kv.second;
-    std::string Check = Int.genSema();
-    if (!Check.empty())
-      Checks[Check].insert(Int.fullName());
-  }
+  GroupSemaChecks(Checks);
 
   for (const auto &kv : Checks) {
     for (StringRef Name : kv.second)
       OS << "case ARM::BI__builtin_arm_mve_" << Name << ":\n";
-    OS << kv.first;
+    OS << "  return " << kv.first;
   }
 }
 
-// Machinery for the grouping of intrinsics by similar codegen.
+// -----------------------------------------------------------------------------
+// The class used for generating arm_cde.h and related Clang bits
 //
-// The general setup is that 'MergeableGroup' stores the things that a set of
-// similarly shaped intrinsics have in common: the text of their code
-// generation, and the number and type of their parameter variables.
-// MergeableGroup is the key in a std::map whose value is a set of
-// OutputIntrinsic, which stores the ways in which a particular intrinsic
-// specializes the MergeableGroup's generic description: the function name and
-// the _values_ of the parameter variables.
-
-struct ComparableStringVector : std::vector<std::string> {
-  // Infrastructure: a derived class of vector<string> which comes with an
-  // ordering, so that it can be used as a key in maps and an element in sets.
-  // There's no requirement on the ordering beyond being deterministic.
-  bool operator<(const ComparableStringVector &rhs) const {
-    if (size() != rhs.size())
-      return size() < rhs.size();
-    for (size_t i = 0, e = size(); i < e; ++i)
-      if ((*this)[i] != rhs[i])
-        return (*this)[i] < rhs[i];
-    return false;
-  }
-};
 
-struct OutputIntrinsic {
-  const ACLEIntrinsic *Int;
-  std::string Name;
-  ComparableStringVector ParamValues;
-  bool operator<(const OutputIntrinsic &rhs) const {
-    if (Name != rhs.Name)
-      return Name < rhs.Name;
-    return ParamValues < rhs.ParamValues;
-  }
-};
-struct MergeableGroup {
-  std::string Code;
-  ComparableStringVector ParamTypes;
-  bool operator<(const MergeableGroup &rhs) const {
-    if (Code != rhs.Code)
-      return Code < rhs.Code;
-    return ParamTypes < rhs.ParamTypes;
-  }
+class CdeEmitter : public EmitterBase {
+public:
+  CdeEmitter(RecordKeeper &Records) : EmitterBase(Records){};
+  void EmitHeader(raw_ostream &OS) override;
+  void EmitBuiltinDef(raw_ostream &OS) override;
+  void EmitBuiltinSema(raw_ostream &OS) override;
 };
 
-void MveEmitter::EmitBuiltinCG(raw_ostream &OS) {
-  // Pass 1: generate code for all the intrinsics as if every type or constant
-  // that can possibly be abstracted out into a parameter variable will be.
-  // This identifies the sets of intrinsics we'll group together into a single
-  // piece of code generation.
-
-  std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroupsPrelim;
+void CdeEmitter::EmitHeader(raw_ostream &OS) {
+  // Accumulate pieces of the header file that will be enabled under various
+  // different combinations of #ifdef. The index into parts[] is one of the
+  // following:
+  constexpr unsigned None = 0;
+  constexpr unsigned MVE = 1;
+  constexpr unsigned MVEFloat = 2;
 
-  for (const auto &kv : ACLEIntrinsics) {
-    const ACLEIntrinsic &Int = *kv.second;
+  constexpr unsigned NumParts = 3;
+  raw_self_contained_string_ostream parts[NumParts];
 
-    MergeableGroup MG;
-    OutputIntrinsic OI;
+  // Write typedefs for all the required vector types, and a few scalar
+  // types that don't already have the name we want them to have.
 
-    OI.Int = &Int;
-    OI.Name = Int.fullName();
-    CodeGenParamAllocator ParamAllocPrelim{&MG.ParamTypes, &OI.ParamValues};
-    raw_string_ostream OS(MG.Code);
-    Int.genCode(OS, ParamAllocPrelim, 1);
-    OS.flush();
+  parts[MVE] << "typedef uint16_t mve_pred16_t;\n";
+  parts[MVEFloat] << "typedef __fp16 float16_t;\n"
+                     "typedef float float32_t;\n";
+  for (const auto &kv : ScalarTypes) {
+    const ScalarType *ST = kv.second.get();
+    if (ST->hasNonstandardName())
+      continue;
+    raw_ostream &OS = parts[ST->requiresFloat() ? MVEFloat : MVE];
+    const VectorType *VT = getVectorType(ST);
 
-    MergeableGroupsPrelim[MG].insert(OI);
+    OS << "typedef __attribute__((__neon_vector_type__(" << VT->lanes()
+       << "), __clang_arm_mve_strict_polymorphism)) " << ST->cName() << " "
+       << VT->cName() << ";\n";
   }
+  parts[MVE] << "\n";
+  parts[MVEFloat] << "\n";
 
-  // Pass 2: for each of those groups, optimize the parameter variable set by
-  // eliminating 'parameters' that are the same for all intrinsics in the
-  // group, and merging together pairs of parameter variables that take the
-  // same values as each other for all intrinsics in the group.
-
-  std::map<MergeableGroup, std::set<OutputIntrinsic>> MergeableGroups;
-
-  for (const auto &kv : MergeableGroupsPrelim) {
-    const MergeableGroup &MG = kv.first;
-    std::vector<int> ParamNumbers;
-    std::map<ComparableStringVector, int> ParamNumberMap;
+  // Write declarations for all the intrinsics.
 
-    // Loop over the parameters for this group.
-    for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
-      // Is this parameter the same for all intrinsics in the group?
-      const OutputIntrinsic &OI_first = *kv.second.begin();
-      bool Constant = all_of(kv.second, [&](const OutputIntrinsic &OI) {
-        return OI.ParamValues[i] == OI_first.ParamValues[i];
-      });
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
 
-      // If so, record it as -1, meaning 'no parameter variable needed'. Then
-      // the corresponding call to allocParam in pass 2 will not generate a
-      // variable at all, and just use the value inline.
-      if (Constant) {
-        ParamNumbers.push_back(-1);
+    // We generate each intrinsic twice, under its full unambiguous
+    // name and its shorter polymorphic name (if the latter exists).
+    for (bool Polymorphic : {false, true}) {
+      if (Polymorphic && !Int.polymorphic())
         continue;
-      }
-
-      // Otherwise, make a list of the values this parameter takes for each
-      // intrinsic, and see if that value vector matches anything we already
-      // have. We also record the parameter type, so that we don't accidentally
-      // match up two parameter variables with different types. (Not that
-      // there's much chance of them having textually equivalent values, but in
-      // _principle_ it could happen.)
-      ComparableStringVector key;
-      key.push_back(MG.ParamTypes[i]);
-      for (const auto &OI : kv.second)
-        key.push_back(OI.ParamValues[i]);
-
-      auto Found = ParamNumberMap.find(key);
-      if (Found != ParamNumberMap.end()) {
-        // Yes, an existing parameter variable can be reused for this.
-        ParamNumbers.push_back(Found->second);
+      if (!Polymorphic && Int.polymorphicOnly())
         continue;
-      }
-
-      // No, we need a new parameter variable.
-      int ExistingIndex = ParamNumberMap.size();
-      ParamNumberMap[key] = ExistingIndex;
-      ParamNumbers.push_back(ExistingIndex);
-    }
-
-    // Now we're ready to do the pass 2 code generation, which will emit the
-    // reduced set of parameter variables we've just worked out.
-
-    for (const auto &OI_prelim : kv.second) {
-      const ACLEIntrinsic *Int = OI_prelim.Int;
-
-      MergeableGroup MG;
-      OutputIntrinsic OI;
 
-      OI.Int = OI_prelim.Int;
-      OI.Name = OI_prelim.Name;
-      CodeGenParamAllocator ParamAlloc{&MG.ParamTypes, &OI.ParamValues,
-                                       &ParamNumbers};
-      raw_string_ostream OS(MG.Code);
-      Int->genCode(OS, ParamAlloc, 2);
-      OS.flush();
-
-      MergeableGroups[MG].insert(OI);
+      raw_ostream &OS =
+          parts[Int.requiresFloat() ? MVEFloat
+                                    : Int.requiresMVE() ? MVE : None];
+
+      // Make the name of the function in this declaration.
+      std::string FunctionName =
+          "__arm_" + (Polymorphic ? Int.shortName() : Int.fullName());
+
+      // Make strings for the types involved in the function's
+      // prototype.
+      std::string RetTypeName = Int.returnType()->cName();
+      if (!StringRef(RetTypeName).endswith("*"))
+        RetTypeName += " ";
+
+      std::vector<std::string> ArgTypeNames;
+      for (const Type *ArgTypePtr : Int.argTypes())
+        ArgTypeNames.push_back(ArgTypePtr->cName());
+      std::string ArgTypesString =
+          join(std::begin(ArgTypeNames), std::end(ArgTypeNames), ", ");
+
+      // Emit the actual declaration. See MveEmitter::EmitHeader for detailed
+      // comments
+      OS << "static __inline__ __attribute__(("
+         << (Polymorphic ? "overloadable, " : "")
+         << "__clang_arm_builtin_alias(__builtin_arm_" << Int.builtinExtension()
+         << "_" << Int.fullName() << ")))\n"
+         << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
     }
   }
 
-  // Output the actual C++ code.
-
-  for (const auto &kv : MergeableGroups) {
-    const MergeableGroup &MG = kv.first;
+  for (auto &part : parts)
+    part << "\n";
 
-    // List of case statements in the main switch on BuiltinID, and an open
-    // brace.
-    const char *prefix = "";
-    for (const auto &OI : kv.second) {
-      OS << prefix << "case ARM::BI__builtin_arm_mve_" << OI.Name << ":";
-      prefix = "\n";
-    }
-    OS << " {\n";
+  // Now we've finished accumulating bits and pieces into the parts[] array.
+  // Put it all together to write the final output file.
 
-    if (!MG.ParamTypes.empty()) {
-      // If we've got some parameter variables, then emit their declarations...
-      for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i) {
-        StringRef Type = MG.ParamTypes[i];
-        OS << "  " << Type;
-        if (!Type.endswith("*"))
-          OS << " ";
-        OS << " Param" << utostr(i) << ";\n";
-      }
+  OS << "/*===---- arm_cde.h - ARM CDE intrinsics "
+        "-----------------------------------===\n"
+     << LLVMLicenseHeader
+     << "#ifndef __ARM_CDE_H\n"
+        "#define __ARM_CDE_H\n"
+        "\n"
+        "#if !__ARM_FEATURE_CDE\n"
+        "#error \"CDE support not enabled\"\n"
+        "#endif\n"
+        "\n"
+        "#include <stdint.h>\n"
+        "\n"
+        "#ifdef __cplusplus\n"
+        "extern \"C\" {\n"
+        "#endif\n"
+        "\n";
 
-      // ... and an inner switch on BuiltinID that will fill them in with each
-      // individual intrinsic's values.
-      OS << "  switch (BuiltinID) {\n";
-      for (const auto &OI : kv.second) {
-        OS << "  case ARM::BI__builtin_arm_mve_" << OI.Name << ":\n";
-        for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i)
-          OS << "    Param" << utostr(i) << " = " << OI.ParamValues[i] << ";\n";
-        OS << "    break;\n";
-      }
-      OS << "  }\n";
-    }
+  for (size_t i = 0; i < NumParts; ++i) {
+    std::string condition;
+    if (i == MVEFloat)
+      condition = "__ARM_FEATURE_MVE & 2";
+    else if (i == MVE)
+      condition = "__ARM_FEATURE_MVE";
 
-    // And finally, output the code, and close the outer pair of braces. (The
-    // code will always end with a 'return' statement, so we need not insert a
-    // 'break' here.)
-    OS << MG.Code << "}\n";
+    if (!condition.empty())
+      OS << "#if " << condition << "\n\n";
+    OS << parts[i].str();
+    if (!condition.empty())
+      OS << "#endif /* " << condition << " */\n\n";
   }
+
+  OS << "#ifdef __cplusplus\n"
+        "} /* extern \"C\" */\n"
+        "#endif\n"
+        "\n"
+        "#endif /* __ARM_CDE_H */\n";
 }
 
-void MveEmitter::EmitBuiltinAliases(raw_ostream &OS) {
-  // Build a sorted table of:
-  // - intrinsic id number
-  // - full name
-  // - polymorphic name or -1
-  StringToOffsetTable StringTable;
-  OS << "struct IntrinToName {\n"
-        "  uint32_t Id;\n"
-        "  int32_t FullName;\n"
-        "  int32_t ShortName;\n"
-        "};\n";
-  OS << "static const IntrinToName Map[] = {\n";
+void CdeEmitter::EmitBuiltinDef(raw_ostream &OS) {
   for (const auto &kv : ACLEIntrinsics) {
+    if (kv.second->headerOnly())
+      continue;
     const ACLEIntrinsic &Int = *kv.second;
-    int32_t ShortNameOffset =
-        Int.polymorphic() ? StringTable.GetOrAddStringOffset(Int.shortName())
-                          : -1;
-    OS << "  { ARM::BI__builtin_arm_mve_" << Int.fullName() << ", "
-       << StringTable.GetOrAddStringOffset(Int.fullName()) << ", "
-       << ShortNameOffset << "},\n";
+    OS << "TARGET_HEADER_BUILTIN(__builtin_arm_cde_" << Int.fullName()
+       << ", \"\", \"ncU\", \"arm_cde.h\", ALL_LANGUAGES, \"\")\n";
   }
-  OS << "};\n\n";
+}
 
-  OS << "static const char IntrinNames[] = {\n";
-  StringTable.EmitString(OS);
-  OS << "};\n\n";
+void CdeEmitter::EmitBuiltinSema(raw_ostream &OS) {
+  std::map<std::string, std::set<std::string>> Checks;
+  GroupSemaChecks(Checks);
 
-  OS << "auto It = std::lower_bound(std::begin(Map), "
-        "std::end(Map), BuiltinID,\n"
-        "  [](const IntrinToName &L, unsigned Id) {\n"
-        "    return L.Id < Id;\n"
-        "  });\n";
-  OS << "if (It == std::end(Map) || It->Id != BuiltinID)\n"
-        "  return false;\n";
-  OS << "StringRef FullName(&IntrinNames[It->FullName]);\n";
-  OS << "if (AliasName == FullName)\n"
-        "  return true;\n";
-  OS << "if (It->ShortName == -1)\n"
-        "  return false;\n";
-  OS << "StringRef ShortName(&IntrinNames[It->ShortName]);\n";
-  OS << "return AliasName == ShortName;\n";
+  for (const auto &kv : Checks) {
+    for (StringRef Name : kv.second)
+      OS << "case ARM::BI__builtin_arm_cde_" << Name << ":\n";
+    OS << "  Err = " << kv.first << "  break;\n";
+  }
 }
 
 } // namespace
 
 namespace clang {
 
+// MVE
+
 void EmitMveHeader(RecordKeeper &Records, raw_ostream &OS) {
   MveEmitter(Records).EmitHeader(OS);
 }
@@ -1949,4 +2139,26 @@ void EmitMveBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) {
   MveEmitter(Records).EmitBuiltinAliases(OS);
 }
 
+// CDE
+
+void EmitCdeHeader(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitHeader(OS);
+}
+
+void EmitCdeBuiltinDef(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinDef(OS);
+}
+
+void EmitCdeBuiltinSema(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinSema(OS);
+}
+
+void EmitCdeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinCG(OS);
+}
+
+void EmitCdeBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinAliases(OS);
+}
+
 } // end namespace clang

diff  --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 6ba90cee4aae..3d8f6dc352d0 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -70,6 +70,11 @@ enum ActionType {
   GenArmMveBuiltinSema,
   GenArmMveBuiltinCG,
   GenArmMveBuiltinAliases,
+  GenArmCdeHeader,
+  GenArmCdeBuiltinDef,
+  GenArmCdeBuiltinSema,
+  GenArmCdeBuiltinCG,
+  GenArmCdeBuiltinAliases,
   GenAttrDocs,
   GenDiagDocs,
   GenOptDocs,
@@ -190,6 +195,16 @@ cl::opt<ActionType> Action(
                    "Generate ARM MVE builtin code-generator for clang"),
         clEnumValN(GenArmMveBuiltinAliases, "gen-arm-mve-builtin-aliases",
                    "Generate list of valid ARM MVE builtin aliases for clang"),
+        clEnumValN(GenArmCdeHeader, "gen-arm-cde-header",
+                   "Generate arm_cde.h for clang"),
+        clEnumValN(GenArmCdeBuiltinDef, "gen-arm-cde-builtin-def",
+                   "Generate ARM CDE builtin definitions for clang"),
+        clEnumValN(GenArmCdeBuiltinSema, "gen-arm-cde-builtin-sema",
+                   "Generate ARM CDE builtin sema checks for clang"),
+        clEnumValN(GenArmCdeBuiltinCG, "gen-arm-cde-builtin-codegen",
+                   "Generate ARM CDE builtin code-generator for clang"),
+        clEnumValN(GenArmCdeBuiltinAliases, "gen-arm-cde-builtin-aliases",
+                   "Generate list of valid ARM CDE builtin aliases for clang"),
         clEnumValN(GenAttrDocs, "gen-attr-docs",
                    "Generate attribute documentation"),
         clEnumValN(GenDiagDocs, "gen-diag-docs",
@@ -351,6 +366,21 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenArmMveBuiltinAliases:
     EmitMveBuiltinAliases(Records, OS);
     break;
+  case GenArmCdeHeader:
+    EmitCdeHeader(Records, OS);
+    break;
+  case GenArmCdeBuiltinDef:
+    EmitCdeBuiltinDef(Records, OS);
+    break;
+  case GenArmCdeBuiltinSema:
+    EmitCdeBuiltinSema(Records, OS);
+    break;
+  case GenArmCdeBuiltinCG:
+    EmitCdeBuiltinCG(Records, OS);
+    break;
+  case GenArmCdeBuiltinAliases:
+    EmitCdeBuiltinAliases(Records, OS);
+    break;
   case GenAttrDocs:
     EmitClangAttrDocs(Records, OS);
     break;

diff  --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 7ac2e0eeb1f3..cc300319c100 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -97,6 +97,12 @@ void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitMveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitMveBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
+void EmitCdeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+
 void EmitClangAttrDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangDiagDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangOptDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);

diff  --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9018ea1e081a..4968689a4b56 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1243,4 +1243,11 @@ def int_arm_mve_vqmovn_predicated: Intrinsic<[llvm_anyvector_ty],
     llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */,
     llvm_i32_ty /* top half */, llvm_anyvector_ty /* pred */], [IntrNoMem]>;
 
+// CDE (Custom Datapath Extension)
+
+def int_arm_cde_cx1: Intrinsic<
+  [llvm_i32_ty],
+  [llvm_i32_ty /* coproc */, llvm_i32_ty /* imm */],
+  [IntrNoMem, ImmArg<0>, ImmArg<1>]>;
+
 } // end TargetPrefix


        


More information about the cfe-commits mailing list